diff -urN linux-2.4.17-rc2-virgin/CREDITS linux-2.4.17-rc2-wli2/CREDITS
--- linux-2.4.17-rc2-virgin/CREDITS	Tue Dec 18 23:18:01 2001
+++ linux-2.4.17-rc2-wli2/CREDITS	Thu Dec 20 19:49:13 2001
@@ -576,6 +576,13 @@
 S: University of Michigan
 S: Ann Arbor, MI
 
+N: Michael Cornwell
+E: cornwell@acm.org
+D: Original designer and co-author of ATA Taskfile
+D: Kernel module SMART utilities
+S: Santa Cruz, California
+S: USA
+
 N: Kees Cook
 E: cook@cpoint.net
 W: http://outflux.net/
@@ -971,8 +978,8 @@
 
 N: Nigel Gamble
 E: nigel@nrg.org
-E: nigel@sgi.com
 D: Interrupt-driven printer driver
+D: Preemptible kernel
 S: 120 Alley Way
 S: Mountain View, California 94040
 S: USA
@@ -1174,22 +1181,19 @@
 
 N: Andre Hedrick
 E: andre@linux-ide.org
-E: andre@aslab.com
-E: andre@suse.com
+E: andre@linuxdiskcert.org
 W: http://www.linux-ide.org/
+W: http://www.linuxdiskcert.org/
 D: Random SMP kernel hacker...
 D: Uniform Multi-Platform E-IDE driver
 D: Active-ATA-Chipset maddness..........
-D: Ultra DMA 100/66/33
-D: ATA-Disconnect
+D: Ultra DMA 133/100/66/33 w/48-bit Addressing
+D: ATA-Disconnect, ATA-TCQ
 D: ATA-Smart Kernel Daemon
+D: Serial ATA
+D: ATA Command Block and Taskfile
 S: Linux ATA Development (LAD)
 S: Concord, CA
-S: ASL, Inc. 1-877-ASL-3535
-S: 1757 Houret Court, Milpitas, CA 95035
-S: SuSE Linux, Inc.
-S: 580 Second Street, Suite 210 Oakland, CA 94607
-S: USA
 
 N: Jochen Hein
 E: jochen@jochen.org
diff -urN linux-2.4.17-rc2-virgin/Changelog-wli linux-2.4.17-rc2-wli2/Changelog-wli
--- linux-2.4.17-rc2-virgin/Changelog-wli	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Changelog-wli	Thu Dec 20 19:48:48 2001
@@ -0,0 +1,58 @@
+Changelog for 2.4.17-rc2-wli2
+----------------------------------------------------------------------
+(1) IDE fixes and speedups                           (Andre Hedrick)
+(2) fix comment in buffer cache hash function        (William Irwin)
+(3) #define CLOCKS_PER_SECOND HZ                     (Robert Love)
+
+Changelog for 2.4.17-rc2-wli1
+----------------------------------------------------------------------
+(1) kdb-v1.9                                         (Keith Owens)
+(2) remove ->zone from struct page                   (William Irwin)
+(3) #ifndef CONFIG_HIGHMEM remove ->virtual          (William Irwin)
+(4) switch to Fibonacci hashing for the buffer cache (William Irwin)
+(5) slab allocator speedup                           (Momchil Velikov)
+(6) entry.S read-after-write speedup                 (Alex K.)
+(7) Document testing of inode cache hash function    (Anton Blanchard,
+                                                      Rusty Russell,
+                                                      William Irwin)
+(8) initialize max_bomb_segments to 6 in elevator    (William Irwin,
+                                                      reasonable value
+                                                      suggested by
+                                                      Andrew Morton)
+
+Changelog for 2.4.17-rc1-wli3
+----------------------------------------------------------------------
+(1) in FNV change shift/add to multiply              (William Irwin)
+(2) inode hash function like Lever pagecache         (William Irwin)
+(3) attribution on comment in pagecache hash function (William Irwin)
+(4) lock breaking patch, minus vmscan.c              (Robert Love)
+(5) back out conditional_schedule in wait_for_buffers (William Irwin)
+(6) reverted to Lever dcache but shifting D_HASHBITS (William Irwin)
+(7) shifting for high-order bits in UID hash         (William Irwin)
+(8) shifting for high-order bits in PID hash         (William Irwin)
+(9) removed comment about inode.c quadratic hashing  (William Irwin)
+
+Changelog for 2.4.17-rc1-wli2
+----------------------------------------------------------------------
+(1) switch dcache to Mersenne hash                   (William Irwin)
+(2) convert partial_name_hash() to FNV               (William Irwin)
+(3) back off HZ from 600 to 256                      (William Irwin)
+
+Changelog for 2.4.17-rc1-wli1
+----------------------------------------------------------------------
+(1) reverse-mapping VM                               (Rik van Riel)
+(2) preemptive kernel                                (Robert Love)
+(3) realtime scheduler that scans less               (George Anziger)
+(4) page cache hash function                         (Chuck Lever)
+(5) pidhash hash function                            (William Irwin)
+(6) dentry cache hash function                       (Chuck Lever)
+(7) check for priority == 0 in shrink_dcache_memory() (William Irwin)
+(8) buffer cache hash function                       (Chuck Lever)
+(9) uid hash function                                (William Irwin)
+(10) inode hash function restored to Lever paper form (Chuck Lever)
+(11) removal of statm_pgd_range()                    (William Irwin)
+(12) elevator read starvation prevention             (Andrew Morton)
+
+revert before distribution:
+(1) bootmem rewrite
+(2) timeslice change (HZ in asm-i386/param.h)
diff -urN linux-2.4.17-rc2-virgin/Documentation/Configure.help linux-2.4.17-rc2-wli2/Documentation/Configure.help
--- linux-2.4.17-rc2-virgin/Documentation/Configure.help	Tue Dec 18 23:18:02 2001
+++ linux-2.4.17-rc2-wli2/Documentation/Configure.help	Thu Dec 20 19:49:13 2001
@@ -266,6 +266,31 @@
   If you have a system with several CPUs, you do not need to say Y
   here: the local APIC will be used automatically.
 
+Preemptible Kernel
+CONFIG_PREEMPT
+  This option reduces the latency of the kernel when reacting to
+  real-time or interactive events by allowing a low priority process to
+  be preempted even if it is in kernel mode executing a system call.
+  This allows applications to run more reliably even when the system is
+  under load due to other, lower priority, processes.
+
+  Say Y here if you are building a kernel for a desktop system,
+  embedded system or real-time system.  Say N if you are building a
+  kernel for a system where throughput is more important than
+  interactive response, such as a server system.  Say N if you are
+  unsure.
+
+Break Selected Locks
+CONFIG_LOCK_BREAK
+  This option will break certain locks in high-latency regions
+  throughout the kernel.  It is intended for use in conjunction with
+  the preemptible kernel (CONFIG_PREEMPT).  Since in-kernel preemption
+  cannot occur while locks are held, temporarily releasing and then
+  reacquiring long-held locks will further improve system response.
+
+  Say Y if you are compiling for a system with strict latency
+  requirements such as an embedded, real-time, or audio processing
+  system.
+  Say N otherwise.
+
 Kernel math emulation
 CONFIG_MATH_EMULATION
   Linux can emulate a math coprocessor (used for floating point
   operations) if you don't have one.
@@ -290,6 +315,28 @@
   If you are not sure, say Y; apart from resulting in a 66 KB bigger
   kernel, it won't hurt.
 
+Real Time Scheduler
+CONFIG_RTSCHED
+  This option replaces the standard linux scheduler with a real time
+  scheduler.  The real time scheduler provides load independent fast
+  context switch times for real time tasks, whereas the standard linux
+  scheduler slows down with increasing load (i.e. more tasks ready to
+  run).  For non-real time tasks, both schedulers' context switch times
+  are load dependent.  The real time scheduler also provides a configure
+  option for real time priorities ranging from 1 to a max of 2047, while
+  the standard scheduler's real time priorities range from 1 to 99.
+  Real time tasks are tasks that have a scheduling policy of SCHED_FIFO
+  or SCHED_RR.  Scheduling policy is set by the sched_setscheduler(2)
+  system call and is inherited through fork and thread creation.
+
+Maximum Priority?
+CONFIG_MAX_PRI
+  This option lets you set the number of priorities available to real
+  time tasks.  Priorities 1 through the maximum are real time
+  priorities.  The default here is 127.  The system will quietly change
+  anything less than 99 to 99 and anything greater than 2047 to 2047.
+
 Timer and CPU usage LEDs
 CONFIG_LEDS
   If you say Y here, the LEDs on your machine will be used
@@ -747,6 +794,14 @@
   If both this SCSI emulation and native ATAPI support are compiled
   into the kernel, the native support will be used.
 
+Use the NOOP Elevator (WARNING)
+CONFIG_BLK_DEV_ELEVATOR_NOOP
+  If you are using a RAID class top-level driver above the ATA/IDE
+  core, you may find a performance boost by preventing the merging and
+  re-sorting of new requests.
+
+  If unsure, say N.
+
 ISA-PNP EIDE support
 CONFIG_BLK_DEV_ISAPNP
   If you have an ISA EIDE card that is PnP (Plug and Play) and
@@ -18934,6 +18989,47 @@
   send a BREAK and then within 5 seconds a command keypress. The
   keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
   unless you really know what this hack does.
 
+Kernel Debugging support
+CONFIG_KDB
+  This option provides a built-in kernel debugger.  The built-in
+  kernel debugger contains commands which allow memory to be examined,
+  instructions to be disassembled and breakpoints to be set.  For
+  details, see Documentation/kdb/kdb.mm and the manual pages kdb_bt,
+  kdb_ss, etc.  Kdb can also be used via the serial port.  Set up the
+  system to have a serial console (see
+  Documentation/serial-console.txt).  The Control-A key sequence on
+  the serial port will cause the kernel debugger to be entered with
+  input from the serial port and output to the serial console.
+  Selecting this option will automatically set CONFIG_KALLSYMS.  If
+  unsure, say N.
+
+KDB modules
+CONFIG_KDB_MODULES
+  KDB can be extended by adding your own modules, in directory
+  kdb/modules.  This option selects the way that these modules should
+  be compiled, as free-standing modules (select M) or built into the
+  kernel (select Y).  If unsure say M.
+
+KDB off by default
+CONFIG_KDB_OFF
+  Normally kdb is activated by default, as long as CONFIG_KDB is set.
+  If you want to ship a kernel with kdb support but only have kdb
+  turned on when the user requests it, then select this option.  When
+  compiled with CONFIG_KDB_OFF, kdb ignores all events unless you boot
+  with kdb=on or you echo "1" > /proc/sys/kernel/kdb.
+  This option also works in reverse: if kdb is normally activated, you
+  can boot with kdb=off or echo "0" > /proc/sys/kernel/kdb to
+  deactivate it.  If unsure, say N.
+
+Load all symbols for debugging
+CONFIG_KALLSYMS
+  Normally only exported symbols are available to modules.  For
+  debugging you may want all symbols, not just the exported ones.  If
+  you say Y here then extra data is added to the kernel and modules;
+  this data lists all the non-stack symbols in the kernel or module
+  and can be used by any debugger.  You need modutils >= 2.3.11 to use
+  this option.  See "man kallsyms" for the data format; it adds 10-20%
+  to the size of the kernel and the loaded modules.  If unsure, say N.
 
 ISDN support
 CONFIG_ISDN
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb.mm linux-2.4.17-rc2-wli2/Documentation/kdb/kdb.mm
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb.mm	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb.mm	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,286 @@
+.TH KDB 8 "25 September 2001"
+.hy 0
+.SH NAME
+Built-in Kernel Debugger for Linux - v1.8
+.SH "Overview"
+This document describes the built-in kernel debugger available
+for linux.  This debugger allows the programmer to interactively
+examine kernel memory, disassemble kernel functions, set breakpoints
+in the kernel code and display and modify register contents.
+.P
+A symbol table is included in the kernel image and in modules which
+enables all non-stack symbols (including static symbols) to be used as
+arguments to the kernel debugger commands.
+.SH "Getting Started"
+To include the kernel debugger in a linux kernel, use a
+configuration mechanism (e.g. xconfig, menuconfig, et al.)
+to enable the \fBCONFIG_KDB\fP option.  Additionally, for accurate
+stack tracebacks, it is recommended that the \fBCONFIG_FRAME_POINTER\fP
+option be enabled.  \fBCONFIG_FRAME_POINTER\fP changes the compiler
+flags so that the frame pointer register will be used as a frame
+pointer rather than a general purpose register.
+.P
+After linux has been configured to include the kernel debugger,
+make a new kernel with the new configuration file (a make clean
+is recommended before making the kernel), and install the kernel
+as normal.
+.P
+You can compile a kernel with kdb support but have kdb off by default:
+select \fBCONFIG_KDB_OFF\fR.  Then the user has to explicitly activate
+kdb by booting with the 'kdb=on' flag or, after /proc is mounted, by
+.nf
+  echo "1" > /proc/sys/kernel/kdb
+.fi
+You can also do the reverse, compile a kernel with kdb on and
+deactivate kdb with the boot flag 'kdb=off' or, after /proc is mounted,
+by
+.nf
+  echo "0" > /proc/sys/kernel/kdb
+.fi
+.P
+When booting the new kernel using \fIlilo\fP(1), the 'kdb=early' flag
+may be added after the image name on the \fBLILO\fP boot line to
+force the kernel to stop in the kernel debugger early in the
+kernel initialization process.  'kdb=early' implies 'kdb=on'.
+If the 'kdb=early' flag isn't provided, then kdb will automatically be
+invoked upon system panic or when the \fBPAUSE\fP key is used from the
+keyboard, assuming that kdb is on.  Older versions of kdb used just a
+boot flag of 'kdb' to activate kdb early; this is still supported but
+is deprecated.
+.P
+Kdb can also be used via the serial port.  Set up the system to
+have a serial console (see \fIDocumentation/serial-console.txt\fP).
+The \fBControl-A\fP key sequence on the serial port will cause the
+kernel debugger to be entered, assuming that kdb is on.
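+.P
+For example, an illustrative \fIlilo.conf\fP entry (the image name and
+label are placeholders, not requirements) that activates kdb early in
+boot and directs console output to both the display and the first
+serial port:
+.nf
+  image=/boot/vmlinuz-2.4.17-rc2-wli2
+    label=linux-kdb
+    append="kdb=early console=tty0 console=ttyS0,38400"
+.fi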
+.P
+If you have both a keyboard+video and a serial console, you can use
+either for kdb.
+Define both video and serial consoles with boot parameters
+.P
+.nf
+  console=tty0 console=ttyS0,38400
+.fi
+.P
+Any kdb data entered on the keyboard or the serial console will be
+echoed to both.
+.P
+While kdb is active, the keyboard (not serial console) indicators will
+strobe.  The caps lock and scroll lock lights will turn on and off; num
+lock is not used because it can confuse laptop keyboards where the
+numeric keypad is mapped over the normal keys.  On exit from kdb the
+keyboard indicators will probably be wrong; they will not match the
+kernel state.  Pressing caps lock twice should get the indicators back
+in sync with the kernel.
+.SH "Basic Commands"
+There are several categories of commands available to the
+kernel debugger user, including commands providing memory
+display and modification, register display and modification,
+instruction disassembly, breakpoints and stack tracebacks.
+.P
+The following table shows the currently implemented commands:
+.DS
+.TS
+box, center;
+l | l
+l | l.
+Command	Description
+_
+bc	Clear Breakpoint
+bd	Disable Breakpoint
+be	Enable Breakpoint
+bl	Display breakpoints
+bp	Set or Display breakpoint
+bph	Set or Display hardware breakpoint
+bpa	Set or Display breakpoint globally
+bpha	Set or Display hardware breakpoint globally
+bt	Stack backtrace for current process
+btp	Stack backtrace for specific process
+bta	Stack backtrace for all processes
+cpu	Display or switch cpus
+ef	Print exception frame
+env	Show environment
+go	Restart execution
+help	Display help message
+id	Disassemble Instructions
+ll	Follow Linked Lists
+lsmod	List loaded modules
+md	Display memory contents
+mdWcN	Display memory contents with width W and count N.
+mdr	Display raw memory contents
+mds	Display memory contents symbolically
+mm	Modify memory contents, words
+mmW	Modify memory contents, bytes
+reboot	Reboot the machine
+rd	Display register contents
+rm	Modify register contents
+rmmod	Remove a module
+sections	List information on all known sections
+set	Add/change environment variable
+sr	Invoke SysReq commands
+ss	Single step a cpu
+ssb	Single step a cpu until a branch instruction
+.TE
+.DE
+.P
+Some commands can be abbreviated; such commands are indicated by a
+non-zero \fIminlen\fP parameter to \fBkdb_register\fP, the value of
+\fIminlen\fP being the minimum length to which the command can be
+abbreviated (for example, the \fBgo\fP command can be abbreviated
+legally to \fBg\fP).
+.P
+If an input string does not match a command in the command table,
+it is treated as an address expression and the corresponding address
+value and nearest symbol are shown.
+.P
+Some of the commands are described here.
+Information on the more complicated commands can be found in the
+appropriate manual pages.
+.TP 8
+cpu
+With no parameters, it lists the available cpus; a '*' after a cpu
+number indicates a cpu that did not respond to the kdb stop signal.
+.I cpu
+followed by a number will switch to that cpu; you cannot switch to
+a cpu marked '*'.
+This command is only available if the kernel was configured for SMP.
+.TP 8
+go
+Continue normal execution.
+Active breakpoints are reestablished and the processor(s) allowed to
+run normally.
+To continue at a specific address, use
+.I rm
+to change the instruction pointer, then go.
+.TP 8
+id
+Disassemble instructions starting at an address.
+Environment variable IDCOUNT controls how many lines of disassembly
+output the command produces.
+.TP 8
+lsmod
+Internal command to list modules.
+This does not use any kernel or user space services, so it can be used
+at any time.
+.TP 8
+reboot
+Reboot the system, with no attempt to do a clean close down.
+.TP 8
+rmmod
+Internal command to remove a module.
+This does not use any user space services; however, it calls the module
+cleanup routine and that routine may try to use kernel services.
+Because kdb runs with interrupts disabled, there is no guarantee that
+the module cleanup routine will succeed; there is a real risk of the
+routine hanging and taking kdb with it.
+Use the
+.I rmmod
+command with extreme care.
+.TP 8
+sections
+List information for all known sections.  The output is one line per
+module plus the kernel, starting with the module name.  This is
+followed by one or more repeats of section name, section start,
+section end and section flags.  This data is not designed for human
+readability; it is intended to tell external debuggers where each
+section has been loaded.
+.TP 8
+sr
+Invoke the SysReq code.
+This command takes a single character which is passed to SysReq
+processing, as if you had entered the SysReq key sequence followed by
+that character.
+.SH INITIAL KDB COMMANDS
+kdb/kdb_cmds is a plain text file where you can define kdb commands
+which are to be issued during kdb_init().  One command per line; blank
+lines are ignored, as are lines starting with '#'.  kdb_cmds is
+intended for per user customization of kdb; you can use it to set
+environment variables to suit your hardware or to set standard
+breakpoints for the problem you are debugging.  This file is converted
+to a small C object, compiled and linked into the kernel.  You must
+rebuild and reinstall the kernel after changing kdb_cmds.  This file
+will never be shipped with any useful data, so you can always override
+it with your local copy.  Sample kdb_cmds:
+.P
+.nf
+# Initial commands for kdb, alter to suit your needs.
+# These commands are executed in kdb_init() context, no SMP, no
+# processes.  Commands that require process data (including stack or
+# registers) are not reliable this early.  set and bp commands should
+# be safe.  Global breakpoint commands affect each cpu as it is booted.
+
+set LINES=50
+set MDCOUNT=25
+set RECURSE=1
+bp sys_init_module
+.fi
+.SH INTERRUPTS AND KDB
+When a kdb event occurs, one cpu (the initial cpu) enters kdb state.
+It uses a cross-system non-maskable interrupt (NMI) to interrupt the
+other cpus and bring them all into kdb state.  All cpus run with
+interrupts disabled while they are inside kdb; this prevents most
+external events from disturbing the kernel while kdb is running.
+.B Note:
+Disabled interrupts means that any I/O that relies on interrupts cannot
+proceed while kdb is in control, and devices can time out.  The clock
+tick is also disabled; machines will lose track of time while they are
+inside kdb.
+.P
+Even with interrupts disabled, some NMI events will still occur; these
+can disturb the kernel while you are debugging it.  The initial cpu
+will still accept NMI events, assuming that kdb was not entered for an
+NMI event.  Any cpu where you use the SS or SSB commands will accept
+NMI events, even after the instruction has finished and the cpu is back
+in kdb.  This is an unavoidable side effect of the fact that doing
+SS[B] requires the cpu to drop all the way out of kdb, including
+exiting from the NMI event that brought the cpu into kdb.
+Under normal circumstances the only NMI event is for the NMI oopser,
+and that is kdb aware, so it does not disturb the kernel while kdb is
+running.
+.P
+Sometimes doing SS or SSB on ix86 will allow one interrupt to proceed,
+even though the cpu is disabled for interrupts.  I have not been able
+to track this one down but I suspect that the interrupt was pending
+when kdb was entered and it runs when kdb exits through IRET even
+though the popped flags are marked as cli().  If any ix86 hardware
+expert can shed some light on this problem, please notify the kdb
+maintainer.
+.SH RECOVERING FROM KDB ERRORS
+If a kdb command breaks and kdb has enough of a recovery environment
+then kdb will abort the command and drop back into mainline kdb code.
+This means that user written kdb commands can follow bad pointers
+without killing kdb.  Ideally all code should verify that data areas
+are valid (using kdba_getword) before accessing them, but lots of calls
+to kdba_getword can be clumsy.
+.SH DEBUGGING THE DEBUGGER
+kdb has limited support for debugging problems within kdb.  If you
+suspect that kdb is failing, you can set environment variable KDBDEBUG
+to a bit pattern which will activate kdb_printf statements within kdb.
+See include/linux/kdb.h, KDB_DEBUG_FLAG_xxx defines.  For example
+.nf
+  set KDBDEBUG=0x60
+.fi
+activates the event callbacks into kdb plus state tracing in sections
+of kdb.
+.nf
+  set KDBDEBUG=0x18
+.fi
+gives lots of tracing as kdb tries to decode the process stack.
+.P
+You can also perform one level of recursion in kdb.  If environment
+variable RECURSE is not set or is 0 then kdb will either recover from
+an error (if the recovery environment is satisfactory) or kdb will
+allow the error to percolate, usually resulting in a dead system.  When
+RECURSE is 1 then kdb will recover from an error or, if there is no
+satisfactory recovery environment, it will drop into kdb state to let
+you diagnose the problem.  When RECURSE is 2 then all errors drop into
+kdb state; kdb does not attempt recovery first.  Errors while in
+recursive state all drop through; kdb does not even attempt to recover
+from recursive errors.
+.SH WRITING NEW COMMANDS
+TBD
+.SH AUTHORS
+Scott Lurndal, Richard Bass, Scott Foehner, Srinivasa Thirumalachar,
+Masahiro Adegawa, Marc Esipovich, Ted Kline, Steve Lord, Andi Kleen.
+.br
+Keith Owens - kdb maintainer.
+.SH SEE ALSO
+.P
+linux/Documentation/kdb/kdb_{bp,bt,env,ll,md,rd,ss}.man
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_bp.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_bp.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_bp.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_bp.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,172 @@
+.TH BD 1 "19 May 2000"
+.SH NAME
+bp, bpa, bph, bpha, bd, bc, be, bl \- breakpoint commands
+.SH SYNOPSIS
+bp \fIaddress-expression\fP
+.LP
+bpa \fIaddress-expression\fP
+.LP
+bph \fIaddress-expression\fP [\f(CWDATAR|DATAW|IO\fP [\fIlength\fP]]
+.LP
+bpha \fIaddress-expression\fP [\f(CWDATAR|DATAW|IO\fP [\fIlength\fP]]
+.LP
+bd \fIbreakpoint-number\fP
+.LP
+bc \fIbreakpoint-number\fP
+.LP
+be \fIbreakpoint-number\fP
+.LP
+bl
+.SH DESCRIPTION
+.hy 0
+The
+.B bp
+family of commands is used to establish a breakpoint.
+The \fIaddress-expression\fP may be a numeric value (decimal or
+hexadecimal), a symbol name, a register name preceded by a
+percent symbol '%', or a simple expression consisting of a
+symbol name, an addition or subtraction character and a numeric
+value (decimal or hexadecimal).
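+.P
+For example, each of the following is a valid \fIaddress-expression\fP
+(the addresses and offsets shown are illustrative only):
+.nf
+  0xc0254010        a numeric (hexadecimal) address
+  schedule          a symbol name
+  schedule+0x12e    a symbol plus a numeric offset
+  %edx              the contents of a register
+.fi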
+.P
+\fBbph\fP and \fBbpha\fP will force the use of a hardware register,
+provided the processor architecture supports them.
+.P
+The \fIaddress-expression\fP may also consist of a single
+asterisk '*' symbol which indicates that the command should
+operate on all existing breakpoints (valid only for \fBbc\fP,
+\fBbd\fP and \fBbe\fP).
+.P
+Four different types of breakpoints may be set:
+
+.TP 8
+Instruction
+Causes the kernel debugger to be invoked from the debug exception
+path when an instruction is fetched from the specified address.  This
+is the default if no other type of breakpoint is requested or when
+the \fBbp\fP command is used.
+
+.TP 8
+DATAR
+Causes the kernel debugger to be entered when data of length
+\fIlength\fP is read from or written to the specified address.
+This type of breakpoint must use a processor debug register which
+places an architecture dependent limit on the number of data and I/O
+breakpoints that may be established.
+The \fBbph\fP or \fBbpha\fP commands must be used.
+
+.TP 8
+DATAW
+Enters the kernel debugger when data of length \fIlength\fP
+is written to the specified address.  \fIlength\fP defaults
+to four bytes if it is not explicitly specified.
+Note that the processor may have already overwritten the prior data at
+the breakpoint location before the kernel debugger is invoked.
+The prior data should be saved before establishing the breakpoint, if
+required.
+The \fBbph\fP or \fBbpha\fP commands must be used.
+
+.TP 8
+IO
+Enters the kernel debugger when an \fBin\fP or \fBout\fP instruction
+targets the specified I/O address.  The \fBbph\fP or \fBbpha\fP
+commands must be used.
+
+.P
+The
+.B bpha
+command will establish a breakpoint on all processors in an
+SMP system.  This command is not available in a uniprocessor
+kernel.
+.P
+The
+.B bd
+command will disable a breakpoint without removing it from the kernel
+debugger's breakpoint table.
+This can be used to keep breakpoints in the table without exceeding the
+architecture limit on breakpoint registers.
+.P
+The
+.B be
+command will re-enable a disabled breakpoint.
+.P
+The
+.B bc
+command will clear a breakpoint from the breakpoint table.
+.P
+The
+.B bl
+command will list the existing set of breakpoints.
+.SH LIMITATIONS
+There is a compile time limit of sixteen entries in the
+breakpoint table at any one time.
+.P
+There are architecture dependent limits on the number of hardware
+breakpoints that can be set.
+.IP ix86 8
+Four.
+.PD 0
+.IP ia64 8
+?
+.PD 1
+.SH ENVIRONMENT
+The breakpoint subsystem does not currently use any environment
+variables.
+.SH SMP CONSIDERATIONS
+Using
+.B bc
+is risky on SMP systems.
+If you clear a breakpoint which another cpu has hit but not yet
+processed, the hit may not be recognised as a kdb breakpoint, usually
+resulting in incorrect program counters and kernel panics.
+It is safer to disable the breakpoint with
+.BR bd ,
+then
+.B go
+to let any other processors that are waiting on the breakpoint
+clear.
+After all processors are clear of the disabled breakpoint then it is
+safe to clear it using
+.BR bc .
+.P
+Breakpoints which use the processor breakpoint registers
+are only established on the processor which is
+currently active.  If you wish breakpoints to be universal,
+use the
+.B bpa
+or
+.B bpha
+commands.
+.SH EXAMPLES
+.TP 8
+bp schedule
+Sets an instruction breakpoint at the beginning of the
+function \fBschedule\fP.
+
+.TP 8
+bp schedule+0x12e
+Sets an instruction breakpoint at the instruction located
+at \fBschedule\fP+\fI0x12e\fP.
+
+.TP 8
+bph ttybuffer+0x24 dataw
+Sets a data write breakpoint at the location referenced by
+\fBttybuffer\fP+\fI0x24\fP for a length of four bytes.
+
+.TP 8
+bph 0xc0254010 datar 1
+Establishes a data reference breakpoint at address \fB0xc0254010\fP
+for a length of one byte.
+
+.TP 8
+bp
+List current breakpoint table.
+
+.TP 8
+bd 0
+Disable breakpoint #0.
+
+.TP 8
+bc *
+Clear all breakpoints
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_bt.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_bt.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_bt.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_bt.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,172 @@
+.TH BT 1 "16 September 2000"
+.SH NAME
+bt \- Stack Traceback command
+.SH SYNOPSIS
+bt [<stack-frame-address>]
+.LP
+btp <pid>
+.LP
+bta
+.SH DESCRIPTION
+.hy 0
+The
+.B bt
+command is used to print a stack traceback.  It uses the
+current registers (see \fBrd\fP command) to determine
+the starting context and attempts to provide a complete
+stack traceback for the active thread.  If \fIstack-frame-address\fP
+is supplied, it is assumed to point to the start of a valid
+stack frame and the stack will be traced back from that
+point (e.g. on i386 architecture, \fIstack-frame-address\fP
+should be the stack address of a saved \fB%eip\fP value from a
+\fBcall\fP instruction).
+.P
+A kernel configuration option \fBCONFIG_FRAME_POINTER\fP should
+be enabled so that the compiler will utilize the frame pointer
+register properly to maintain a stack which can be correctly
+analyzed.
+.P
+The \fBbt\fP command will attempt to analyze the stack without
+frame pointers if the \fBCONFIG_FRAME_POINTER\fP option is not
+enabled, but the analysis is difficult and may not produce
+accurate or complete results.
+.P
+The \fBbtp\fP command will analyze the stack for the given
+process identification (see the \fBps\fP command).
+.P
+The \fBbta\fP command lists the stack for all processes.
+.P
+For each function, the stack trace prints at least two lines.
+The first line contains four or five fields\ :-
+.IP * 3
+The pointer to the previous stack frame, blank if there is no valid
+frame pointer.
+.PD 0
+.IP * 3
+The current address within this frame.
+.IP * 3
+The address converted to a function name (actually the first non-local
+label which is <= the address).
+.IP * 3
+The offset of the address within the function.
+.IP * 3
+Any parameters to the function.
+.PD 1
+.PP
+On the next line there are five fields which are designed to make it
+easier to match the trace against the kernel code\ :-
+.IP * 3
+The module name that contains the address, "kernel" if it is in the
+base kernel.
+.PD 0
+.IP * 3
+The section name that contains the address.
+.IP * 3
+The start address of the section.
+.IP * 3
+The start address of the function.
+.IP * 3
+The end address of the function (the first non-local label which is >
+the address).
+.PD 1
+.PP
+If arguments are being converted to symbols, any argument which
+converts to a kernel or module address is printed as\ :-
+.IP * 3
+Argument address.
+.PD 0
+.IP * 3
+The module name that contains the address, "kernel" if it is in the
+base kernel.
+.IP * 3
+The symbol name the argument maps to.
+.IP * 3
+The offset of the argument from the symbol, suppressed if 0.
+.PD 1
+.SH MATCHING TRACE TO KERNEL CODE
+The command "objdump\ -S" will disassemble an object and, if the code
+was compiled with debugging (gcc flag -g), objdump will interleave the
+C source lines with the generated object.
+.PP
+A complete objdump of the kernel or a module is too big; normally you
+only want specific functions.
+By default objdump will only print the .text section but Linux uses
+other section names for executable code.
+When objdump prints relocatable objects (modules) it uses an offset of
+0 which is awkward to relate to the stack trace.
+The five fields which are printed for each function are designed to
+make it easier to match the stack trace against the kernel code using
+"objdump\ -S".
+.PP
+If the function is in the kernel then you need the section name, the
+start and end address of the function.  The command is
+.PP
+.nf
+  objdump -S -j <section-name> \\
+	--start-address=<function-start> \\
+	--stop-address=<function-end> \\
+	/usr/src/linux/vmlinux
+.fi
+.PP
+If the function is in a module then you need the section name, the
+start address of the section, the start and end address of the
+function, the module name.  The command is
+.PP
+.nf
+  objdump -S -j <section-name> \\
+	--adjust-vma=<section-start> \\
+	--start-address=<function-start> \\
+	--stop-address=<function-end> \\
+	/path/to/module/<module>.o
+.fi
+.PP
+All addresses to objdump must be preceded by '0x' if they are in hex;
+objdump does not assume hex.
+The stack trace values are printed with leading '0x' to make it easy to
+run objdump.
+.SH LIMITATIONS
+If the kernel is compiled without frame pointers, stack tracebacks
+may be incomplete.  The \fBmds %esp\fP command may be useful in
+attempting to determine the actual stack traceback manually.
+.P
+A stack trace can be misleading if any code in a function exit has been
+executed; the stack is partially unwound at that stage.
+.P
+The \fBbt\fP command may print more arguments for a function
+than that function accepts; this happens when the C compiler
+doesn't immediately pop the arguments off the stack upon return
+from a called function.  When this is the case, these extra
+stack words will be considered additional arguments by the \fBbt\fP
+command.
+.SH ENVIRONMENT
+The \fBBTARGS\fP environment variable governs the maximum number
+of arguments that are printed for any single function.
+.PP
+If the \fBBTSYMARG\fP environment variable is non-zero then any
+arguments that fall within the kernel are converted to symbols.
+.PP
+If the \fBNOSECT\fP environment variable is non-zero then the
+section information is suppressed.
+.PP
+The \fBBTAPROMPT\fP environment variable controls the prompt after each
+process is listed by the \fBbta\fP command.  If \fBBTAPROMPT\fP is not
+set or is non-zero then \fBbta\fP issues a prompt after each process is
+listed.  If \fBBTAPROMPT\fP is set to zero then no prompt is issued and
+all processes are listed without human intervention.
+.SH SMP CONSIDERATIONS
+None.
+.SH EXAMPLES
+.nf
+.na
+.ft CW
+Entering kdb (0xc3cb4000) due to Breakpoint @ 0xc011725d
+Instruction(i) breakpoint #0 at 0xc011725c
+qm_modules+0xd1:   movl   %ebp,%esp
+kdb> bt
+    EBP        EIP        Function(args)
+0xc3cb5f98 0xc011725d  qm_modules+0xd1 (0x80721c0, 0x100, 0xbfff5000)
+                       kernel .text 0xc0100000 0xc011718c 0xc0117264
+0xc3cb5fbc 0xc0117875  sys_query_module+0x1b1 (0x0, 0x1, 0x80721c0, 0x100, 0xbfff5000)
+                       kernel .text 0xc0100000 0xc01176c4 0xc01178e8
+           0xc01095f8  system_call+0x34
+                       kernel .text 0xc0100000 0xc01095c4 0xc01095fc
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_env.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_env.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_env.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_env.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,46 @@
+.TH ENV 1 "24 September 2000"
+.SH NAME
+env, set \- Environment manipulation commands
+.SH SYNOPSIS
+env
+.LP
+set \fIenvironment-variable\fP=\fIvalue\fP
+.SH DESCRIPTION
+The kernel debugger contains an environment which contains a series
+of name-value pairs.  Some environment variables are known to the
+various kernel debugger commands and have specific meaning to the
+command; these are enumerated in the respective reference material.
+.P
+Arbitrary environment variables may be created and used with
+many commands (those which require an \fIaddress-expression\fP).
+.P
+The
+.B env
+command is used to display the current environment.
+.P
+The
+.B set
+command is used to alter an existing environment variable or
+establish a new environment variable.
+.SH LIMITATIONS
+There is a compile-time limit of 33 environment variables.
+.P
+There is a compile-time limit of 512 bytes (\fBKDB_ENVBUFSIZE\fP)
+of heap space available for new environment variables and for
+environment variables changed from their compile-time values.
+.SH ENVIRONMENT
+These commands explicitly manipulate the environment.
+.SH SMP CONSIDERATIONS
+None.
+.SH USER SETTINGS
+You can include "set" commands in kdb/kdb_cmds (see kdb.mm) to define
+your environment variables at kernel startup.
+.SH EXAMPLES
+.TP 8
+env
+Display current environment settings.
+
+.TP 8
+set IDCOUNT=100
+Set the number of lines to display for the \fBid\fP command
+to the value \fI100\fP.
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_ll.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_ll.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_ll.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_ll.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,134 @@
+.TH LL 1 "19 April 1999"
+.SH NAME
+ll \- Linked List examination
+.SH SYNOPSIS
+ll <address> <link-offset> <cmd>
+.SH DESCRIPTION
+The
+.B ll
+command is used to execute a single command repetitively for
+each element of a linked list.
+.P
+The command specified by <cmd> will be executed with a single
+argument, the address of the current element.
+.SH LIMITATIONS
+Be careful if using this command recursively.
+.SH ENVIRONMENT
+None.
+.SH SMP CONSIDERATIONS
+None.
+.SH EXAMPLES
+.nf
+.na
+.ft CW
+# cd modules
+# insmod kdbm_vm.o
+# Entering kdb on processor 0 due to PAUSE
+kdb> ps
+Task Addr   Pid        Parent     cpu  lcpu Tss        Command
+0xc03de000  0000000001 0000000000 0000 0000 0xc03de2d4 init
+0xc0090000  0000000002 0000000001 0000 0000 0xc00902d4 kflushd
+0xc000e000  0000000003 0000000001 0000 0000 0xc000e2d4 kpiod
+0xc000c000  0000000004 0000000001 0000 0000 0xc000c2d4 kswapd
+0xc7de2000  0000000056 0000000001 0000 0000 0xc7de22d4 kerneld
+0xc7d3a000  0000000179 0000000001 0000 0000 0xc7d3a2d4 syslogd
+0xc7a7e000  0000000188 0000000001 0000 0000 0xc7a7e2d4 klogd
+0xc7a04000  0000000199 0000000001 0000 0000 0xc7a042d4 atd
+0xc7b84000  0000000210 0000000001 0000 0000 0xc7b842d4 crond
+0xc79d6000  0000000221 0000000001 0000 0000 0xc79d62d4 portmap
+0xc798e000  0000000232 0000000001 0000 0000 0xc798e2d4 snmpd
+0xc7904000  0000000244 0000000001 0000 0000 0xc79042d4 inetd
+0xc78fc000  0000000255 0000000001 0000 0000 0xc78fc2d4 lpd
+0xc77ec000  0000000270 0000000001 0000 0000 0xc77ec2d4 sendmail
+0xc77b8000  0000000282 0000000001 0000 0000 0xc77b82d4 gpm
+0xc7716000  0000000300 0000000001 0000 0000 0xc77162d4 smbd
+0xc7ee2000  0000000322 0000000001 0000 0000 0xc7ee22d4 mingetty
+0xc7d6e000  0000000323 0000000001 0000 0000 0xc7d6e2d4 login
+0xc778c000  0000000324 0000000001 0000 0000 0xc778c2d4 mingetty
+0xc78b6000  0000000325 0000000001 0000 0000 0xc78b62d4 mingetty
+0xc77e8000  0000000326 0000000001 0000 0000 0xc77e82d4 mingetty
+0xc7708000  0000000327 0000000001 0000 0000 0xc77082d4 mingetty
+0xc770e000  0000000328 0000000001 0000 0000 0xc770e2d4 mingetty
+0xc76b0000  0000000330 0000000001 0000 0000 0xc76b02d4 update
+0xc7592000  0000000331 0000000323 0000 0000 0xc75922d4 ksh
+0xc7546000  0000000338 0000000331 0000 0000 0xc75462d4 su
+0xc74dc000  0000000339 0000000338 0000 0000 0xc74dc2d4 ksh
+kdb> md 0xc74dc2d4
+c74dc2d4: 00000000 c74de000 00000018 00000000   .....`MG........
+c74dc2e4: 00000000 00000000 00000000 074de000   .............`M.
+c74dc2f4: c01123ff 00000000 00000000 00000000   #.@............
+c74dc304: 00000000 00000000 c74dded0 00000000   ........P^MG....
+[omitted]
+c74dc474: 00000000 00000000 00000000 00000000   ................
+c74dc484: 00000000 c7c15d00 c77b0900 c026fbe0   .....]AG..{G`{&@
+c74dc494: 00000000 c76c2000 00000000 00000000   ..... lG........
+c74dc4a4: 00000000 00000000 00000000 c74dc4ac   ............,DMG
+kdb> md 0xc026fbe0
+c026fbe0: c0262b60 00000000 c7594940 c74de000   @HYG....@IYG.`MG
+[omitted]
+kdb> md 0xc0262b60
+c0262b60: c0266660 08048000 0804c000 c7bec360   `f&@.....@..`C>G
+kdb> ll c0262b60 12 md
+c0262b60: c0266660 08048000 0804c000 c7bec360   `f&@.....@..`C>G
+c7bec360: c0266660 0804c000 0804d000 c7becb20   `f&@.@...P.. K>G
+c7becb20: c0266660 0804d000 08050000 c7bec3a0   `f&@.P...... C>G
+c7bec3a0: c0266660 40000000 40009000 c7bec420   `f&@...@...@ D>G
+c7bec420: c0266660 40009000 4000b000 c7bec4a0   `f&@...@.0.@ D>G
+c7bec4a0: c0266660 4000b000 40010000 c7bec8e0   `f&@.0.@...@`H>G
+c7bec8e0: c0266660 40010000 400a1000 c7becbe0   `f&@...@...@`K>G
+c7becbe0: c0266660 400a1000 400a8000 c7becc60   `f&@...@...@`L>G
+c7becc60: c0266660 400a8000 400b4000 c7952300   `f&@...@.@.@.#.G
+c7952300: c0266660 400b5000 400bc000 c79521c0   `f&@.P.@.@.@@!.G
+c79521c0: c0266660 400bc000 400bd000 c7bec6e0   `f&@.@.@.P.@`F>G
+c7bec6e0: c0266660 bffff000 c0000000 00000000   `f&@.p?...@....
+kdb>
+kdb> ll c0262b60 12 vm
+struct vm_area_struct at 0xc0262b60 for 56 bytes
+vm_start = 0x8048000   vm_end = 0x804c000
+page_prot = 0x25   avl_height = 2244   vm_offset = 0x0
+flags:  READ EXEC MAYREAD MAYWRITE MAYEXEC DENYWRITE EXECUTABLE
+struct vm_area_struct at 0xc7bec360 for 56 bytes
+vm_start = 0x804c000   vm_end = 0x804d000
+page_prot = 0x25   avl_height = -31808   vm_offset = 0x3000
+flags:  READ WRITE MAYREAD MAYWRITE MAYEXEC DENYWRITE EXECUTABLE
+struct vm_area_struct at 0xc7becb20 for 56 bytes
+vm_start = 0x804d000   vm_end = 0x8050000
+page_prot = 0x25   avl_height = -28664   vm_offset = 0x0
+flags:  READ WRITE EXEC MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7bec3a0 for 56 bytes
+vm_start = 0x40000000   vm_end = 0x40009000
+page_prot = 0x25   avl_height = 30126   vm_offset = 0x0
+flags:  READ EXEC MAYREAD MAYWRITE MAYEXEC DENYWRITE
+struct vm_area_struct at 0xc7bec420 for 56 bytes
+vm_start = 0x40009000   vm_end = 0x4000b000
+page_prot = 0x25   avl_height = 30126   vm_offset = 0x8000
+flags:  READ WRITE MAYREAD MAYWRITE MAYEXEC DENYWRITE
+struct vm_area_struct at 0xc7bec4a0 for 56 bytes
+vm_start = 0x4000b000   vm_end = 0x40010000
+page_prot = 0x25   avl_height = 26853   vm_offset = 0x0
+flags:  READ MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7bec8e0 for 56 bytes
+vm_start = 0x40010000   vm_end = 0x400a1000
+page_prot = 0x25   avl_height = 2244   vm_offset = 0x0
+flags:  READ EXEC MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7becbe0 for 56 bytes
+vm_start = 0x400a1000   vm_end = 0x400a8000
+page_prot = 0x25   avl_height = 30126   vm_offset = 0x90000
+flags:  READ WRITE MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7becc60 for 56 bytes
+vm_start = 0x400a8000   vm_end = 0x400b4000
+page_prot = 0x25   avl_height = 2244   vm_offset = 0x0
+flags:  READ WRITE MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7952300 for 56 bytes
+vm_start = 0x400b5000   vm_end = 0x400bc000
+page_prot = 0x25   avl_height = 30126   vm_offset = 0x0
+flags:  READ EXEC MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc79521c0 for 56 bytes
+vm_start = 0x400bc000   vm_end = 0x400bd000
+page_prot = 0x25   avl_height = -16344   vm_offset = 0x6000
+flags:  READ WRITE MAYREAD MAYWRITE MAYEXEC
+struct vm_area_struct at 0xc7bec6e0 for 56 bytes
+vm_start = 0xbffff000   vm_end = 0xc0000000
+page_prot = 0x25   avl_height = 2244   vm_offset = 0x0
+flags:  READ WRITE EXEC MAYREAD MAYWRITE MAYEXEC GROWSDOWN
+kdb>
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_md.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_md.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_md.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_md.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,125 @@
+.TH MD 1 "25 September, 2001"
+.SH NAME
+md, mdWcN, mdr, mds, mm, mmW \- Memory manipulation commands
+.SH SYNOPSIS
+md [ \fIaddress-expression\fP [ \fIline-count\fP [\fIoutput-radix\fP ] ] ]
+.LP
+md\fIW\fRc\fIn\fR [ \fIaddress-expression\fP [ \fIline-count\fP [\fIoutput-radix\fP ] ] ]
+.LP
+mdr \fIaddress-expression\fP,\fIbytes\fP
+.LP
+mds [ \fIaddress-expression\fP [ \fIline-count\fP [\fIoutput-radix\fP ] ] ]
+.LP
+mm \fIaddress-expression\fP \fInew-contents\fP
+.LP
+mm\fIW\fR \fIaddress-expression\fP \fInew-contents\fP
+.SH DESCRIPTION
+The
+.B md
+command is used to display the contents of memory.
+The \fIaddress-expression\fP may be a numeric value (decimal or
+hexadecimal), a symbol name, a register name preceded by one or more
+percent symbols '%', an environment variable name preceded by
+a currency symbol '$', or a simple expression consisting of a
+symbol name, an addition or subtraction character and a numeric
+value (decimal or hexadecimal).
+.P
+If an address is specified and the \fIline-count\fP or \fIradix\fP
+arguments are omitted, they default to the values of the \fBMDCOUNT\fP
+and \fBRADIX\fP environment variables respectively.  If the
+\fBMDCOUNT\fP or \fBRADIX\fP environment variables are unset, the
+appropriate defaults will be used [see \fBENVIRONMENT\fP below].  If
+no address is specified then md resumes after the last address printed,
+using the previous values of count and radix.  The start address is
+rounded down to a multiple of the BYTESPERWORD (md) or width (md\fIW\fR).
+.P
+md uses the current value of environment variable \fBBYTESPERWORD\fP to
+read the data.  When reading hardware registers that require special
+widths, it is more convenient to use md\fIW\fRc\fIn\fR where \fIW\fR is
+the width for this command and \fRc\fIn\fR is the number of entries to
+read.  For example, md1c20 reads 20 bytes, 1 at a time.  To continue
+printing just type md; the width and count apply to following md
+commands with no parameters.  \fBNote:\fR The count is the number of
+repeats of the width, unlike MDCOUNT which gives the number of md lines
+to print.
+.P
+The
+.B mdr
+command displays the raw contents of memory, starting at the specified
+address for the specified number of bytes.
+The data is printed in one line without a leading address and no
+trailing character conversion.
+.B mdr
+is intended for interfacing with external debuggers; it is of little
+use to humans.
+.P
+The
+.B mds
+command displays the contents of memory one word per line and
+attempts to correlate the contents of each word with a symbol
+in the symbol table.  If no symbol is found, the ascii representation
+of the word is printed; otherwise the symbol name and offset from
+symbol value are printed.
+By default the section data is printed for kernel symbols.
+.P
+The
+.B mm
+and
+\fBmm\fIW\fR
+commands allow modification of memory.  The bytes at the address
+represented by \fIaddress-expression\fP are changed to
+\fInew-contents\fP.  \fInew-contents\fP is allowed to be an
+\fIaddress-expression\fP.
+.B mm
+changes a machine word; \fBmm\fIW\fR changes \fIW\fR bytes at that
+address.
+.SH LIMITATIONS
+None.
+.SH ENVIRONMENT
+.TP 8
+MDCOUNT
+This environment variable (default=8) defines the number of lines
+that will be displayed by each invocation of the \fBmd\fP command.
+
+.TP 8
+RADIX
+This environment variable (default=16) defines the radix used to
+print the memory contents.
+
+.TP 8
+BYTESPERWORD
+This environment variable (default=4) selects the width of output
+data when printing memory contents.  Select the value two to get
+16-bit word output, select the value one to get byte output.
+
+.TP 8
+LINES
+This environment variable governs the number of lines of output
+that will be presented before the kernel debugger built-in pager
+pauses the output.  This variable only affects the functioning
+of the \fBmd\fP and \fBmds\fP if the \fBMDCOUNT\fP variable
+is set to a value greater than the \fBLINES\fP variable.
+
+.TP 8
+NOSECT
+If the \fBNOSECT\fP environment variable is non-zero then the
+section information is suppressed.
+.SH SMP CONSIDERATIONS
+None.
+.SH EXAMPLES
+.TP 8
+md %edx
+Display memory starting at the address contained in register
+\fB%edx\fP.
+
+.TP 8
+mds %esp
+Display stack contents symbolically.  This command is quite useful
+in manual stack traceback.
+
+.TP 8
+mm 0xc0252110 0x25
+Change the memory location at 0xc0252110 to the value 0x25.
+
+.TP 8
+md chrdev_table 15
+Display 15 lines (at 16 bytes per line) starting at address
+represented by the symbol \fIchrdev_table\fP.
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_rd.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_rd.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_rd.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_rd.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,68 @@
+.TH RD 1 "09 March 1999"
+.SH NAME
+rd, rm \- Register manipulation commands
+.SH SYNOPSIS
+rd [c|d|u]
+.LP
+rm \fIregister-name\fP \fInew-contents\fP
+.LP
+ef <address>
+.SH DESCRIPTION
+The
+.B rd
+command is used to display the contents of processor registers.
+Without any arguments, the rd command displays the contents of
+the general register set at the point at which the kernel debugger
+was entered.  With the 'c' argument, the processor control registers
+%cr0, %cr1, %cr2 and %cr4 are displayed, while with the 'd' argument
+the processor debug registers are displayed.  If the 'u' argument
+is supplied, the registers for the current task as of the last
+time the current task entered the kernel are displayed.
+.P
+The
+.B rm
+command allows modification of a register.  The following
+register names are valid: \fB%eax\fP, \fB%ebx\fP, \fB%ecx\fP,
+\fB%edx\fP, \fB%esi\fP, \fB%edi\fP, \fB%esp\fP, \fB%eip\fP,
+and \fB%ebp\fP.  Note that if two '%' symbols are used
+consecutively, the register set displayed by the 'u' argument
+to the \fBrd\fP command is modified.
+.P
+The debug registers, \fBdr0\fP through \fBdr3\fP and both
+\fBdr6\fP and \fBdr7\fP, can also be modified with the \fBrm\fP
+command.
+.P
+The
+.B ef
+command displays an exception frame at the specified address.
+.SH LIMITATIONS
+Currently the \fBrm\fP command will not allow modification of the
+control registers.
+.P
+Currently neither the \fBrd\fP command nor the \fBrm\fP command will
+display or modify the model specific registers on the Pentium
+and Pentium Pro families.
+.SH ENVIRONMENT
+None.
+.SH SMP CONSIDERATIONS
+None.
+.SH EXAMPLES
+.TP 8
+rd
+Display general register set.
+
+.TP 8
+rm %eax 0
+Set the contents of \fB%eax\fP to zero.  This will be the
+value of %eax when kdb returns from the condition which
+invoked it.
+
+.TP 8
+rm %%eax 0
+Set the value of the \fB%eax\fP register to zero.  This will
+be the value the user-mode application will see upon returning
+from the kernel.
+
+.TP 8
+rm dr0 0xc1287220
+Set the value of the \fBdr0\fP register to \f(CW0xc1287220\fP.
diff -urN linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_ss.man linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_ss.man
--- linux-2.4.17-rc2-virgin/Documentation/kdb/kdb_ss.man	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/kdb/kdb_ss.man	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,101 @@
+.TH SS 1 "24 September 2000"
+.SH NAME
+ss, ssb \- Single Step
+.SH SYNOPSIS
+ss [<count>]
+.LP
+ssb
+.SH DESCRIPTION
+The
+.B ss
+command is used to execute a single instruction and return
+to the kernel debugger.
+.P
+Both the instruction that was single-stepped and the next
+instruction to execute are printed.
+.P
+The \fBssb\fP command will execute instructions from the
+current value of the instruction pointer.  Each instruction
+will be printed as it is executed; execution will stop at
+any instruction which would cause the flow of control to
+change (e.g. branch, call, interrupt instruction, return, etc.)
+.SH LIMITATIONS
+None.
+.SH ENVIRONMENT
+None.
+.SH SMP CONSIDERATIONS
+Other processors are held in the kernel debugger when the instruction
+is traced.  Single stepping through code that requires a lock which is
+in use by another processor is an exercise in futility; it will never
+succeed.
+.SH INTERRUPT CONSIDERATIONS
+When a kdb event occurs, one cpu (the initial cpu) enters kdb state.
+It uses a cross-system non-maskable interrupt (NMI) to interrupt the
+other cpus and bring them all into kdb state.  All cpus run with
+interrupts disabled while they are inside kdb; this prevents most
+external events from disturbing the kernel while kdb is running.
+.B Note:
+Disabled interrupts means that any I/O that relies on interrupts cannot
+proceed while kdb is in control, and devices can time out.  The clock
+tick is also disabled; machines will lose track of time while they are
+inside kdb.
+.P
+Even with interrupts disabled, some NMI events will still occur; these
+can disturb the kernel while you are debugging it.  The initial cpu
+will still accept NMI events, assuming that kdb was not entered for an
+NMI event.  Any cpu where you use the SS or SSB commands will accept
+NMI events, even after the instruction has finished and the cpu is back
+in kdb.  This is an unavoidable side effect of the fact that doing
+SS[B] requires the cpu to drop all the way out of kdb, including
+exiting from the NMI event that brought the cpu into kdb.  Under normal
+circumstances the only NMI event is for the NMI oopser, and that is kdb
+aware, so it does not disturb the kernel while kdb is running.
+.P
+Sometimes doing SS or SSB on ix86 will allow one interrupt to proceed,
+even though the cpu is disabled for interrupts.  I have not been able
+to track this one down but I suspect that the interrupt was pending
+when kdb was entered and it runs when kdb exits through IRET even
+though the popped flags are marked as cli().  If any ix86 hardware
+expert can shed some light on this problem, please notify the kdb
+maintainer.
+.SH EXAMPLES
+.nf
+.na
+.ft CW
+kdb> bp gendisk_head datar 4
+Data Access Breakpoint #0 at 0xc024ddf4 (gendisk_head) in dr0 is enabled on cpu 0
+for 4 bytes
+kdb> go
+...
+[root@host /root]# cat /proc/partitions
+Entering kdb on processor 0 due to Debug Exception @ 0xc01845e3
+Read/Write breakpoint #0 at 0xc024ddf4
+[0]kdb> ssb
+sd_finish+0x7b:  movzbl 0xc02565d4,%edx
+sd_finish+0x82:  leal   0xf(%edx),%eax
+sd_finish+0x85:  sarl   $0x4,%eax
+sd_finish+0x88:  movl   0xc0256654,%ecx
+sd_finish+0x8e:  leal   (%eax,%eax,4),%edx
+sd_finish+0x91:  leal   (%eax,%edx,2),%edx
+sd_finish+0x94:  movl   0xc0251108,%eax
+sd_finish+0x99:  movl   %eax,0xffffffc(%ecx,%edx,4)
+sd_finish+0x9d:  movl   %ecx,0xc0251108
+sd_finish+0xa3:  xorl   %ebx,%ebx
+sd_finish+0xa5:  cmpb   $0x0,0xc02565d4
+[0]kdb> go
+[root@host /root]#
+
+[0]kdb> ss
+sys_read:       pushl  %ebp
+SS trap at 0xc01274c1
+sys_read+0x1:   movl   %esp,%ebp
+[0]kdb> ss
+sys_read+0x1:   movl   %esp,%ebp
+SS trap at 0xc01274c3
+sys_read+0x3:   subl   $0xc,%esp
+[0]kdb> ss
+sys_read+0x3:   subl   $0xc,%esp
+SS trap at 0xc01274c6
+sys_read+0x6:   pushl  %edi
+[0]kdb>
+
diff -urN linux-2.4.17-rc2-virgin/Documentation/preempt-locking.txt linux-2.4.17-rc2-wli2/Documentation/preempt-locking.txt
--- linux-2.4.17-rc2-virgin/Documentation/preempt-locking.txt	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/preempt-locking.txt	Tue Dec 18 22:28:41 2001
@@ -0,0 +1,94 @@
+		Proper Locking Under a Preemptible Kernel:
+		     Keeping Kernel Code Preempt-Safe
+			     Robert Love
+			Last Updated: 21 Oct 2001
+
+
+INTRODUCTION
+
+
+A preemptible kernel creates new locking issues.  The issues are the
+same as those under SMP: concurrency and reentrancy.  Thankfully, the
+Linux preemptible kernel model leverages existing SMP locking
+mechanisms.  Thus, the kernel requires explicit additional locking for
+very few additional situations.
+
+This document is for all kernel hackers.  Developing code in the kernel
+requires protecting these situations.  As you will see, these
+situations would normally require a lock, were they not per-CPU.
+
+
+RULE #1: Per-CPU data structures need explicit protection
+
+
+Two similar problems arise.
+An example code snippet:
+
+	struct this_needs_locking tux[NR_CPUS];
+	tux[smp_processor_id()] = some_value;
+	/* task is preempted here... */
+	something = tux[smp_processor_id()];
+
+First, since the data is per-CPU, it may not have explicit SMP locking,
+but would otherwise require it.  Second, when a preempted task is
+finally rescheduled, the previous value of smp_processor_id may not
+equal the current.  You must protect these situations by disabling
+preemption around them.
+
+
+RULE #2: CPU state must be protected.
+
+
+Under preemption, the state of the CPU must be protected.  This is
+arch-dependent, but includes CPU structures and state not preserved
+over a context switch.  For example, on x86, entering and exiting FPU
+mode is now a critical section that must occur while preemption is
+disabled.  Think what would happen if the kernel is executing a
+floating-point instruction and is then preempted.  Remember, the
+kernel does not save FPU state except for user tasks.  Therefore, upon
+preemption, the FPU registers will be sold to the lowest bidder.  Thus,
+preemption must be disabled around such regions.
+
+Note, some FPU functions are already explicitly preempt safe.  For
+example, kernel_fpu_begin and kernel_fpu_end will disable and enable
+preemption.  However, math_state_restore must be called with preemption
+disabled.
+
+
+SOLUTION
+
+
+Data protection under preemption is achieved by disabling preemption
+for the duration of the critical region.
+
+preempt_enable()		decrement the preempt counter
+preempt_disable()		increment the preempt counter
+preempt_enable_no_resched()	decrement, but do not immediately preempt
+
+The functions are nestable.  In other words, you can call
+preempt_disable n-times in a code path, and preemption will not be
+reenabled until the n-th call to preempt_enable.  The preempt
+statements expand to nothing if preemption is not enabled.
+
+Note that you do not need to explicitly prevent preemption if you are
+holding any locks or interrupts are disabled, since preemption is
+implicitly disabled in those cases.
+
+Example:
+
+	cpucache_t *cc; /* this is per-CPU */
+	preempt_disable();
+	cc = cc_data(searchp);
+	if (cc && cc->avail) {
+		__free_block(searchp, cc_entry(cc), cc->avail);
+		cc->avail = 0;
+	}
+	preempt_enable();
+	return 0;
+
+Notice how the preemption statements must encompass every reference to
+the critical variables.  Another example:
+
+	int buf[NR_CPUS];
+	set_cpu_val(buf);
+	if (buf[smp_processor_id()] == -1) printk(KERN_INFO "wee!\n");
+	spin_lock(&buf_lock);
+	/* ... */
+
+This code is not preempt-safe, but see how easily we can fix it by
+simply moving the spin_lock up two lines.
diff -urN linux-2.4.17-rc2-virgin/Documentation/rtsched.txt linux-2.4.17-rc2-wli2/Documentation/rtsched.txt
--- linux-2.4.17-rc2-virgin/Documentation/rtsched.txt	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/Documentation/rtsched.txt	Tue Dec 18 22:28:41 2001
@@ -0,0 +1,28 @@
+
+		Real Time Scheduler for Linux
+		=============================
+
+The Real Time scheduler patch gives you the option of building a
+kernel with MontaVista's real time scheduler in it.  If you don't
+choose to enable the real time scheduler, the kernel will be built the
+same as if you had not installed the patch.
+
+If you enable the real time scheduler, you may also choose a max
+priority for real time tasks.  The available range is 99 to 2047.
+Values outside this range are quietly moved to fall in the range.
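+
+Real time scheduling policy and priority are requested from user space
+with the sched_setscheduler(2) system call.  A minimal sketch (the
+priority value 50 below is only an example; any value up to the
+configured maximum works, and the call requires root privilege):
+
+	#include <sched.h>
+	#include <stdio.h>
+
+	int main(void)
+	{
+		struct sched_param sp;
+
+		sp.sched_priority = 50;	/* example real time priority */
+		if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) {
+			perror("sched_setscheduler");
+			return 1;
+		}
+		/* this task now runs SCHED_FIFO, ahead of all
+		   SCHED_OTHER tasks, until it blocks or yields */
+		return 0;
+	}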
+
+In order to enable the real time scheduler you must use one of the
+kernel configure tools to turn it on.  The question appears in the
+processor options section of the configuration.
+
+Currently the scheduler is supported on all UP and SMP machines.
+
+Warning: The Real Time scheduler does not honor the "allowed_cpus"
+member of the task_struct, thus it will not honor any attempt to define
+cpu affinity.  The latest preemption patch uses cpu affinity to prevent
+cpu switching during preemption.  This will not work with this scheduler
+and may cause failures in kernels using preemption.  In addition, TUX
+is known to use cpu affinity.  It is believed that TUX will run without
+cpu affinity, but may have degraded performance.  It is also known that
+some soft irq tasks may use cpu affinity to improve performance.  These
+tasks will still work; however, the affinity will not take effect.
diff -urN linux-2.4.17-rc2-virgin/MAINTAINERS linux-2.4.17-rc2-wli2/MAINTAINERS
--- linux-2.4.17-rc2-virgin/MAINTAINERS	Tue Dec 18 23:18:02 2001
+++ linux-2.4.17-rc2-wli2/MAINTAINERS	Thu Dec 20 19:49:13 2001
@@ -706,12 +706,12 @@
 IDE DRIVER [GENERAL]
 P:	Andre Hedrick
 M:	andre@linux-ide.org
-M:	andre@aslab.com
-M:	andre@suse.com
+M:	andre@linuxdiskcert.org
 L:	linux-kernel@vger.kernel.org
 W:	http://www.kernel.org/pub/linux/kernel/people/hedrick/
 W:	http://www.linux-ide.org/
-S:	Supported
+W:	http://www.linuxdiskcert.org/
+S:	Maintained
 
 IDE/ATAPI CDROM DRIVER
 P:	Jens Axboe
@@ -1240,6 +1240,14 @@
 PPP OVER ETHERNET
 P:	Michal Ostrowski
 M:	mostrows@styx.uwaterloo.ca
+S:	Maintained
+
+PREEMPTIBLE KERNEL
+P:	Robert M. Love
+M:	rml@tech9.net
+L:	linux-kernel@vger.kernel.org
+L:	kpreempt-tech@lists.sourceforge.net
+W:	http://tech9.net/rml/linux
 S:	Maintained
 
 PROMISE DC4030 CACHING DISK CONTROLLER DRIVER
diff -urN linux-2.4.17-rc2-virgin/Makefile linux-2.4.17-rc2-wli2/Makefile
--- linux-2.4.17-rc2-virgin/Makefile	Tue Dec 18 23:18:02 2001
+++ linux-2.4.17-rc2-wli2/Makefile	Thu Dec 20 20:05:32 2001
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 17
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc2-wli2
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
@@ -37,13 +37,16 @@
 MAKEFILES = $(TOPDIR)/.config
 GENKSYMS = /sbin/genksyms
 DEPMOD = /sbin/depmod
+KALLSYMS = /sbin/kallsyms
 MODFLAGS = -DMODULE
 CFLAGS_KERNEL =
 PERL = perl
+AWK = awk
+TMPPREFIX =
 
 export	VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
 	CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
-	CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL
+	CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK
 
 all:	do-it-all
 
@@ -87,9 +90,13 @@
 #
 
 CPPFLAGS := -D__KERNEL__ -I$(HPATH)
+CPPFLAGS += $(patsubst %,-I%,$(CROSS_COMPILE_INC))
 
 CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \
-	  -fomit-frame-pointer -fno-strict-aliasing -fno-common
+	  -fno-strict-aliasing -fno-common
+ifndef CONFIG_FRAME_POINTER
+CFLAGS += -fomit-frame-pointer
+endif
 AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
 
 #
@@ -124,6 +131,11 @@
 LIBS		=$(TOPDIR)/lib/lib.a
 SUBDIRS		=kernel drivers mm fs net ipc lib
 
+ifeq ($(CONFIG_KDB),y)
+CORE_FILES	+= kdb/kdb.o
+SUBDIRS		+= kdb
+endif
+
 DRIVERS-n :=
 DRIVERS-y :=
 DRIVERS-m :=
@@ -192,7 +204,7 @@
 CLEAN_FILES = \
 	kernel/ksyms.lst include/linux/compile.h \
 	vmlinux System.map \
-	.tmp* \
+	$(TMPPREFIX).tmp* \
 	drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \
 	drivers/char/conmakehash \
 	drivers/char/drm/*-mod.c \
@@ -230,6 +242,7 @@
 	scripts/lxdialog/*.o scripts/lxdialog/lxdialog
\ .menuconfig.log \ include/asm \ + kdb/gen-kdb_cmds.c \ .hdepend scripts/mkdep scripts/split-include scripts/docproc \ $(TOPDIR)/include/linux/modversions.h \ kernel.spec @@ -242,14 +255,14 @@ include arch/$(ARCH)/Makefile -export CPPFLAGS CFLAGS AFLAGS +export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS .S.s: - $(CPP) $(AFLAGS) -traditional -o $*.s $< + $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< .S.o: - $(CC) $(AFLAGS) -traditional -c -o $*.o $< + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $< Version: dummy @rm -f include/linux/compile.h @@ -257,16 +270,42 @@ boot: vmlinux @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot +LD_VMLINUX := $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group +ifeq ($(CONFIG_KALLSYMS),y) +LD_VMLINUX_KALLSYMS := $(TMPPREFIX).tmp_kallsyms3.o +else +LD_VMLINUX_KALLSYMS := +endif + vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs - $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ - --start-group \ - $(CORE_FILES) \ - $(DRIVERS) \ - $(NETWORKS) \ - $(LIBS) \ - --end-group \ - -o vmlinux + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms + +.PHONY: kallsyms + +kallsyms: +ifeq ($(CONFIG_KALLSYMS),y) + @echo kallsyms pass 1 + $(LD_VMLINUX) -o $(TMPPREFIX).tmp_vmlinux1 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux1 > $(TMPPREFIX).tmp_kallsyms1.o + @echo kallsyms pass 2 + @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms1.o -o $(TMPPREFIX).tmp_vmlinux2 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux2 > $(TMPPREFIX).tmp_kallsyms2.o + @echo kallsyms pass 3 + @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms2.o -o $(TMPPREFIX).tmp_vmlinux3 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux3 > $(TMPPREFIX).tmp_kallsyms3.o +endif + $(LD_VMLINUX) $(LD_VMLINUX_KALLSYMS) -o $(TMPPREFIX)vmlinux +ifneq ($(TMPPREFIX),) + mv $(TMPPREFIX)vmlinux vmlinux +endif $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + @rm -f $(TMPPREFIX).tmp_vmlinux* $(TMPPREFIX).tmp_kallsyms* symlinks: rm -f include/asm diff -urN linux-2.4.17-rc2-virgin/arch/alpha/config.in linux-2.4.17-rc2-wli2/arch/alpha/config.in --- linux-2.4.17-rc2-virgin/arch/alpha/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/alpha/config.in Tue Dec 18 22:28:41 2001 @@ -216,6 +216,10 @@ then bool 'Symmetric multi-processing support' CONFIG_SMP fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi if [ "$CONFIG_SMP" = "y" ]; then define_bool CONFIG_HAVE_DEC_LOCK y diff -urN linux-2.4.17-rc2-virgin/arch/alpha/vmlinux.lds.in linux-2.4.17-rc2-wli2/arch/alpha/vmlinux.lds.in --- linux-2.4.17-rc2-virgin/arch/alpha/vmlinux.lds.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/alpha/vmlinux.lds.in Tue Dec 18 22:21:49 2001 @@ -28,6 +28,10 @@ __stop___ksymtab = .; .kstrtab : { *(.kstrtab) } + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + /* Startup code */ . 
= ALIGN(8192); __init_begin = .; diff -urN linux-2.4.17-rc2-virgin/arch/arm/config.in linux-2.4.17-rc2-wli2/arch/arm/config.in --- linux-2.4.17-rc2-virgin/arch/arm/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/arm/config.in Tue Dec 18 22:28:41 2001 @@ -329,6 +329,10 @@ else define_bool CONFIG_DISCONTIGMEM n fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu @@ -437,6 +441,7 @@ if [ "$CONFIG_CPU_32" = "y" -a "$CONFIG_ARCH_EBSA110" != "y" ]; then bool 'Kernel-mode alignment trap handler' CONFIG_ALIGNMENT_TRAP fi +dep_bool 'Preemptible Kernel (experimental)' CONFIG_PREEMPT $CONFIG_CPU_32 $CONFIG_EXPERIMENTAL endmenu source drivers/parport/Config.in diff -urN linux-2.4.17-rc2-virgin/arch/arm/kernel/entry-armv.S linux-2.4.17-rc2-wli2/arch/arm/kernel/entry-armv.S --- linux-2.4.17-rc2-virgin/arch/arm/kernel/entry-armv.S Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/arm/kernel/entry-armv.S Tue Dec 18 22:28:41 2001 @@ -672,6 +672,12 @@ add r4, sp, #S_SP mov r6, lr stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro +#ifdef CONFIG_PREEMPT + get_current_task r9 + ldr r8, [r9, #TSK_PREEMPT] + add r8, r8, #1 + str r8, [r9, #TSK_PREEMPT] +#endif 1: get_irqnr_and_base r0, r6, r5, lr movne r1, sp @ @@ -679,6 +685,25 @@ @ adrsvc ne, lr, 1b bne do_IRQ +#ifdef CONFIG_PREEMPT +2: ldr r8, [r9, #TSK_PREEMPT] + subs r8, r8, #1 + bne 3f + ldr r7, [r9, #TSK_NEED_RESCHED] + teq r7, #0 + beq 3f + ldr r6, .LCirqstat + ldr r0, [r6, #IRQSTAT_BH_COUNT] + teq r0, #0 + bne 3f + mov r0, #MODE_SVC + msr cpsr_c, r0 @ enable interrupts + bl SYMBOL_NAME(preempt_schedule) + mov r0, #I_BIT | MODE_SVC + msr cpsr_c, r0 @ disable interrupts + b 2b +3: str r8, [r9, #TSK_PREEMPT] +#endif ldr r0, [sp, #S_PSR] @ irqs are already disabled msr spsr, r0 ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr @@ -736,6 +761,9 @@ .LCprocfns: .word SYMBOL_NAME(processor) #endif .LCfp: .word SYMBOL_NAME(fp_enter) +#ifdef CONFIG_PREEMPT +.LCirqstat: .word SYMBOL_NAME(irq_stat) +#endif irq_prio_table @@ -775,6 +803,12 @@ stmdb r8, {sp, lr}^ alignment_trap r4, r7, __temp_irq zero_fp + get_current_task tsk +#ifdef CONFIG_PREEMPT + ldr r0, [tsk, #TSK_PREEMPT] + add r0, r0, #1 + str r0, [tsk, #TSK_PREEMPT] +#endif 1: get_irqnr_and_base r0, r6, r5, lr movne r1, sp adrsvc ne, lr, 1b @@ -782,8 +816,12 @@ @ routine called with r0 = irq number, r1 = struct pt_regs * @ bne do_IRQ +#ifdef CONFIG_PREEMPT + ldr r0, [tsk, #TSK_PREEMPT] + sub r0, r0, #1 + str r0, [tsk, #TSK_PREEMPT] +#endif mov why, #0 - get_current_task tsk b ret_to_user .align 5 diff -urN linux-2.4.17-rc2-virgin/arch/arm/tools/getconstants.c linux-2.4.17-rc2-wli2/arch/arm/tools/getconstants.c --- linux-2.4.17-rc2-virgin/arch/arm/tools/getconstants.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/arm/tools/getconstants.c Tue Dec 18 22:28:41 2001 @@ -13,6 +13,7 @@ #include #include +#include /* * Make sure that the compiler and target are compatible. 
@@ -38,6 +39,11 @@ DEFN("TSS_SAVE", OFF_TSK(thread.save)); DEFN("TSS_FPESAVE", OFF_TSK(thread.fpstate.soft.save)); + +#ifdef CONFIG_PREEMPT +DEFN("TSK_PREEMPT", OFF_TSK(preempt_count)); +DEFN("IRQSTAT_BH_COUNT", (unsigned long)&(((irq_cpustat_t *)0)->__local_bh_count)); +#endif #ifdef CONFIG_CPU_32 DEFN("TSS_DOMAIN", OFF_TSK(thread.domain)); diff -urN linux-2.4.17-rc2-virgin/arch/arm/vmlinux-armo.lds.in linux-2.4.17-rc2-wli2/arch/arm/vmlinux-armo.lds.in --- linux-2.4.17-rc2-virgin/arch/arm/vmlinux-armo.lds.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/arm/vmlinux-armo.lds.in Tue Dec 18 22:21:49 2001 @@ -63,6 +63,10 @@ *(__ksymtab) __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + *(__kallsyms) + __stop___kallsyms = .; + *(.got) /* Global offset table */ _etext = .; /* End of text section */ diff -urN linux-2.4.17-rc2-virgin/arch/arm/vmlinux-armv.lds.in linux-2.4.17-rc2-wli2/arch/arm/vmlinux-armv.lds.in --- linux-2.4.17-rc2-virgin/arch/arm/vmlinux-armv.lds.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/arm/vmlinux-armv.lds.in Tue Dec 18 22:21:49 2001 @@ -68,6 +68,12 @@ __stop___ksymtab = .; } + __kallsyms : { /* Kernel debugging table */ + __start___kallsyms = .; /* All kernel symbols */ + *(__kallsyms) + __stop___kallsyms = .; + } + . = ALIGN(8192); .data : { diff -urN linux-2.4.17-rc2-virgin/arch/cris/config.in linux-2.4.17-rc2-wli2/arch/cris/config.in --- linux-2.4.17-rc2-virgin/arch/cris/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/cris/config.in Tue Dec 18 22:28:41 2001 @@ -11,6 +11,10 @@ mainmenu_option next_comment comment 'Code maturity level options' bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment diff -urN linux-2.4.17-rc2-virgin/arch/i386/Makefile linux-2.4.17-rc2-wli2/arch/i386/Makefile --- linux-2.4.17-rc2-virgin/arch/i386/Makefile Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/Makefile Tue Dec 18 22:21:49 2001 @@ -93,6 +93,11 @@ CORE_FILES := arch/i386/kernel/kernel.o arch/i386/mm/mm.o $(CORE_FILES) LIBS := $(TOPDIR)/arch/i386/lib/lib.a $(LIBS) $(TOPDIR)/arch/i386/lib/lib.a +ifdef CONFIG_KDB +LIBS := $(LIBS) $(TOPDIR)/arch/i386/kdb/kdba.o +SUBDIRS := $(SUBDIRS) arch/i386/kdb +endif + ifdef CONFIG_MATH_EMULATION SUBDIRS += arch/i386/math-emu DRIVERS += arch/i386/math-emu/math.o @@ -103,6 +108,11 @@ arch/i386/mm: dummy $(MAKE) linuxsubdirs SUBDIRS=arch/i386/mm + +ifdef CONFIG_KDB +arch/i386/kdb: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/i386/kdb +endif MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot diff -urN linux-2.4.17-rc2-virgin/arch/i386/config.in linux-2.4.17-rc2-wli2/arch/i386/config.in --- linux-2.4.17-rc2-virgin/arch/i386/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/config.in Tue Dec 18 22:28:41 2001 @@ -176,6 +176,10 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT +if [ "$CONFIG_PREEMPT" = "y" ]; then + bool 'Break selected locks' CONFIG_LOCK_BREAK +fi if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC @@ -188,10 +192,17 @@ else bool 'Multiquad NUMA system' CONFIG_MULTIQUAD fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi + endmenu mainmenu_option next_comment @@ -413,6 +424,16 @@ bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Built-in Kernel Debugger support' CONFIG_KDB + dep_tristate ' KDB modules' CONFIG_KDB_MODULES $CONFIG_KDB + if [ "$CONFIG_KDB" = "y" ]; then + bool ' KDB off by default' CONFIG_KDB_OFF + comment ' Load all symbols for debugging is required for KDB' + define_bool CONFIG_KALLSYMS y + else + bool ' Load all symbols for debugging' CONFIG_KALLSYMS + fi + bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER fi endmenu diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/Makefile linux-2.4.17-rc2-wli2/arch/i386/kdb/Makefile --- linux-2.4.17-rc2-virgin/arch/i386/kdb/Makefile Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/Makefile Tue Dec 18 22:21:49 2001 @@ -0,0 +1,8 @@ +O_TARGET := kdba.o +obj-y := kdba_bt.o kdba_bp.o kdba_id.o kdba_io.o kdbasupport.o i386-dis.o + +override CFLAGS := $(CFLAGS:%-pg=% ) + +EXTRA_CFLAGS += -I $(TOPDIR)/arch/$(ARCH)/kdb + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/ansidecl.h linux-2.4.17-rc2-wli2/arch/i386/kdb/ansidecl.h --- linux-2.4.17-rc2-virgin/arch/i386/kdb/ansidecl.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/ansidecl.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,196 @@ +/* ANSI and traditional C compatability macros + Copyright 1991, 1992, 1996, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ANSI and traditional C compatibility macros + + ANSI C is assumed if __STDC__ is #defined. + + Macro ANSI C definition Traditional C definition + ----- ---- - ---------- ----------- - ---------- + PTR `void *' `char *' + LONG_DOUBLE `long double' `double' + VOLATILE `volatile' `' + SIGNED `signed' `' + PTRCONST `void *const' `char *' + ANSI_PROTOTYPES 1 not defined + + CONST is also defined, but is obsolete. Just use const. + + obsolete -- DEFUN (name, arglist, args) + + Defines function NAME. + + ARGLIST lists the arguments, separated by commas and enclosed in + parentheses. ARGLIST becomes the argument list in traditional C. + + ARGS list the arguments with their types. It becomes a prototype in + ANSI C, and the type declarations in traditional C. Arguments should + be separated with `AND'. For functions with a variable number of + arguments, the last thing listed should be `DOTS'. + + obsolete -- DEFUN_VOID (name) + + Defines a function NAME, which takes no arguments. 
+ + obsolete -- EXFUN (name, (prototype)) -- obsolete. + + Replaced by PARAMS. Do not use; will disappear someday soon. + Was used in external function declarations. + In ANSI C it is `NAME PROTOTYPE' (so PROTOTYPE should be enclosed in + parentheses). In traditional C it is `NAME()'. + For a function that takes no arguments, PROTOTYPE should be `(void)'. + + obsolete -- PROTO (type, name, (prototype) -- obsolete. + + This one has also been replaced by PARAMS. Do not use. + + PARAMS ((args)) + + We could use the EXFUN macro to handle prototype declarations, but + the name is misleading and the result is ugly. So we just define a + simple macro to handle the parameter lists, as in: + + static int foo PARAMS ((int, char)); + + This produces: `static int foo();' or `static int foo (int, char);' + + EXFUN would have done it like this: + + static int EXFUN (foo, (int, char)); + + but the function is not external...and it's hard to visually parse + the function name out of the mess. EXFUN should be considered + obsolete; new code should be written to use PARAMS. + + DOTS is also obsolete. + + Examples: + + extern int printf PARAMS ((const char *format, ...)); +*/ + +#ifndef _ANSIDECL_H + +#define _ANSIDECL_H 1 + + +/* Every source file includes this file, + so they will all get the switch for lint. */ +/* LINTLIBRARY */ + + +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) +/* All known AIX compilers implement these things (but don't always + define __STDC__). The RISC/OS MIPS compiler defines these things + in SVR4 mode, but does not define __STDC__. */ + +#define PTR void * +#define PTRCONST void *CONST +#define LONG_DOUBLE long double + +#ifndef IN_GCC +#define AND , +#define NOARGS void +#define VOLATILE volatile +#define SIGNED signed +#endif /* ! IN_GCC */ + +#define PARAMS(paramlist) paramlist +#define ANSI_PROTOTYPES 1 + +#define VPARAMS(ARGS) ARGS +#define VA_START(va_list,var) va_start(va_list,var) + +/* These are obsolete. Do not use. */ +#ifndef IN_GCC +#define CONST const +#define DOTS , ... +#define PROTO(type, name, arglist) type name arglist +#define EXFUN(name, proto) name proto +#define DEFUN(name, arglist, args) name(args) +#define DEFUN_VOID(name) name(void) +#endif /* ! IN_GCC */ + +#else /* Not ANSI C. */ + +#define PTR char * +#define PTRCONST PTR +#define LONG_DOUBLE double + +#ifndef IN_GCC +#define AND ; +#define NOARGS +#define VOLATILE +#define SIGNED +#endif /* !IN_GCC */ + +#ifndef const /* some systems define it in header files for non-ansi mode */ +#define const +#endif + +#define PARAMS(paramlist) () + +#define VPARAMS(ARGS) (va_alist) va_dcl +#define VA_START(va_list,var) va_start(va_list) + +/* These are obsolete. Do not use. */ +#ifndef IN_GCC +#define CONST +#define DOTS +#define PROTO(type, name, arglist) type name () +#define EXFUN(name, proto) name() +#define DEFUN(name, arglist, args) name arglist args; +#define DEFUN_VOID(name) name() +#endif /* ! IN_GCC */ + +#endif /* ANSI C. */ + +/* Define macros for some gcc attributes. This permits us to use the + macros freely, and know that they will come into play for the + version of gcc in which they are supported. 
*/ + +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED_LABEL +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 93) +# define ATTRIBUTE_UNUSED_LABEL +# else +# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED +# endif /* GNUC < 2.93 */ +#endif /* ATTRIBUTE_UNUSED_LABEL */ + +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif /* ATTRIBUTE_UNUSED */ + +#ifndef ATTRIBUTE_NORETURN +#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) +#endif /* ATTRIBUTE_NORETURN */ + +#ifndef ATTRIBUTE_PRINTF +#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((format (__printf__, m, n))) +#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2) +#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3) +#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4) +#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5) +#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6) +#endif /* ATTRIBUTE_PRINTF */ + +#endif /* ansidecl.h */ diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/bfd.h linux-2.4.17-rc2-wli2/arch/i386/kdb/bfd.h --- linux-2.4.17-rc2-virgin/arch/i386/kdb/bfd.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/bfd.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,3102 @@ +/* Main header file for the bfd library -- portable access to object files. + Copyright 1990, 91, 92, 93, 94, 95, 96, 97, 98, 1999 + Free Software Foundation, Inc. + Contributed by Cygnus Support. + +** NOTE: bfd.h and bfd-in2.h are GENERATED files. Don't change them; +** instead, change bfd-in.h or the other BFD source files processed to +** generate these files. + +This file is part of BFD, the Binary File Descriptor library. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* bfd.h -- The only header file required by users of the bfd library + +The bfd.h file is generated from bfd-in.h and various .c files; if you +change it, your changes will probably be lost. + +All the prototypes and definitions following the comment "THE FOLLOWING +IS EXTRACTED FROM THE SOURCE" are extracted from the source files for +BFD. If you change it, someone oneday will extract it from the source +again, and your changes will be lost. To save yourself from this bind, +change the definitions in the source in the bfd directory. Type "make +docs" and then "make headers" in that directory, and magically this file +will change to reflect your changes. + +If you don't have the tools to perform the extraction, then you are +safe from someone on your system trampling over your header files. +You should still maintain the equivalence between the source and this +file though; every change you make to the .c file should be reflected +here. */ + +#ifndef __BFD_H_SEEN__ +#define __BFD_H_SEEN__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ansidecl.h" + +/* These two lines get substitutions done by commands in Makefile.in. 
*/ +#define BFD_VERSION "2.9.5.0.22" +#define BFD_ARCH_SIZE 32 +#define BFD_HOST_64BIT_LONG 0 +#if 0 +#define BFD_HOST_64_BIT +#define BFD_HOST_U_64_BIT +#endif + +#if BFD_ARCH_SIZE >= 64 +#define BFD64 +#endif + +#ifndef INLINE +#if __GNUC__ >= 2 +#define INLINE __inline__ +#else +#define INLINE +#endif +#endif + +/* forward declaration */ +typedef struct _bfd bfd; + +/* To squelch erroneous compiler warnings ("illegal pointer + combination") from the SVR3 compiler, we would like to typedef + boolean to int (it doesn't like functions which return boolean. + Making sure they are never implicitly declared to return int + doesn't seem to help). But this file is not configured based on + the host. */ +/* General rules: functions which are boolean return true on success + and false on failure (unless they're a predicate). -- bfd.doc */ +/* I'm sure this is going to break something and someone is going to + force me to change it. */ +/* typedef enum boolean {false, true} boolean; */ +/* Yup, SVR4 has a "typedef enum boolean" in -fnf */ +/* It gets worse if the host also defines a true/false enum... -sts */ +/* And even worse if your compiler has built-in boolean types... -law */ +#if defined (__GNUG__) && (__GNUC_MINOR__ > 5) +#define TRUE_FALSE_ALREADY_DEFINED +#endif +#ifdef MPW +/* Pre-emptive strike - get the file with the enum. */ +#include +#define TRUE_FALSE_ALREADY_DEFINED +#endif /* MPW */ +#ifndef TRUE_FALSE_ALREADY_DEFINED +typedef enum bfd_boolean {false, true} boolean; +#define BFD_TRUE_FALSE +#else +/* Use enum names that will appear nowhere else. */ +typedef enum bfd_boolean {bfd_fffalse, bfd_tttrue} boolean; +#endif + +/* A pointer to a position in a file. */ +/* FIXME: This should be using off_t from . + For now, try to avoid breaking stuff by not including here. + This will break on systems with 64-bit file offsets (e.g. 4.4BSD). + Probably the best long-term answer is to avoid using file_ptr AND off_t + in this header file, and to handle this in the BFD implementation + rather than in its interface. */ +/* typedef off_t file_ptr; */ +typedef long int file_ptr; + +/* Support for different sizes of target format ints and addresses. + If the type `long' is at least 64 bits, BFD_HOST_64BIT_LONG will be + set to 1 above. Otherwise, if gcc is being used, this code will + use gcc's "long long" type. Otherwise, BFD_HOST_64_BIT must be + defined above. */ + +#ifndef BFD_HOST_64_BIT +# if BFD_HOST_64BIT_LONG +# define BFD_HOST_64_BIT long +# define BFD_HOST_U_64_BIT unsigned long +# else +# ifdef __GNUC__ +# if __GNUC__ >= 2 +# define BFD_HOST_64_BIT long long +# define BFD_HOST_U_64_BIT unsigned long long +# endif /* __GNUC__ >= 2 */ +# endif /* ! defined (__GNUC__) */ +# endif /* ! BFD_HOST_64BIT_LONG */ +#endif /* ! defined (BFD_HOST_64_BIT) */ + +#ifdef BFD64 + +#ifndef BFD_HOST_64_BIT + #error No 64 bit integer type available +#endif /* ! 
defined (BFD_HOST_64_BIT) */ + +typedef BFD_HOST_U_64_BIT bfd_vma; +typedef BFD_HOST_64_BIT bfd_signed_vma; +typedef BFD_HOST_U_64_BIT bfd_size_type; +typedef BFD_HOST_U_64_BIT symvalue; + +#ifndef fprintf_vma +#if BFD_HOST_64BIT_LONG +#define sprintf_vma(s,x) sprintf (s, "%016lx", x) +#define fprintf_vma(f,x) fprintf (f, "%016lx", x) +#else +#define _bfd_int64_low(x) ((unsigned long) (((x) & 0xffffffff))) +#define _bfd_int64_high(x) ((unsigned long) (((x) >> 32) & 0xffffffff)) +#define fprintf_vma(s,x) \ + fprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x)) +#define sprintf_vma(s,x) \ + sprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x)) +#endif +#endif + +#else /* not BFD64 */ + +/* Represent a target address. Also used as a generic unsigned type + which is guaranteed to be big enough to hold any arithmetic types + we need to deal with. */ +typedef unsigned long bfd_vma; + +/* A generic signed type which is guaranteed to be big enough to hold any + arithmetic types we need to deal with. Can be assumed to be compatible + with bfd_vma in the same way that signed and unsigned ints are compatible + (as parameters, in assignment, etc). */ +typedef long bfd_signed_vma; + +typedef unsigned long symvalue; +typedef unsigned long bfd_size_type; + +/* Print a bfd_vma x on stream s. */ +#define fprintf_vma(s,x) fprintf(s, "%08lx", x) +#define sprintf_vma(s,x) sprintf(s, "%08lx", x) + +#endif /* not BFD64 */ + +#define printf_vma(x) fprintf_vma(stdout,x) + +typedef unsigned int flagword; /* 32 bits of flags */ +typedef unsigned char bfd_byte; + +/** File formats */ + +typedef enum bfd_format { + bfd_unknown = 0, /* file format is unknown */ + bfd_object, /* linker/assember/compiler output */ + bfd_archive, /* object archive file */ + bfd_core, /* core dump */ + bfd_type_end} /* marks the end; don't use it! */ + bfd_format; + +/* Values that may appear in the flags field of a BFD. These also + appear in the object_flags field of the bfd_target structure, where + they indicate the set of flags used by that backend (not all flags + are meaningful for all object file formats) (FIXME: at the moment, + the object_flags values have mostly just been copied from backend + to another, and are not necessarily correct). */ + +/* No flags. */ +#define BFD_NO_FLAGS 0x00 + +/* BFD contains relocation entries. */ +#define HAS_RELOC 0x01 + +/* BFD is directly executable. */ +#define EXEC_P 0x02 + +/* BFD has line number information (basically used for F_LNNO in a + COFF header). */ +#define HAS_LINENO 0x04 + +/* BFD has debugging information. */ +#define HAS_DEBUG 0x08 + +/* BFD has symbols. */ +#define HAS_SYMS 0x10 + +/* BFD has local symbols (basically used for F_LSYMS in a COFF + header). */ +#define HAS_LOCALS 0x20 + +/* BFD is a dynamic object. */ +#define DYNAMIC 0x40 + +/* Text section is write protected (if D_PAGED is not set, this is + like an a.out NMAGIC file) (the linker sets this by default, but + clears it for -r or -N). */ +#define WP_TEXT 0x80 + +/* BFD is dynamically paged (this is like an a.out ZMAGIC file) (the + linker sets this by default, but clears it for -r or -n or -N). */ +#define D_PAGED 0x100 + +/* BFD is relaxable (this means that bfd_relax_section may be able to + do something) (sometimes bfd_relax_section can do something even if + this is not set). */ +#define BFD_IS_RELAXABLE 0x200 + +/* This may be set before writing out a BFD to request using a + traditional format. 
For example, this is used to request that when + writing out an a.out object the symbols not be hashed to eliminate + duplicates. */ +#define BFD_TRADITIONAL_FORMAT 0x400 + +/* This flag indicates that the BFD contents are actually cached in + memory. If this is set, iostream points to a bfd_in_memory struct. */ +#define BFD_IN_MEMORY 0x800 + +/* symbols and relocation */ + +/* A count of carsyms (canonical archive symbols). */ +typedef unsigned long symindex; + +/* How to perform a relocation. */ +typedef const struct reloc_howto_struct reloc_howto_type; + +#define BFD_NO_MORE_SYMBOLS ((symindex) ~0) + +/* General purpose part of a symbol X; + target specific parts are in libcoff.h, libaout.h, etc. */ + +#define bfd_get_section(x) ((x)->section) +#define bfd_get_output_section(x) ((x)->section->output_section) +#define bfd_set_section(x,y) ((x)->section) = (y) +#define bfd_asymbol_base(x) ((x)->section->vma) +#define bfd_asymbol_value(x) (bfd_asymbol_base(x) + (x)->value) +#define bfd_asymbol_name(x) ((x)->name) +/*Perhaps future: #define bfd_asymbol_bfd(x) ((x)->section->owner)*/ +#define bfd_asymbol_bfd(x) ((x)->the_bfd) +#define bfd_asymbol_flavour(x) (bfd_asymbol_bfd(x)->xvec->flavour) + +/* A canonical archive symbol. */ +/* This is a type pun with struct ranlib on purpose! */ +typedef struct carsym { + char *name; + file_ptr file_offset; /* look here to find the file */ +} carsym; /* to make these you call a carsymogen */ + + +/* Used in generating armaps (archive tables of contents). + Perhaps just a forward definition would do? */ +struct orl { /* output ranlib */ + char **name; /* symbol name */ + file_ptr pos; /* bfd* or file position */ + int namidx; /* index into string table */ +}; + + +/* Linenumber stuff */ +typedef struct lineno_cache_entry { + unsigned int line_number; /* Linenumber from start of function*/ + union { + struct symbol_cache_entry *sym; /* Function name */ + unsigned long offset; /* Offset into section */ + } u; +} alent; + +/* object and core file sections */ + +#define align_power(addr, align) \ + ( ((addr) + ((1<<(align))-1)) & (-1 << (align))) + +typedef struct sec *sec_ptr; + +#define bfd_get_section_name(bfd, ptr) ((ptr)->name + 0) +#define bfd_get_section_vma(bfd, ptr) ((ptr)->vma + 0) +#define bfd_get_section_alignment(bfd, ptr) ((ptr)->alignment_power + 0) +#define bfd_section_name(bfd, ptr) ((ptr)->name) +#define bfd_section_size(bfd, ptr) (bfd_get_section_size_before_reloc(ptr)) +#define bfd_section_vma(bfd, ptr) ((ptr)->vma) +#define bfd_section_lma(bfd, ptr) ((ptr)->lma) +#define bfd_section_alignment(bfd, ptr) ((ptr)->alignment_power) +#define bfd_get_section_flags(bfd, ptr) ((ptr)->flags + 0) +#define bfd_get_section_userdata(bfd, ptr) ((ptr)->userdata) + +#define bfd_is_com_section(ptr) (((ptr)->flags & SEC_IS_COMMON) != 0) + +#define bfd_set_section_vma(bfd, ptr, val) (((ptr)->vma = (ptr)->lma= (val)), ((ptr)->user_set_vma = (boolean)true), true) +#define bfd_set_section_alignment(bfd, ptr, val) (((ptr)->alignment_power = (val)),true) +#define bfd_set_section_userdata(bfd, ptr, val) (((ptr)->userdata = (val)),true) + +typedef struct stat stat_type; + +typedef enum bfd_print_symbol +{ + bfd_print_symbol_name, + bfd_print_symbol_more, + bfd_print_symbol_all +} bfd_print_symbol_type; + +/* Information about a symbol that nm needs. */ + +typedef struct _symbol_info +{ + symvalue value; + char type; + CONST char *name; /* Symbol name. */ + unsigned char stab_type; /* Stab type. */ + char stab_other; /* Stab other. 
*/ + short stab_desc; /* Stab desc. */ + CONST char *stab_name; /* String for stab type. */ +} symbol_info; + +/* Get the name of a stabs type code. */ + +extern const char *bfd_get_stab_name PARAMS ((int)); + +/* Hash table routines. There is no way to free up a hash table. */ + +/* An element in the hash table. Most uses will actually use a larger + structure, and an instance of this will be the first field. */ + +struct bfd_hash_entry +{ + /* Next entry for this hash code. */ + struct bfd_hash_entry *next; + /* String being hashed. */ + const char *string; + /* Hash code. This is the full hash code, not the index into the + table. */ + unsigned long hash; +}; + +/* A hash table. */ + +struct bfd_hash_table +{ + /* The hash array. */ + struct bfd_hash_entry **table; + /* The number of slots in the hash table. */ + unsigned int size; + /* A function used to create new elements in the hash table. The + first entry is itself a pointer to an element. When this + function is first invoked, this pointer will be NULL. However, + having the pointer permits a hierarchy of method functions to be + built each of which calls the function in the superclass. Thus + each function should be written to allocate a new block of memory + only if the argument is NULL. */ + struct bfd_hash_entry *(*newfunc) PARAMS ((struct bfd_hash_entry *, + struct bfd_hash_table *, + const char *)); + /* An objalloc for this hash table. This is a struct objalloc *, + but we use PTR to avoid requiring the inclusion of objalloc.h. */ + PTR memory; +}; + +/* Initialize a hash table. */ +extern boolean bfd_hash_table_init + PARAMS ((struct bfd_hash_table *, + struct bfd_hash_entry *(*) (struct bfd_hash_entry *, + struct bfd_hash_table *, + const char *))); + +/* Initialize a hash table specifying a size. */ +extern boolean bfd_hash_table_init_n + PARAMS ((struct bfd_hash_table *, + struct bfd_hash_entry *(*) (struct bfd_hash_entry *, + struct bfd_hash_table *, + const char *), + unsigned int size)); + +/* Free up a hash table. */ +extern void bfd_hash_table_free PARAMS ((struct bfd_hash_table *)); + +/* Look up a string in a hash table. If CREATE is true, a new entry + will be created for this string if one does not already exist. The + COPY argument must be true if this routine should copy the string + into newly allocated memory when adding an entry. */ +extern struct bfd_hash_entry *bfd_hash_lookup + PARAMS ((struct bfd_hash_table *, const char *, boolean create, + boolean copy)); + +/* Replace an entry in a hash table. */ +extern void bfd_hash_replace + PARAMS ((struct bfd_hash_table *, struct bfd_hash_entry *old, + struct bfd_hash_entry *nw)); + +/* Base method for creating a hash table entry. */ +extern struct bfd_hash_entry *bfd_hash_newfunc + PARAMS ((struct bfd_hash_entry *, struct bfd_hash_table *, + const char *)); + +/* Grab some space for a hash table entry. */ +extern PTR bfd_hash_allocate PARAMS ((struct bfd_hash_table *, + unsigned int)); + +/* Traverse a hash table in a random order, calling a function on each + element. If the function returns false, the traversal stops. The + INFO argument is passed to the function. */ +extern void bfd_hash_traverse PARAMS ((struct bfd_hash_table *, + boolean (*) (struct bfd_hash_entry *, + PTR), + PTR info)); + +/* Semi-portable string concatenation in cpp. + The CAT4 hack is to avoid a problem with some strict ANSI C preprocessors. + The problem is, "32_" is not a valid preprocessing token, and we don't + want extra underscores (e.g., "nlm_32_"). 
The XCAT2 macro will cause the + inner CAT macros to be evaluated first, producing still-valid pp-tokens. + Then the final concatenation can be done. (Sigh.) */ +#ifndef CAT +#ifdef SABER +#define CAT(a,b) a##b +#define CAT3(a,b,c) a##b##c +#define CAT4(a,b,c,d) a##b##c##d +#else +#if defined(__STDC__) || defined(ALMOST_STDC) +#define CAT(a,b) a##b +#define CAT3(a,b,c) a##b##c +#define XCAT2(a,b) CAT(a,b) +#define CAT4(a,b,c,d) XCAT2(CAT(a,b),CAT(c,d)) +#else +#define CAT(a,b) a/**/b +#define CAT3(a,b,c) a/**/b/**/c +#define CAT4(a,b,c,d) a/**/b/**/c/**/d +#endif +#endif +#endif + +#define COFF_SWAP_TABLE (PTR) &bfd_coff_std_swap_table + +/* User program access to BFD facilities */ + +/* Direct I/O routines, for programs which know more about the object + file than BFD does. Use higher level routines if possible. */ + +extern bfd_size_type bfd_read + PARAMS ((PTR, bfd_size_type size, bfd_size_type nitems, bfd *abfd)); +extern bfd_size_type bfd_write + PARAMS ((const PTR, bfd_size_type size, bfd_size_type nitems, bfd *abfd)); +extern int bfd_seek PARAMS ((bfd *abfd, file_ptr fp, int direction)); +extern long bfd_tell PARAMS ((bfd *abfd)); +extern int bfd_flush PARAMS ((bfd *abfd)); +extern int bfd_stat PARAMS ((bfd *abfd, struct stat *)); + + +/* Cast from const char * to char * so that caller can assign to + a char * without a warning. */ +#define bfd_get_filename(abfd) ((char *) (abfd)->filename) +#define bfd_get_cacheable(abfd) ((abfd)->cacheable) +#define bfd_get_format(abfd) ((abfd)->format) +#define bfd_get_target(abfd) ((abfd)->xvec->name) +#define bfd_get_flavour(abfd) ((abfd)->xvec->flavour) +#define bfd_big_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_BIG) +#define bfd_little_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_LITTLE) +#define bfd_header_big_endian(abfd) \ + ((abfd)->xvec->header_byteorder == BFD_ENDIAN_BIG) +#define bfd_header_little_endian(abfd) \ + ((abfd)->xvec->header_byteorder == BFD_ENDIAN_LITTLE) +#define bfd_get_file_flags(abfd) ((abfd)->flags) +#define bfd_applicable_file_flags(abfd) ((abfd)->xvec->object_flags) +#define bfd_applicable_section_flags(abfd) ((abfd)->xvec->section_flags) +#define bfd_my_archive(abfd) ((abfd)->my_archive) +#define bfd_has_map(abfd) ((abfd)->has_armap) + +#define bfd_valid_reloc_types(abfd) ((abfd)->xvec->valid_reloc_types) +#define bfd_usrdata(abfd) ((abfd)->usrdata) + +#define bfd_get_start_address(abfd) ((abfd)->start_address) +#define bfd_get_symcount(abfd) ((abfd)->symcount) +#define bfd_get_outsymbols(abfd) ((abfd)->outsymbols) +#define bfd_count_sections(abfd) ((abfd)->section_count) + +#define bfd_get_symbol_leading_char(abfd) ((abfd)->xvec->symbol_leading_char) + +#define bfd_set_cacheable(abfd,bool) (((abfd)->cacheable = (boolean)(bool)), true) + +extern boolean bfd_record_phdr + PARAMS ((bfd *, unsigned long, boolean, flagword, boolean, bfd_vma, + boolean, boolean, unsigned int, struct sec **)); + +/* Byte swapping routines. 
*/ + +bfd_vma bfd_getb64 PARAMS ((const unsigned char *)); +bfd_vma bfd_getl64 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getb_signed_64 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getl_signed_64 PARAMS ((const unsigned char *)); +bfd_vma bfd_getb32 PARAMS ((const unsigned char *)); +bfd_vma bfd_getl32 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getb_signed_32 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getl_signed_32 PARAMS ((const unsigned char *)); +bfd_vma bfd_getb16 PARAMS ((const unsigned char *)); +bfd_vma bfd_getl16 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getb_signed_16 PARAMS ((const unsigned char *)); +bfd_signed_vma bfd_getl_signed_16 PARAMS ((const unsigned char *)); +void bfd_putb64 PARAMS ((bfd_vma, unsigned char *)); +void bfd_putl64 PARAMS ((bfd_vma, unsigned char *)); +void bfd_putb32 PARAMS ((bfd_vma, unsigned char *)); +void bfd_putl32 PARAMS ((bfd_vma, unsigned char *)); +void bfd_putb16 PARAMS ((bfd_vma, unsigned char *)); +void bfd_putl16 PARAMS ((bfd_vma, unsigned char *)); + +/* Externally visible ECOFF routines. */ + +#if defined(__STDC__) || defined(ALMOST_STDC) +struct ecoff_debug_info; +struct ecoff_debug_swap; +struct ecoff_extr; +struct symbol_cache_entry; +struct bfd_link_info; +struct bfd_link_hash_entry; +struct bfd_elf_version_tree; +#endif +extern bfd_vma bfd_ecoff_get_gp_value PARAMS ((bfd * abfd)); +extern boolean bfd_ecoff_set_gp_value PARAMS ((bfd *abfd, bfd_vma gp_value)); +extern boolean bfd_ecoff_set_regmasks + PARAMS ((bfd *abfd, unsigned long gprmask, unsigned long fprmask, + unsigned long *cprmask)); +extern PTR bfd_ecoff_debug_init + PARAMS ((bfd *output_bfd, struct ecoff_debug_info *output_debug, + const struct ecoff_debug_swap *output_swap, + struct bfd_link_info *)); +extern void bfd_ecoff_debug_free + PARAMS ((PTR handle, bfd *output_bfd, struct ecoff_debug_info *output_debug, + const struct ecoff_debug_swap *output_swap, + struct bfd_link_info *)); +extern boolean bfd_ecoff_debug_accumulate + PARAMS ((PTR handle, bfd *output_bfd, struct ecoff_debug_info *output_debug, + const struct ecoff_debug_swap *output_swap, + bfd *input_bfd, struct ecoff_debug_info *input_debug, + const struct ecoff_debug_swap *input_swap, + struct bfd_link_info *)); +extern boolean bfd_ecoff_debug_accumulate_other + PARAMS ((PTR handle, bfd *output_bfd, struct ecoff_debug_info *output_debug, + const struct ecoff_debug_swap *output_swap, bfd *input_bfd, + struct bfd_link_info *)); +extern boolean bfd_ecoff_debug_externals + PARAMS ((bfd *abfd, struct ecoff_debug_info *debug, + const struct ecoff_debug_swap *swap, + boolean relocateable, + boolean (*get_extr) (struct symbol_cache_entry *, + struct ecoff_extr *), + void (*set_index) (struct symbol_cache_entry *, + bfd_size_type))); +extern boolean bfd_ecoff_debug_one_external + PARAMS ((bfd *abfd, struct ecoff_debug_info *debug, + const struct ecoff_debug_swap *swap, + const char *name, struct ecoff_extr *esym)); +extern bfd_size_type bfd_ecoff_debug_size + PARAMS ((bfd *abfd, struct ecoff_debug_info *debug, + const struct ecoff_debug_swap *swap)); +extern boolean bfd_ecoff_write_debug + PARAMS ((bfd *abfd, struct ecoff_debug_info *debug, + const struct ecoff_debug_swap *swap, file_ptr where)); +extern boolean bfd_ecoff_write_accumulated_debug + PARAMS ((PTR handle, bfd *abfd, struct ecoff_debug_info *debug, + const struct ecoff_debug_swap *swap, + struct bfd_link_info *info, file_ptr where)); +extern boolean bfd_mips_ecoff_create_embedded_relocs + PARAMS ((bfd *, 
struct bfd_link_info *, struct sec *, struct sec *, + char **)); + +/* Externally visible ELF routines. */ + +struct bfd_link_needed_list +{ + struct bfd_link_needed_list *next; + bfd *by; + const char *name; +}; + +extern boolean bfd_elf32_record_link_assignment + PARAMS ((bfd *, struct bfd_link_info *, const char *, boolean)); +extern boolean bfd_elf64_record_link_assignment + PARAMS ((bfd *, struct bfd_link_info *, const char *, boolean)); +extern struct bfd_link_needed_list *bfd_elf_get_needed_list + PARAMS ((bfd *, struct bfd_link_info *)); +extern boolean bfd_elf_get_bfd_needed_list + PARAMS ((bfd *, struct bfd_link_needed_list **)); +extern boolean bfd_elf32_size_dynamic_sections + PARAMS ((bfd *, const char *, const char *, boolean, const char *, + const char * const *, struct bfd_link_info *, struct sec **, + struct bfd_elf_version_tree *)); +extern boolean bfd_elf64_size_dynamic_sections + PARAMS ((bfd *, const char *, const char *, boolean, const char *, + const char * const *, struct bfd_link_info *, struct sec **, + struct bfd_elf_version_tree *)); +extern void bfd_elf_set_dt_needed_name PARAMS ((bfd *, const char *)); +extern const char *bfd_elf_get_dt_soname PARAMS ((bfd *)); + +/* SunOS shared library support routines for the linker. */ + +extern struct bfd_link_needed_list *bfd_sunos_get_needed_list + PARAMS ((bfd *, struct bfd_link_info *)); +extern boolean bfd_sunos_record_link_assignment + PARAMS ((bfd *, struct bfd_link_info *, const char *)); +extern boolean bfd_sunos_size_dynamic_sections + PARAMS ((bfd *, struct bfd_link_info *, struct sec **, struct sec **, + struct sec **)); + +/* Linux shared library support routines for the linker. */ + +extern boolean bfd_i386linux_size_dynamic_sections + PARAMS ((bfd *, struct bfd_link_info *)); +extern boolean bfd_m68klinux_size_dynamic_sections + PARAMS ((bfd *, struct bfd_link_info *)); +extern boolean bfd_sparclinux_size_dynamic_sections + PARAMS ((bfd *, struct bfd_link_info *)); + +/* mmap hacks */ + +struct _bfd_window_internal; +typedef struct _bfd_window_internal bfd_window_internal; + +typedef struct _bfd_window { + /* What the user asked for. */ + PTR data; + bfd_size_type size; + /* The actual window used by BFD. Small user-requested read-only + regions sharing a page may share a single window into the object + file. Read-write versions shouldn't until I've fixed things to + keep track of which portions have been claimed by the + application; don't want to give the same region back when the + application wants two writable copies! */ + struct _bfd_window_internal *i; +} bfd_window; + +extern void bfd_init_window PARAMS ((bfd_window *)); +extern void bfd_free_window PARAMS ((bfd_window *)); +extern boolean bfd_get_file_window + PARAMS ((bfd *, file_ptr, bfd_size_type, bfd_window *, boolean)); + +/* XCOFF support routines for the linker. 
*/ + +extern boolean bfd_xcoff_link_record_set + PARAMS ((bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, + bfd_size_type)); +extern boolean bfd_xcoff_import_symbol + PARAMS ((bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, + bfd_vma, const char *, const char *, const char *)); +extern boolean bfd_xcoff_export_symbol + PARAMS ((bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, + boolean)); +extern boolean bfd_xcoff_link_count_reloc + PARAMS ((bfd *, struct bfd_link_info *, const char *)); +extern boolean bfd_xcoff_record_link_assignment + PARAMS ((bfd *, struct bfd_link_info *, const char *)); +extern boolean bfd_xcoff_size_dynamic_sections + PARAMS ((bfd *, struct bfd_link_info *, const char *, const char *, + unsigned long, unsigned long, unsigned long, boolean, + int, boolean, boolean, struct sec **)); + +/* Externally visible COFF routines. */ + +#if defined(__STDC__) || defined(ALMOST_STDC) +struct internal_syment; +union internal_auxent; +#endif + +extern boolean bfd_coff_get_syment + PARAMS ((bfd *, struct symbol_cache_entry *, struct internal_syment *)); + +extern boolean bfd_coff_get_auxent + PARAMS ((bfd *, struct symbol_cache_entry *, int, union internal_auxent *)); + +extern boolean bfd_coff_set_symbol_class + PARAMS ((bfd *, struct symbol_cache_entry *, unsigned int)); + +/* ARM Interworking support. Called from linker. */ +extern boolean bfd_arm_allocate_interworking_sections + PARAMS ((struct bfd_link_info *)); + +extern boolean bfd_arm_process_before_allocation + PARAMS ((bfd *, struct bfd_link_info *, int)); + +extern boolean bfd_arm_get_bfd_for_interworking + PARAMS ((bfd *, struct bfd_link_info *)); + +/* ELF ARM Interworking support. Called from linker. */ +extern boolean bfd_elf32_arm_allocate_interworking_sections + PARAMS ((struct bfd_link_info *)); + +extern boolean bfd_elf32_arm_process_before_allocation + PARAMS ((bfd *, struct bfd_link_info *, int)); + +extern boolean bfd_elf32_arm_get_bfd_for_interworking + PARAMS ((bfd *, struct bfd_link_info *)); + +/* And more from the source. */ +void +bfd_init PARAMS ((void)); + +bfd * +bfd_openr PARAMS ((CONST char *filename, CONST char *target)); + +bfd * +bfd_fdopenr PARAMS ((CONST char *filename, CONST char *target, int fd)); + +bfd * +bfd_openstreamr PARAMS ((const char *, const char *, PTR)); + +bfd * +bfd_openw PARAMS ((CONST char *filename, CONST char *target)); + +boolean +bfd_close PARAMS ((bfd *abfd)); + +boolean +bfd_close_all_done PARAMS ((bfd *)); + +bfd * +bfd_create PARAMS ((CONST char *filename, bfd *templ)); + +boolean +bfd_make_writable PARAMS ((bfd *abfd)); + +boolean +bfd_make_readable PARAMS ((bfd *abfd)); + + + /* Byte swapping macros for user section data. 
*/ + +#define bfd_put_8(abfd, val, ptr) \ + ((void) (*((unsigned char *)(ptr)) = (unsigned char)(val))) +#define bfd_put_signed_8 \ + bfd_put_8 +#define bfd_get_8(abfd, ptr) \ + (*(unsigned char *)(ptr)) +#define bfd_get_signed_8(abfd, ptr) \ + ((*(unsigned char *)(ptr) ^ 0x80) - 0x80) + +#define bfd_put_16(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_putx16, ((val),(ptr))) +#define bfd_put_signed_16 \ + bfd_put_16 +#define bfd_get_16(abfd, ptr) \ + BFD_SEND(abfd, bfd_getx16, (ptr)) +#define bfd_get_signed_16(abfd, ptr) \ + BFD_SEND (abfd, bfd_getx_signed_16, (ptr)) + +#define bfd_put_32(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_putx32, ((val),(ptr))) +#define bfd_put_signed_32 \ + bfd_put_32 +#define bfd_get_32(abfd, ptr) \ + BFD_SEND(abfd, bfd_getx32, (ptr)) +#define bfd_get_signed_32(abfd, ptr) \ + BFD_SEND(abfd, bfd_getx_signed_32, (ptr)) + +#define bfd_put_64(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_putx64, ((val), (ptr))) +#define bfd_put_signed_64 \ + bfd_put_64 +#define bfd_get_64(abfd, ptr) \ + BFD_SEND(abfd, bfd_getx64, (ptr)) +#define bfd_get_signed_64(abfd, ptr) \ + BFD_SEND(abfd, bfd_getx_signed_64, (ptr)) + +#define bfd_get(bits, abfd, ptr) \ + ((bits) == 8 ? bfd_get_8 (abfd, ptr) \ + : (bits) == 16 ? bfd_get_16 (abfd, ptr) \ + : (bits) == 32 ? bfd_get_32 (abfd, ptr) \ + : (bits) == 64 ? bfd_get_64 (abfd, ptr) \ + : (abort (), (bfd_vma) - 1)) + +#define bfd_put(bits, abfd, val, ptr) \ + ((bits) == 8 ? bfd_put_8 (abfd, val, ptr) \ + : (bits) == 16 ? bfd_put_16 (abfd, val, ptr) \ + : (bits) == 32 ? bfd_put_32 (abfd, val, ptr) \ + : (bits) == 64 ? bfd_put_64 (abfd, val, ptr) \ + : (abort (), (void) 0)) + + + /* Byte swapping macros for file header data. */ + +#define bfd_h_put_8(abfd, val, ptr) \ + bfd_put_8 (abfd, val, ptr) +#define bfd_h_put_signed_8(abfd, val, ptr) \ + bfd_put_8 (abfd, val, ptr) +#define bfd_h_get_8(abfd, ptr) \ + bfd_get_8 (abfd, ptr) +#define bfd_h_get_signed_8(abfd, ptr) \ + bfd_get_signed_8 (abfd, ptr) + +#define bfd_h_put_16(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_h_putx16,(val,ptr)) +#define bfd_h_put_signed_16 \ + bfd_h_put_16 +#define bfd_h_get_16(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx16,(ptr)) +#define bfd_h_get_signed_16(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx_signed_16, (ptr)) + +#define bfd_h_put_32(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_h_putx32,(val,ptr)) +#define bfd_h_put_signed_32 \ + bfd_h_put_32 +#define bfd_h_get_32(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx32,(ptr)) +#define bfd_h_get_signed_32(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx_signed_32, (ptr)) + +#define bfd_h_put_64(abfd, val, ptr) \ + BFD_SEND(abfd, bfd_h_putx64,(val, ptr)) +#define bfd_h_put_signed_64 \ + bfd_h_put_64 +#define bfd_h_get_64(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx64,(ptr)) +#define bfd_h_get_signed_64(abfd, ptr) \ + BFD_SEND(abfd, bfd_h_getx_signed_64, (ptr)) + + /* This structure is used for a comdat section, as in PE. A comdat + section is associated with a particular symbol. When the linker + sees a comdat section, it keeps only one of the sections with a + given name and associated with a given symbol. */ + +struct bfd_comdat_info +{ + /* The name of the symbol associated with a comdat section. */ + const char *name; + + /* The local symbol table index of the symbol associated with a + comdat section. This is only meaningful to the object file format + specific code; it is not an index into the list returned by + bfd_canonicalize_symtab. 
*/ + long symbol; + + /* If this section is being discarded, the linker uses this field + to point to the input section which is being kept. */ + struct sec *sec; +}; + +typedef struct sec +{ + /* The name of the section; the name isn't a copy, the pointer is + the same as that passed to bfd_make_section. */ + + CONST char *name; + + /* Which section is it; 0..nth. */ + + int index; + + /* The next section in the list belonging to the BFD, or NULL. */ + + struct sec *next; + + /* The field flags contains attributes of the section. Some + flags are read in from the object file, and some are + synthesized from other information. */ + + flagword flags; + +#define SEC_NO_FLAGS 0x000 + + /* Tells the OS to allocate space for this section when loading. + This is clear for a section containing debug information + only. */ +#define SEC_ALLOC 0x001 + + /* Tells the OS to load the section from the file when loading. + This is clear for a .bss section. */ +#define SEC_LOAD 0x002 + + /* The section contains data still to be relocated, so there is + some relocation information too. */ +#define SEC_RELOC 0x004 + +#if 0 /* Obsolete ? */ +#define SEC_BALIGN 0x008 +#endif + + /* A signal to the OS that the section contains read only + data. */ +#define SEC_READONLY 0x010 + + /* The section contains code only. */ +#define SEC_CODE 0x020 + + /* The section contains data only. */ +#define SEC_DATA 0x040 + + /* The section will reside in ROM. */ +#define SEC_ROM 0x080 + + /* The section contains constructor information. This section + type is used by the linker to create lists of constructors and + destructors used by <>. When a back end sees a symbol + which should be used in a constructor list, it creates a new + section for the type of name (e.g., <<__CTOR_LIST__>>), attaches + the symbol to it, and builds a relocation. To build the lists + of constructors, all the linker has to do is catenate all the + sections called <<__CTOR_LIST__>> and relocate the data + contained within - exactly the operations it would peform on + standard data. */ +#define SEC_CONSTRUCTOR 0x100 + + /* The section is a constructor, and should be placed at the + end of the text, data, or bss section(?). */ +#define SEC_CONSTRUCTOR_TEXT 0x1100 +#define SEC_CONSTRUCTOR_DATA 0x2100 +#define SEC_CONSTRUCTOR_BSS 0x3100 + + /* The section has contents - a data section could be + <> | <>; a debug section could be + <> */ +#define SEC_HAS_CONTENTS 0x200 + + /* An instruction to the linker to not output the section + even if it has information which would normally be written. */ +#define SEC_NEVER_LOAD 0x400 + + /* The section is a COFF shared library section. This flag is + only for the linker. If this type of section appears in + the input file, the linker must copy it to the output file + without changing the vma or size. FIXME: Although this + was originally intended to be general, it really is COFF + specific (and the flag was renamed to indicate this). It + might be cleaner to have some more general mechanism to + allow the back end to control what the linker does with + sections. */ +#define SEC_COFF_SHARED_LIBRARY 0x800 + + /* The section contains common symbols (symbols may be defined + multiple times, the value of a symbol is the amount of + space it requires, and the largest symbol value is the one + used). Most targets have exactly one of these (which we + translate to bfd_com_section_ptr), but ECOFF has two. */ +#define SEC_IS_COMMON 0x8000 + + /* The section contains only debugging information. 
For + example, this is set for ELF .debug and .stab sections. + strip tests this flag to see if a section can be + discarded. */ +#define SEC_DEBUGGING 0x10000 + + /* The contents of this section are held in memory pointed to + by the contents field. This is checked by + bfd_get_section_contents, and the data is retrieved from + memory if appropriate. */ +#define SEC_IN_MEMORY 0x20000 + + /* The contents of this section are to be excluded by the + linker for executable and shared objects unless those + objects are to be further relocated. */ +#define SEC_EXCLUDE 0x40000 + + /* The contents of this section are to be sorted by the + based on the address specified in the associated symbol + table. */ +#define SEC_SORT_ENTRIES 0x80000 + + /* When linking, duplicate sections of the same name should be + discarded, rather than being combined into a single section as + is usually done. This is similar to how common symbols are + handled. See SEC_LINK_DUPLICATES below. */ +#define SEC_LINK_ONCE 0x100000 + + /* If SEC_LINK_ONCE is set, this bitfield describes how the linker + should handle duplicate sections. */ +#define SEC_LINK_DUPLICATES 0x600000 + + /* This value for SEC_LINK_DUPLICATES means that duplicate + sections with the same name should simply be discarded. */ +#define SEC_LINK_DUPLICATES_DISCARD 0x0 + + /* This value for SEC_LINK_DUPLICATES means that the linker + should warn if there are any duplicate sections, although + it should still only link one copy. */ +#define SEC_LINK_DUPLICATES_ONE_ONLY 0x200000 + + /* This value for SEC_LINK_DUPLICATES means that the linker + should warn if any duplicate sections are a different size. */ +#define SEC_LINK_DUPLICATES_SAME_SIZE 0x400000 + + /* This value for SEC_LINK_DUPLICATES means that the linker + should warn if any duplicate sections contain different + contents. */ +#define SEC_LINK_DUPLICATES_SAME_CONTENTS 0x600000 + + /* This section was created by the linker as part of dynamic + relocation or other arcane processing. It is skipped when + going through the first-pass output, trusting that someone + else up the line will take care of it later. */ +#define SEC_LINKER_CREATED 0x800000 + + /* This section should not be subject to garbage collection. */ +#define SEC_KEEP 0x1000000 + + /* This section contains "short" data, and should be placed + "near" the GP. */ +#define SEC_SMALL_DATA 0x2000000 + + /* This section contains data which may be shared with other + executables or shared objects. */ +#define SEC_SHARED 0x4000000 + + /* End of section flags. */ + + /* Some internal packed boolean fields. */ + + /* See the vma field. */ + unsigned int user_set_vma : 1; + + /* Whether relocations have been processed. */ + unsigned int reloc_done : 1; + + /* A mark flag used by some of the linker backends. */ + unsigned int linker_mark : 1; + + /* A mark flag used by some linker backends for garbage collection. */ + unsigned int gc_mark : 1; + + /* End of internal packed boolean fields. */ + + /* The virtual memory address of the section - where it will be + at run time. The symbols are relocated against this. The + user_set_vma flag is maintained by bfd; if it's not set, the + backend can assign addresses (for example, in <>, where + the default address for <<.data>> is dependent on the specific + target and various flags). */ + + bfd_vma vma; + + /* The load address of the section - where it would be in a + rom image; really only used for writing section header + information. 
*/ + + bfd_vma lma; + + /* The size of the section in bytes, as it will be output. + Contains a value even if the section has no contents (e.g., the + size of <<.bss>>). This will be filled in after relocation. */ + + bfd_size_type _cooked_size; + + /* The original size on disk of the section, in bytes. Normally this + value is the same as the size, but if some relaxing has + been done, then this value will be bigger. */ + + bfd_size_type _raw_size; + + /* If this section is going to be output, then this value is the + offset into the output section of the first byte in the input + section. E.g., if this was going to start at the 100th byte in + the output section, this value would be 100. */ + + bfd_vma output_offset; + + /* The output section through which to map on output. */ + + struct sec *output_section; + + /* The alignment requirement of the section, as an exponent of 2 - + e.g., 3 aligns to 2^3 (or 8). */ + + unsigned int alignment_power; + + /* If an input section, a pointer to a vector of relocation + records for the data in this section. */ + + struct reloc_cache_entry *relocation; + + /* If an output section, a pointer to a vector of pointers to + relocation records for the data in this section. */ + + struct reloc_cache_entry **orelocation; + + /* The number of relocation records in one of the above. */ + + unsigned reloc_count; + + /* Information below is back end specific - and not always used + or updated. */ + + /* File position of section data */ + + file_ptr filepos; + + /* File position of relocation info */ + + file_ptr rel_filepos; + + /* File position of line data */ + + file_ptr line_filepos; + + /* Pointer to data for applications */ + + PTR userdata; + + /* If the SEC_IN_MEMORY flag is set, this points to the actual + contents. */ + unsigned char *contents; + + /* Attached line number information */ + + alent *lineno; + + /* Number of line number records */ + + unsigned int lineno_count; + + /* Optional information about a COMDAT entry; NULL if not COMDAT */ + + struct bfd_comdat_info *comdat; + + /* When a section is being output, this value changes as more + linenumbers are written out */ + + file_ptr moving_line_filepos; + + /* What the section number is in the target world */ + + int target_index; + + PTR used_by_bfd; + + /* If this is a constructor section then here is a list of the + relocations created to relocate items within it. */ + + struct relent_chain *constructor_chain; + + /* The BFD which owns the section. */ + + bfd *owner; + + /* A symbol which points at this section only */ + struct symbol_cache_entry *symbol; + struct symbol_cache_entry **symbol_ptr_ptr; + + struct bfd_link_order *link_order_head; + struct bfd_link_order *link_order_tail; +} asection ; + + /* These sections are global, and are managed by BFD. The application + and target back end are not permitted to change the values in + these sections. New code should use the section_ptr macros rather + than referring directly to the const sections. The const sections + may eventually vanish.
*/ +#define BFD_ABS_SECTION_NAME "*ABS*" +#define BFD_UND_SECTION_NAME "*UND*" +#define BFD_COM_SECTION_NAME "*COM*" +#define BFD_IND_SECTION_NAME "*IND*" + + /* the absolute section */ +extern const asection bfd_abs_section; +#define bfd_abs_section_ptr ((asection *) &bfd_abs_section) +#define bfd_is_abs_section(sec) ((sec) == bfd_abs_section_ptr) + /* Pointer to the undefined section */ +extern const asection bfd_und_section; +#define bfd_und_section_ptr ((asection *) &bfd_und_section) +#define bfd_is_und_section(sec) ((sec) == bfd_und_section_ptr) + /* Pointer to the common section */ +extern const asection bfd_com_section; +#define bfd_com_section_ptr ((asection *) &bfd_com_section) + /* Pointer to the indirect section */ +extern const asection bfd_ind_section; +#define bfd_ind_section_ptr ((asection *) &bfd_ind_section) +#define bfd_is_ind_section(sec) ((sec) == bfd_ind_section_ptr) + +extern const struct symbol_cache_entry * const bfd_abs_symbol; +extern const struct symbol_cache_entry * const bfd_com_symbol; +extern const struct symbol_cache_entry * const bfd_und_symbol; +extern const struct symbol_cache_entry * const bfd_ind_symbol; +#define bfd_get_section_size_before_reloc(section) \ + (section->reloc_done ? (abort(),1): (section)->_raw_size) +#define bfd_get_section_size_after_reloc(section) \ + ((section->reloc_done) ? (section)->_cooked_size: (abort(),1)) +asection * +bfd_get_section_by_name PARAMS ((bfd *abfd, CONST char *name)); + +asection * +bfd_make_section_old_way PARAMS ((bfd *abfd, CONST char *name)); + +asection * +bfd_make_section_anyway PARAMS ((bfd *abfd, CONST char *name)); + +asection * +bfd_make_section PARAMS ((bfd *, CONST char *name)); + +boolean +bfd_set_section_flags PARAMS ((bfd *abfd, asection *sec, flagword flags)); + +void +bfd_map_over_sections PARAMS ((bfd *abfd, + void (*func)(bfd *abfd, + asection *sect, + PTR obj), + PTR obj)); + +boolean +bfd_set_section_size PARAMS ((bfd *abfd, asection *sec, bfd_size_type val)); + +boolean +bfd_set_section_contents + PARAMS ((bfd *abfd, + asection *section, + PTR data, + file_ptr offset, + bfd_size_type count)); + +boolean +bfd_get_section_contents + PARAMS ((bfd *abfd, asection *section, PTR location, + file_ptr offset, bfd_size_type count)); + +boolean +bfd_copy_private_section_data PARAMS ((bfd *ibfd, asection *isec, bfd *obfd, asection *osec)); + +#define bfd_copy_private_section_data(ibfd, isection, obfd, osection) \ + BFD_SEND (obfd, _bfd_copy_private_section_data, \ + (ibfd, isection, obfd, osection)) +void +_bfd_strip_section_from_output + PARAMS ((asection *section)); + +enum bfd_architecture +{ + bfd_arch_unknown, /* File arch not known */ + bfd_arch_obscure, /* Arch known, not one of these */ + bfd_arch_m68k, /* Motorola 68xxx */ +#define bfd_mach_m68000 1 +#define bfd_mach_m68008 2 +#define bfd_mach_m68010 3 +#define bfd_mach_m68020 4 +#define bfd_mach_m68030 5 +#define bfd_mach_m68040 6 +#define bfd_mach_m68060 7 +#define bfd_mach_cpu32 8 + bfd_arch_vax, /* DEC Vax */ + bfd_arch_i960, /* Intel 960 */ + /* The order of the following is important. + lower number indicates a machine type that + only accepts a subset of the instructions + available to machines with higher numbers. + The exception is the "ca", which is + incompatible with all other machines except + "core". 
*/ + +#define bfd_mach_i960_core 1 +#define bfd_mach_i960_ka_sa 2 +#define bfd_mach_i960_kb_sb 3 +#define bfd_mach_i960_mc 4 +#define bfd_mach_i960_xa 5 +#define bfd_mach_i960_ca 6 +#define bfd_mach_i960_jx 7 +#define bfd_mach_i960_hx 8 + + bfd_arch_a29k, /* AMD 29000 */ + bfd_arch_sparc, /* SPARC */ +#define bfd_mach_sparc 1 + /* The difference between v8plus and v9 is that v9 is a true 64 bit env. */ +#define bfd_mach_sparc_sparclet 2 +#define bfd_mach_sparc_sparclite 3 +#define bfd_mach_sparc_v8plus 4 +#define bfd_mach_sparc_v8plusa 5 /* with ultrasparc add'ns */ +#define bfd_mach_sparc_sparclite_le 6 +#define bfd_mach_sparc_v9 7 +#define bfd_mach_sparc_v9a 8 /* with ultrasparc add'ns */ + /* Nonzero if MACH has the v9 instruction set. */ +#define bfd_mach_sparc_v9_p(mach) \ + ((mach) >= bfd_mach_sparc_v8plus && (mach) <= bfd_mach_sparc_v9a) + bfd_arch_mips, /* MIPS Rxxxx */ +#define bfd_mach_mips3000 3000 +#define bfd_mach_mips3900 3900 +#define bfd_mach_mips4000 4000 +#define bfd_mach_mips4010 4010 +#define bfd_mach_mips4100 4100 +#define bfd_mach_mips4111 4111 +#define bfd_mach_mips4300 4300 +#define bfd_mach_mips4400 4400 +#define bfd_mach_mips4600 4600 +#define bfd_mach_mips4650 4650 +#define bfd_mach_mips5000 5000 +#define bfd_mach_mips6000 6000 +#define bfd_mach_mips8000 8000 +#define bfd_mach_mips10000 10000 +#define bfd_mach_mips16 16 + bfd_arch_i386, /* Intel 386 */ +#define bfd_mach_i386_i386 0 +#define bfd_mach_i386_i8086 1 +#define bfd_mach_i386_i386_intel_syntax 2 + bfd_arch_we32k, /* AT&T WE32xxx */ + bfd_arch_tahoe, /* CCI/Harris Tahoe */ + bfd_arch_i860, /* Intel 860 */ + bfd_arch_i370, /* IBM 360/370 Mainframes */ + bfd_arch_romp, /* IBM ROMP PC/RT */ + bfd_arch_alliant, /* Alliant */ + bfd_arch_convex, /* Convex */ + bfd_arch_m88k, /* Motorola 88xxx */ + bfd_arch_pyramid, /* Pyramid Technology */ + bfd_arch_h8300, /* Hitachi H8/300 */ +#define bfd_mach_h8300 1 +#define bfd_mach_h8300h 2 +#define bfd_mach_h8300s 3 + bfd_arch_powerpc, /* PowerPC */ + bfd_arch_rs6000, /* IBM RS/6000 */ + bfd_arch_hppa, /* HP PA RISC */ + bfd_arch_d10v, /* Mitsubishi D10V */ +#define bfd_mach_d10v 0 +#define bfd_mach_d10v_ts2 2 +#define bfd_mach_d10v_ts3 3 + bfd_arch_d30v, /* Mitsubishi D30V */ + bfd_arch_z8k, /* Zilog Z8000 */ +#define bfd_mach_z8001 1 +#define bfd_mach_z8002 2 + bfd_arch_h8500, /* Hitachi H8/500 */ + bfd_arch_sh, /* Hitachi SH */ +#define bfd_mach_sh 0 +#define bfd_mach_sh3 0x30 +#define bfd_mach_sh3e 0x3e + bfd_arch_alpha, /* Dec Alpha */ +#define bfd_mach_alpha_ev4 0x10 +#define bfd_mach_alpha_ev5 0x20 +#define bfd_mach_alpha_ev6 0x30 + bfd_arch_arm, /* Advanced Risc Machines ARM */ +#define bfd_mach_arm_2 1 +#define bfd_mach_arm_2a 2 +#define bfd_mach_arm_3 3 +#define bfd_mach_arm_3M 4 +#define bfd_mach_arm_4 5 +#define bfd_mach_arm_4T 6 +#define bfd_mach_arm_5 7 +#define bfd_mach_arm_5T 8 + bfd_arch_ns32k, /* National Semiconductors ns32000 */ + bfd_arch_w65, /* WDC 65816 */ + bfd_arch_tic30, /* Texas Instruments TMS320C30 */ + bfd_arch_tic80, /* TI TMS320c80 (MVP) */ + bfd_arch_v850, /* NEC V850 */ +#define bfd_mach_v850 0 +#define bfd_mach_v850e 'E' +#define bfd_mach_v850ea 'A' + bfd_arch_arc, /* Argonaut RISC Core */ +#define bfd_mach_arc_base 0 + bfd_arch_m32r, /* Mitsubishi M32R/D */ +#define bfd_mach_m32r 0 /* backwards compatibility */ +#define bfd_mach_m32rx 'x' + bfd_arch_mn10200, /* Matsushita MN10200 */ + bfd_arch_mn10300, /* Matsushita MN10300 */ +#define bfd_mach_mn10300 300 +#define bfd_mach_am33 330 + bfd_arch_fr30, +#define bfd_mach_fr30 0x46523330 + 
bfd_arch_mcore, + bfd_arch_pj, + bfd_arch_last + }; + +typedef struct bfd_arch_info +{ + int bits_per_word; + int bits_per_address; + int bits_per_byte; + enum bfd_architecture arch; + unsigned long mach; + const char *arch_name; + const char *printable_name; + unsigned int section_align_power; + /* true if this is the default machine for the architecture */ + boolean the_default; + const struct bfd_arch_info * (*compatible) + PARAMS ((const struct bfd_arch_info *a, + const struct bfd_arch_info *b)); + + boolean (*scan) PARAMS ((const struct bfd_arch_info *, const char *)); + + const struct bfd_arch_info *next; +} bfd_arch_info_type; +const char * +bfd_printable_name PARAMS ((bfd *abfd)); + +const bfd_arch_info_type * +bfd_scan_arch PARAMS ((const char *string)); + +const char ** +bfd_arch_list PARAMS ((void)); + +const bfd_arch_info_type * +bfd_arch_get_compatible PARAMS (( + const bfd *abfd, + const bfd *bbfd)); + +void +bfd_set_arch_info PARAMS ((bfd *abfd, const bfd_arch_info_type *arg)); + +enum bfd_architecture +bfd_get_arch PARAMS ((bfd *abfd)); + +unsigned long +bfd_get_mach PARAMS ((bfd *abfd)); + +unsigned int +bfd_arch_bits_per_byte PARAMS ((bfd *abfd)); + +unsigned int +bfd_arch_bits_per_address PARAMS ((bfd *abfd)); + +const bfd_arch_info_type * +bfd_get_arch_info PARAMS ((bfd *abfd)); + +const bfd_arch_info_type * +bfd_lookup_arch + PARAMS ((enum bfd_architecture + arch, + unsigned long machine)); + +const char * +bfd_printable_arch_mach + PARAMS ((enum bfd_architecture arch, unsigned long machine)); + +typedef enum bfd_reloc_status +{ + /* No errors detected */ + bfd_reloc_ok, + + /* The relocation was performed, but there was an overflow. */ + bfd_reloc_overflow, + + /* The address to relocate was not within the section supplied. */ + bfd_reloc_outofrange, + + /* Used by special functions */ + bfd_reloc_continue, + + /* Unsupported relocation size requested. */ + bfd_reloc_notsupported, + + /* Unused */ + bfd_reloc_other, + + /* The symbol to relocate against was undefined. */ + bfd_reloc_undefined, + + /* The relocation was performed, but may not be ok - presently + generated only when linking i960 coff files with i960 b.out + symbols. If this type is returned, the error_message argument + to bfd_perform_relocation will be set. */ + bfd_reloc_dangerous + } + bfd_reloc_status_type; + + +typedef struct reloc_cache_entry +{ + /* A pointer into the canonical table of pointers */ + struct symbol_cache_entry **sym_ptr_ptr; + + /* offset in section */ + bfd_size_type address; + + /* addend for relocation value */ + bfd_vma addend; + + /* Pointer to how to perform the required relocation */ + reloc_howto_type *howto; + +} arelent; +enum complain_overflow +{ + /* Do not complain on overflow. */ + complain_overflow_dont, + + /* Complain if the bitfield overflows, whether it is considered + as signed or unsigned. */ + complain_overflow_bitfield, + + /* Complain if the value overflows when considered as a signed + number. */ + complain_overflow_signed, + + /* Complain if the value overflows when considered as an + unsigned number. */ + complain_overflow_unsigned +}; + +struct reloc_howto_struct +{ + /* The type field has mainly a documentary use - the back end can + do what it wants with it, though normally the back end's + external idea of the reloc number is stored + in this field. For example, a PC relative word relocation + in a coff environment has the type 023 - because that's + what the outside world calls a R_PCRWORD reloc.
*/ + unsigned int type; + + /* The value the final relocation is shifted right by. This drops + unwanted data from the relocation. */ + unsigned int rightshift; + + /* The size of the item to be relocated. This is *not* a + power-of-two measure. To get the number of bytes operated + on by a type of relocation, use bfd_get_reloc_size. */ + int size; + + /* The number of bits in the item to be relocated. This is used + when doing overflow checking. */ + unsigned int bitsize; + + /* Notes that the relocation is relative to the location in the + data section of the addend. The relocation function will + subtract from the relocation value the address of the location + being relocated. */ + boolean pc_relative; + + /* The bit position of the reloc value in the destination. + The relocated value is left shifted by this amount. */ + unsigned int bitpos; + + /* What type of overflow error should be checked for when + relocating. */ + enum complain_overflow complain_on_overflow; + + /* If this field is non null, then the supplied function is + called rather than the normal function. This allows really + strange relocation methods to be accommodated (e.g., i960 callj + instructions). */ + bfd_reloc_status_type (*special_function) + PARAMS ((bfd *abfd, + arelent *reloc_entry, + struct symbol_cache_entry *symbol, + PTR data, + asection *input_section, + bfd *output_bfd, + char **error_message)); + + /* The textual name of the relocation type. */ + char *name; + + /* When performing a partial link, some formats must modify the + relocations rather than the data - this flag signals this. */ + boolean partial_inplace; + + /* The src_mask selects which parts of the read in data + are to be used in the relocation sum. E.g., if this was an 8 bit + field of data which we read and relocated, this would be + 0x000000ff. When we have relocs which have an addend, such as + sun4 extended relocs, the value in the offset part of a + relocating field is garbage so we never use it. In this case + the mask would be 0x00000000. */ + bfd_vma src_mask; + + /* The dst_mask selects which parts of the instruction are replaced + into the instruction. In most cases src_mask == dst_mask, + except in the above special case, where dst_mask would be + 0x000000ff, and src_mask would be 0x00000000. */ + bfd_vma dst_mask; + + /* When some formats create PC relative instructions, they leave + the value of the pc of the place being relocated in the offset + slot of the instruction, so that a PC relative relocation can + be made just by adding in an ordinary offset (e.g., sun3 a.out).
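+ + (To make the bookkeeping concrete - a sketch of what the generic + routine does, not its exact text: for a pc_relative howto, + bfd_perform_relocation computes roughly + + relocation -= input_section->output_section->vma + + input_section->output_offset; + if (howto->pcrel_offset) + relocation -= reloc_entry->address; + + so this flag decides whether the reloc's own address is folded in.)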
+ Some formats leave the displacement part of an instruction + empty (e.g., m88k bcs); this flag signals the fact. */ + boolean pcrel_offset; + +}; +#define HOWTO(C, R,S,B, P, BI, O, SF, NAME, INPLACE, MASKSRC, MASKDST, PC) \ + {(unsigned)C,R,S,B, P, BI, O,SF,NAME,INPLACE,MASKSRC,MASKDST,PC} +#define NEWHOWTO( FUNCTION, NAME,SIZE,REL,IN) HOWTO(0,0,SIZE,0,REL,0,complain_overflow_dont,FUNCTION, NAME,false,0,0,IN) + +#define EMPTY_HOWTO(C) \ + HOWTO((C),0,0,0,false,0,complain_overflow_dont,NULL,NULL,false,0,0,false) + +#define HOWTO_PREPARE(relocation, symbol) \ + { \ + if (symbol != (asymbol *)NULL) { \ + if (bfd_is_com_section (symbol->section)) { \ + relocation = 0; \ + } \ + else { \ + relocation = symbol->value; \ + } \ + } \ +} +unsigned int +bfd_get_reloc_size PARAMS ((reloc_howto_type *)); + +typedef struct relent_chain { + arelent relent; + struct relent_chain *next; +} arelent_chain; +bfd_reloc_status_type + +bfd_check_overflow + PARAMS ((enum complain_overflow how, + unsigned int bitsize, + unsigned int rightshift, + unsigned int addrsize, + bfd_vma relocation)); + +bfd_reloc_status_type + +bfd_perform_relocation + PARAMS ((bfd *abfd, + arelent *reloc_entry, + PTR data, + asection *input_section, + bfd *output_bfd, + char **error_message)); + +bfd_reloc_status_type + +bfd_install_relocation + PARAMS ((bfd *abfd, + arelent *reloc_entry, + PTR data, bfd_vma data_start, + asection *input_section, + char **error_message)); + +enum bfd_reloc_code_real { + _dummy_first_bfd_reloc_code_real, + + +/* Basic absolute relocations of N bits. */ + BFD_RELOC_64, + BFD_RELOC_32, + BFD_RELOC_26, + BFD_RELOC_24, + BFD_RELOC_16, + BFD_RELOC_14, + BFD_RELOC_8, + +/* PC-relative relocations. Sometimes these are relative to the address +of the relocation itself; sometimes they are relative to the start of +the section containing the relocation. It depends on the specific target. + +The 24-bit relocation is used in some Intel 960 configurations. */ + BFD_RELOC_64_PCREL, + BFD_RELOC_32_PCREL, + BFD_RELOC_24_PCREL, + BFD_RELOC_16_PCREL, + BFD_RELOC_12_PCREL, + BFD_RELOC_8_PCREL, + +/* For ELF. */ + BFD_RELOC_32_GOT_PCREL, + BFD_RELOC_16_GOT_PCREL, + BFD_RELOC_8_GOT_PCREL, + BFD_RELOC_32_GOTOFF, + BFD_RELOC_16_GOTOFF, + BFD_RELOC_LO16_GOTOFF, + BFD_RELOC_HI16_GOTOFF, + BFD_RELOC_HI16_S_GOTOFF, + BFD_RELOC_8_GOTOFF, + BFD_RELOC_32_PLT_PCREL, + BFD_RELOC_24_PLT_PCREL, + BFD_RELOC_16_PLT_PCREL, + BFD_RELOC_8_PLT_PCREL, + BFD_RELOC_32_PLTOFF, + BFD_RELOC_16_PLTOFF, + BFD_RELOC_LO16_PLTOFF, + BFD_RELOC_HI16_PLTOFF, + BFD_RELOC_HI16_S_PLTOFF, + BFD_RELOC_8_PLTOFF, + +/* Relocations used by 68K ELF. */ + BFD_RELOC_68K_GLOB_DAT, + BFD_RELOC_68K_JMP_SLOT, + BFD_RELOC_68K_RELATIVE, + +/* Linkage-table relative. */ + BFD_RELOC_32_BASEREL, + BFD_RELOC_16_BASEREL, + BFD_RELOC_LO16_BASEREL, + BFD_RELOC_HI16_BASEREL, + BFD_RELOC_HI16_S_BASEREL, + BFD_RELOC_8_BASEREL, + BFD_RELOC_RVA, + +/* Absolute 8-bit relocation, but used to form an address like 0xFFnn. */ + BFD_RELOC_8_FFnn, + +/* These PC-relative relocations are stored as word displacements -- +i.e., byte displacements shifted right two bits. The 30-bit word +displacement (<<32_PCREL_S2>> -- 32 bits, shifted 2) is used on the +SPARC. (SPARC tools generally refer to this as <<WDISP30>>.) The +signed 16-bit displacement is used on the MIPS, and the 23-bit +displacement is used on the Alpha. */ + BFD_RELOC_32_PCREL_S2, + BFD_RELOC_16_PCREL_S2, + BFD_RELOC_23_PCREL_S2, + +/* High 22 bits and low 10 bits of 32-bit value, placed into lower bits of +the target word.
These are used on the SPARC. */ + BFD_RELOC_HI22, + BFD_RELOC_LO10, + +/* For systems that allocate a Global Pointer register, these are +displacements off that register. These relocation types are +handled specially, because the value the register will have is +decided relatively late. */ + BFD_RELOC_GPREL16, + BFD_RELOC_GPREL32, + +/* Reloc types used for i960/b.out. */ + BFD_RELOC_I960_CALLJ, + +/* SPARC ELF relocations. There is probably some overlap with other +relocation types already defined. */ + BFD_RELOC_NONE, + BFD_RELOC_SPARC_WDISP22, + BFD_RELOC_SPARC22, + BFD_RELOC_SPARC13, + BFD_RELOC_SPARC_GOT10, + BFD_RELOC_SPARC_GOT13, + BFD_RELOC_SPARC_GOT22, + BFD_RELOC_SPARC_PC10, + BFD_RELOC_SPARC_PC22, + BFD_RELOC_SPARC_WPLT30, + BFD_RELOC_SPARC_COPY, + BFD_RELOC_SPARC_GLOB_DAT, + BFD_RELOC_SPARC_JMP_SLOT, + BFD_RELOC_SPARC_RELATIVE, + BFD_RELOC_SPARC_UA32, + +/* I think these are specific to SPARC a.out (e.g., Sun 4). */ + BFD_RELOC_SPARC_BASE13, + BFD_RELOC_SPARC_BASE22, + +/* SPARC64 relocations */ +#define BFD_RELOC_SPARC_64 BFD_RELOC_64 + BFD_RELOC_SPARC_10, + BFD_RELOC_SPARC_11, + BFD_RELOC_SPARC_OLO10, + BFD_RELOC_SPARC_HH22, + BFD_RELOC_SPARC_HM10, + BFD_RELOC_SPARC_LM22, + BFD_RELOC_SPARC_PC_HH22, + BFD_RELOC_SPARC_PC_HM10, + BFD_RELOC_SPARC_PC_LM22, + BFD_RELOC_SPARC_WDISP16, + BFD_RELOC_SPARC_WDISP19, + BFD_RELOC_SPARC_7, + BFD_RELOC_SPARC_6, + BFD_RELOC_SPARC_5, +#define BFD_RELOC_SPARC_DISP64 BFD_RELOC_64_PCREL + BFD_RELOC_SPARC_PLT64, + BFD_RELOC_SPARC_HIX22, + BFD_RELOC_SPARC_LOX10, + BFD_RELOC_SPARC_H44, + BFD_RELOC_SPARC_M44, + BFD_RELOC_SPARC_L44, + BFD_RELOC_SPARC_REGISTER, + +/* SPARC little endian relocation */ + BFD_RELOC_SPARC_REV32, + +/* Alpha ECOFF and ELF relocations. Some of these treat the symbol or +"addend" in some special way. +For GPDISP_HI16 ("gpdisp") relocations, the symbol is ignored when +writing; when reading, it will be the absolute section symbol. The +addend is the displacement in bytes of the "lda" instruction from +the "ldah" instruction (which is at the address of this reloc). */ + BFD_RELOC_ALPHA_GPDISP_HI16, + +/* For GPDISP_LO16 ("ignore") relocations, the symbol is handled as +with GPDISP_HI16 relocs. The addend is ignored when writing the +relocations out, and is filled in with the file's GP value on +reading, for convenience. */ + BFD_RELOC_ALPHA_GPDISP_LO16, + +/* The ELF GPDISP relocation is exactly the same as the GPDISP_HI16 +relocation except that there is no accompanying GPDISP_LO16 +relocation. */ + BFD_RELOC_ALPHA_GPDISP, + +/* The Alpha LITERAL/LITUSE relocs are produced by a symbol reference; +the assembler turns it into a LDQ instruction to load the address of +the symbol, and then fills in a register in the real instruction. + +The LITERAL reloc, at the LDQ instruction, refers to the .lita +section symbol. The addend is ignored when writing, but is filled +in with the file's GP value on reading, for convenience, as with the +GPDISP_LO16 reloc. + +The ELF_LITERAL reloc is somewhere between 16_GOTOFF and GPDISP_LO16. +It should refer to the symbol to be referenced, as with 16_GOTOFF, +but it generates output not based on the position within the .got +section, but relative to the GP value chosen for the file during the +final link stage. + +The LITUSE reloc, on the instruction using the loaded address, gives +information to the linker that it might be able to use to optimize +away some literal section references. 
The symbol is ignored (read +as the absolute section symbol), and the "addend" indicates the type +of instruction using the register: +1 - "memory" fmt insn +2 - byte-manipulation (byte offset reg) +3 - jsr (target of branch) + +The GNU linker currently doesn't do any of this optimizing. */ + BFD_RELOC_ALPHA_LITERAL, + BFD_RELOC_ALPHA_ELF_LITERAL, + BFD_RELOC_ALPHA_LITUSE, + +/* The BFD_RELOC_ALPHA_USER_* relocations are used by the assembler to +process the explicit !<reloc>!sequence relocations, and are mapped +into the normal relocations at the end of processing. */ + BFD_RELOC_ALPHA_USER_LITERAL, + BFD_RELOC_ALPHA_USER_LITUSE_BASE, + BFD_RELOC_ALPHA_USER_LITUSE_BYTOFF, + BFD_RELOC_ALPHA_USER_LITUSE_JSR, + BFD_RELOC_ALPHA_USER_GPDISP, + BFD_RELOC_ALPHA_USER_GPRELHIGH, + BFD_RELOC_ALPHA_USER_GPRELLOW, + +/* The HINT relocation indicates a value that should be filled into the +"hint" field of a jmp/jsr/ret instruction, for possible branch- +prediction logic which may be provided on some processors. */ + BFD_RELOC_ALPHA_HINT, + +/* The LINKAGE relocation outputs a linkage pair in the object file, +which is filled by the linker. */ + BFD_RELOC_ALPHA_LINKAGE, + +/* The CODEADDR relocation outputs a STO_CA in the object file, +which is filled by the linker. */ + BFD_RELOC_ALPHA_CODEADDR, + +/* Bits 27..2 of the relocation address shifted right 2 bits; +simple reloc otherwise. */ + BFD_RELOC_MIPS_JMP, + +/* The MIPS16 jump instruction. */ + BFD_RELOC_MIPS16_JMP, + +/* MIPS16 GP relative reloc. */ + BFD_RELOC_MIPS16_GPREL, + +/* High 16 bits of 32-bit value; simple reloc. */ + BFD_RELOC_HI16, + +/* High 16 bits of 32-bit value but the low 16 bits will be sign +extended and added to form the final result. If the low 16 +bits form a negative number, we need to add one to the high value +to compensate for the borrow when the low bits are added. */ + BFD_RELOC_HI16_S, + +/* Low 16 bits. */ + BFD_RELOC_LO16, + +/* Like BFD_RELOC_HI16_S, but PC relative. */ + BFD_RELOC_PCREL_HI16_S, + +/* Like BFD_RELOC_LO16, but PC relative. */ + BFD_RELOC_PCREL_LO16, + +/* Relocation relative to the global pointer. */ +#define BFD_RELOC_MIPS_GPREL BFD_RELOC_GPREL16 + +/* Relocation against a MIPS literal section. */ + BFD_RELOC_MIPS_LITERAL, + +/* MIPS ELF relocations. */ + BFD_RELOC_MIPS_GOT16, + BFD_RELOC_MIPS_CALL16, +#define BFD_RELOC_MIPS_GPREL32 BFD_RELOC_GPREL32 + BFD_RELOC_MIPS_GOT_HI16, + BFD_RELOC_MIPS_GOT_LO16, + BFD_RELOC_MIPS_CALL_HI16, + BFD_RELOC_MIPS_CALL_LO16, + BFD_RELOC_MIPS_SUB, + BFD_RELOC_MIPS_GOT_PAGE, + BFD_RELOC_MIPS_GOT_OFST, + BFD_RELOC_MIPS_GOT_DISP, + + +/* i386/elf relocations */ + BFD_RELOC_386_GOT32, + BFD_RELOC_386_PLT32, + BFD_RELOC_386_COPY, + BFD_RELOC_386_GLOB_DAT, + BFD_RELOC_386_JUMP_SLOT, + BFD_RELOC_386_RELATIVE, + BFD_RELOC_386_GOTOFF, + BFD_RELOC_386_GOTPC, + +/* ns32k relocations */ + BFD_RELOC_NS32K_IMM_8, + BFD_RELOC_NS32K_IMM_16, + BFD_RELOC_NS32K_IMM_32, + BFD_RELOC_NS32K_IMM_8_PCREL, + BFD_RELOC_NS32K_IMM_16_PCREL, + BFD_RELOC_NS32K_IMM_32_PCREL, + BFD_RELOC_NS32K_DISP_8, + BFD_RELOC_NS32K_DISP_16, + BFD_RELOC_NS32K_DISP_32, + BFD_RELOC_NS32K_DISP_8_PCREL, + BFD_RELOC_NS32K_DISP_16_PCREL, + BFD_RELOC_NS32K_DISP_32_PCREL, + +/* Picojava relocs. Not all of these appear in object files. */ + BFD_RELOC_PJ_CODE_HI16, + BFD_RELOC_PJ_CODE_LO16, + BFD_RELOC_PJ_CODE_DIR16, + BFD_RELOC_PJ_CODE_DIR32, + BFD_RELOC_PJ_CODE_REL16, + BFD_RELOC_PJ_CODE_REL32, + +/* Power(rs6000) and PowerPC relocations.
*/ + BFD_RELOC_PPC_B26, + BFD_RELOC_PPC_BA26, + BFD_RELOC_PPC_TOC16, + BFD_RELOC_PPC_B16, + BFD_RELOC_PPC_B16_BRTAKEN, + BFD_RELOC_PPC_B16_BRNTAKEN, + BFD_RELOC_PPC_BA16, + BFD_RELOC_PPC_BA16_BRTAKEN, + BFD_RELOC_PPC_BA16_BRNTAKEN, + BFD_RELOC_PPC_COPY, + BFD_RELOC_PPC_GLOB_DAT, + BFD_RELOC_PPC_JMP_SLOT, + BFD_RELOC_PPC_RELATIVE, + BFD_RELOC_PPC_LOCAL24PC, + BFD_RELOC_PPC_EMB_NADDR32, + BFD_RELOC_PPC_EMB_NADDR16, + BFD_RELOC_PPC_EMB_NADDR16_LO, + BFD_RELOC_PPC_EMB_NADDR16_HI, + BFD_RELOC_PPC_EMB_NADDR16_HA, + BFD_RELOC_PPC_EMB_SDAI16, + BFD_RELOC_PPC_EMB_SDA2I16, + BFD_RELOC_PPC_EMB_SDA2REL, + BFD_RELOC_PPC_EMB_SDA21, + BFD_RELOC_PPC_EMB_MRKREF, + BFD_RELOC_PPC_EMB_RELSEC16, + BFD_RELOC_PPC_EMB_RELST_LO, + BFD_RELOC_PPC_EMB_RELST_HI, + BFD_RELOC_PPC_EMB_RELST_HA, + BFD_RELOC_PPC_EMB_BIT_FLD, + BFD_RELOC_PPC_EMB_RELSDA, + +/* Instruction 370/390 relocations */ + BFD_RELOC_I370_D12, + +/* The type of reloc used to build a constructor table - at the moment +probably a 32 bit wide absolute relocation, but the target can choose. +It generally does map to one of the other relocation types. */ + BFD_RELOC_CTOR, + +/* ARM 26 bit pc-relative branch. The lowest two bits must be zero and are +not stored in the instruction. */ + BFD_RELOC_ARM_PCREL_BRANCH, + +/* These relocs are only used within the ARM assembler. They are not +(at present) written to any object files. */ + BFD_RELOC_ARM_IMMEDIATE, + BFD_RELOC_ARM_ADRL_IMMEDIATE, + BFD_RELOC_ARM_OFFSET_IMM, + BFD_RELOC_ARM_SHIFT_IMM, + BFD_RELOC_ARM_SWI, + BFD_RELOC_ARM_MULTI, + BFD_RELOC_ARM_CP_OFF_IMM, + BFD_RELOC_ARM_ADR_IMM, + BFD_RELOC_ARM_LDR_IMM, + BFD_RELOC_ARM_LITERAL, + BFD_RELOC_ARM_IN_POOL, + BFD_RELOC_ARM_OFFSET_IMM8, + BFD_RELOC_ARM_HWLITERAL, + BFD_RELOC_ARM_THUMB_ADD, + BFD_RELOC_ARM_THUMB_IMM, + BFD_RELOC_ARM_THUMB_SHIFT, + BFD_RELOC_ARM_THUMB_OFFSET, + BFD_RELOC_ARM_GOT12, + BFD_RELOC_ARM_GOT32, + BFD_RELOC_ARM_JUMP_SLOT, + BFD_RELOC_ARM_COPY, + BFD_RELOC_ARM_GLOB_DAT, + BFD_RELOC_ARM_PLT32, + BFD_RELOC_ARM_RELATIVE, + BFD_RELOC_ARM_GOTOFF, + BFD_RELOC_ARM_GOTPC, + +/* Hitachi SH relocs. Not all of these appear in object files. */ + BFD_RELOC_SH_PCDISP8BY2, + BFD_RELOC_SH_PCDISP12BY2, + BFD_RELOC_SH_IMM4, + BFD_RELOC_SH_IMM4BY2, + BFD_RELOC_SH_IMM4BY4, + BFD_RELOC_SH_IMM8, + BFD_RELOC_SH_IMM8BY2, + BFD_RELOC_SH_IMM8BY4, + BFD_RELOC_SH_PCRELIMM8BY2, + BFD_RELOC_SH_PCRELIMM8BY4, + BFD_RELOC_SH_SWITCH16, + BFD_RELOC_SH_SWITCH32, + BFD_RELOC_SH_USES, + BFD_RELOC_SH_COUNT, + BFD_RELOC_SH_ALIGN, + BFD_RELOC_SH_CODE, + BFD_RELOC_SH_DATA, + BFD_RELOC_SH_LABEL, + +/* Thumb 23-, 12- and 9-bit pc-relative branches. The lowest bit must +be zero and is not stored in the instruction. */ + BFD_RELOC_THUMB_PCREL_BRANCH9, + BFD_RELOC_THUMB_PCREL_BRANCH12, + BFD_RELOC_THUMB_PCREL_BRANCH23, + +/* Argonaut RISC Core (ARC) relocs. +ARC 22 bit pc-relative branch. The lowest two bits must be zero and are +not stored in the instruction. The high 20 bits are installed in bits 26 +through 7 of the instruction. */ + BFD_RELOC_ARC_B22_PCREL, + +/* ARC 26 bit absolute branch. The lowest two bits must be zero and are not +stored in the instruction. The high 24 bits are installed in bits 23 +through 0. */ + BFD_RELOC_ARC_B26, + +/* Mitsubishi D10V relocs. +This is a 10-bit reloc with the right 2 bits +assumed to be 0. */ + BFD_RELOC_D10V_10_PCREL_R, + +/* Mitsubishi D10V relocs. +This is a 10-bit reloc with the right 2 bits +assumed to be 0. This is the same as the previous reloc +except it is in the left container, i.e., +shifted left 15 bits.
*/ + BFD_RELOC_D10V_10_PCREL_L, + +/* This is an 18-bit reloc with the right 2 bits +assumed to be 0. */ + BFD_RELOC_D10V_18, + +/* This is an 18-bit reloc with the right 2 bits +assumed to be 0. */ + BFD_RELOC_D10V_18_PCREL, + +/* Mitsubishi D30V relocs. +This is a 6-bit absolute reloc. */ + BFD_RELOC_D30V_6, + +/* This is a 6-bit pc-relative reloc with +the right 3 bits assumed to be 0. */ + BFD_RELOC_D30V_9_PCREL, + +/* This is a 6-bit pc-relative reloc with +the right 3 bits assumed to be 0. Same +as the previous reloc but on the right side +of the container. */ + BFD_RELOC_D30V_9_PCREL_R, + +/* This is a 12-bit absolute reloc with the +right 3 bits assumed to be 0. */ + BFD_RELOC_D30V_15, + +/* This is a 12-bit pc-relative reloc with +the right 3 bits assumed to be 0. */ + BFD_RELOC_D30V_15_PCREL, + +/* This is a 12-bit pc-relative reloc with +the right 3 bits assumed to be 0. Same +as the previous reloc but on the right side +of the container. */ + BFD_RELOC_D30V_15_PCREL_R, + +/* This is an 18-bit absolute reloc with +the right 3 bits assumed to be 0. */ + BFD_RELOC_D30V_21, + +/* This is an 18-bit pc-relative reloc with +the right 3 bits assumed to be 0. */ + BFD_RELOC_D30V_21_PCREL, + +/* This is an 18-bit pc-relative reloc with +the right 3 bits assumed to be 0. Same +as the previous reloc but on the right side +of the container. */ + BFD_RELOC_D30V_21_PCREL_R, + +/* This is a 32-bit absolute reloc. */ + BFD_RELOC_D30V_32, + +/* This is a 32-bit pc-relative reloc. */ + BFD_RELOC_D30V_32_PCREL, + +/* Mitsubishi M32R relocs. +This is a 24 bit absolute address. */ + BFD_RELOC_M32R_24, + +/* This is a 10-bit pc-relative reloc with the right 2 bits assumed to be 0. */ + BFD_RELOC_M32R_10_PCREL, + +/* This is an 18-bit reloc with the right 2 bits assumed to be 0. */ + BFD_RELOC_M32R_18_PCREL, + +/* This is a 26-bit reloc with the right 2 bits assumed to be 0. */ + BFD_RELOC_M32R_26_PCREL, + +/* This is a 16-bit reloc containing the high 16 bits of an address +used when the lower 16 bits are treated as unsigned. */ + BFD_RELOC_M32R_HI16_ULO, + +/* This is a 16-bit reloc containing the high 16 bits of an address +used when the lower 16 bits are treated as signed. */ + BFD_RELOC_M32R_HI16_SLO, + +/* This is a 16-bit reloc containing the lower 16 bits of an address. */ + BFD_RELOC_M32R_LO16, + +/* This is a 16-bit reloc containing the small data area offset for use in +add3, load, and store instructions. */ + BFD_RELOC_M32R_SDA16, + +/* This is a 9-bit reloc. */ + BFD_RELOC_V850_9_PCREL, + +/* This is a 22-bit reloc. */ + BFD_RELOC_V850_22_PCREL, + +/* This is a 16 bit offset from the short data area pointer. */ + BFD_RELOC_V850_SDA_16_16_OFFSET, + +/* This is a 16 bit offset (of which only 15 bits are used) from the +short data area pointer. */ + BFD_RELOC_V850_SDA_15_16_OFFSET, + +/* This is a 16 bit offset from the zero data area pointer. */ + BFD_RELOC_V850_ZDA_16_16_OFFSET, + +/* This is a 16 bit offset (of which only 15 bits are used) from the +zero data area pointer. */ + BFD_RELOC_V850_ZDA_15_16_OFFSET, + +/* This is an 8 bit offset (of which only 6 bits are used) from the +tiny data area pointer. */ + BFD_RELOC_V850_TDA_6_8_OFFSET, + +/* This is an 8 bit offset (of which only 7 bits are used) from the tiny +data area pointer. */ + BFD_RELOC_V850_TDA_7_8_OFFSET, + +/* This is a 7 bit offset from the tiny data area pointer. */ + BFD_RELOC_V850_TDA_7_7_OFFSET, + +/* This is a 16 bit offset from the tiny data area pointer.
*/ + BFD_RELOC_V850_TDA_16_16_OFFSET, + +/* This is a 5 bit offset (of which only 4 bits are used) from the tiny +data area pointer. */ + BFD_RELOC_V850_TDA_4_5_OFFSET, + +/* This is a 4 bit offset from the tiny data area pointer. */ + BFD_RELOC_V850_TDA_4_4_OFFSET, + +/* This is a 16 bit offset from the short data area pointer, with the +bits placed non-contiguously in the instruction. */ + BFD_RELOC_V850_SDA_16_16_SPLIT_OFFSET, + +/* This is a 16 bit offset from the zero data area pointer, with the +bits placed non-contiguously in the instruction. */ + BFD_RELOC_V850_ZDA_16_16_SPLIT_OFFSET, + +/* This is a 6 bit offset from the call table base pointer. */ + BFD_RELOC_V850_CALLT_6_7_OFFSET, + +/* This is a 16 bit offset from the call table base pointer. */ + BFD_RELOC_V850_CALLT_16_16_OFFSET, + + +/* This is a 32 bit pcrel reloc for the mn10300, offset by two bytes in the +instruction. */ + BFD_RELOC_MN10300_32_PCREL, + +/* This is a 16 bit pcrel reloc for the mn10300, offset by two bytes in the +instruction. */ + BFD_RELOC_MN10300_16_PCREL, + +/* This is an 8 bit DP reloc for the tms320c30, where the most +significant 8 bits of a 24 bit word are placed into the least +significant 8 bits of the opcode. */ + BFD_RELOC_TIC30_LDP, + +/* This is a 48 bit reloc for the FR30 that stores 32 bits. */ + BFD_RELOC_FR30_48, + +/* This is a 32 bit reloc for the FR30 that stores 20 bits split up into +two sections. */ + BFD_RELOC_FR30_20, + +/* This is a 16 bit reloc for the FR30 that stores a 6 bit word offset in +4 bits. */ + BFD_RELOC_FR30_6_IN_4, + +/* This is a 16 bit reloc for the FR30 that stores an 8 bit byte offset +into 8 bits. */ + BFD_RELOC_FR30_8_IN_8, + +/* This is a 16 bit reloc for the FR30 that stores a 9 bit short offset +into 8 bits. */ + BFD_RELOC_FR30_9_IN_8, + +/* This is a 16 bit reloc for the FR30 that stores a 10 bit word offset +into 8 bits. */ + BFD_RELOC_FR30_10_IN_8, + +/* This is a 16 bit reloc for the FR30 that stores a 9 bit pc relative +short offset into 8 bits. */ + BFD_RELOC_FR30_9_PCREL, + +/* This is a 16 bit reloc for the FR30 that stores a 12 bit pc relative +short offset into 11 bits. */ + BFD_RELOC_FR30_12_PCREL, + +/* Motorola Mcore relocations. */ + BFD_RELOC_MCORE_PCREL_IMM8BY4, + BFD_RELOC_MCORE_PCREL_IMM11BY2, + BFD_RELOC_MCORE_PCREL_IMM4BY2, + BFD_RELOC_MCORE_PCREL_32, + BFD_RELOC_MCORE_PCREL_JSR_IMM11BY2, + BFD_RELOC_MCORE_RVA, + +/* These two relocations are used by the linker to determine which of +the entries in a C++ virtual function table are actually used. When +the --gc-sections option is given, the linker will zero out the entries +that are not used, so that the code for those functions need not be +included in the output. + +VTABLE_INHERIT is a zero-space relocation used to describe to the +linker the inheritance tree of a C++ virtual function table. The +relocation's symbol should be the parent class' vtable, and the +relocation should be located at the child vtable. + +VTABLE_ENTRY is a zero-space relocation that describes the use of a +virtual function table entry. The reloc's symbol should refer to the +table of the class mentioned in the code. Off of that base, an offset +describes the entry that is being used. For Rela hosts, this offset +is stored in the reloc's addend. For Rel hosts, we are forced to put +this offset in the reloc's section offset.
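+ +(A hypothetical C++ fragment, purely for illustration: given + + struct A { virtual void f (); }; + struct B : A { virtual void f (); }; + +B's vtable would carry a VTABLE_INHERIT reloc whose symbol is A's +vtable, and a virtual call through a pointer to B would produce a +VTABLE_ENTRY reloc against B's vtable plus the offset of f's slot.)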
*/ + BFD_RELOC_VTABLE_INHERIT, + BFD_RELOC_VTABLE_ENTRY, + BFD_RELOC_UNUSED }; +typedef enum bfd_reloc_code_real bfd_reloc_code_real_type; +reloc_howto_type * + +bfd_reloc_type_lookup PARAMS ((bfd *abfd, bfd_reloc_code_real_type code)); + +const char * +bfd_get_reloc_code_name PARAMS ((bfd_reloc_code_real_type code)); + + +typedef struct symbol_cache_entry +{ + /* A pointer to the BFD which owns the symbol. This information + is necessary so that a back end can work out what additional + information (invisible to the application writer) is carried + with the symbol. + + This field is *almost* redundant, since you can use section->owner + instead, except that some symbols point to the global sections + bfd_{abs,com,und}_section. This could be fixed by making + these globals be per-bfd (or per-target-flavor). FIXME. */ + + struct _bfd *the_bfd; /* Use bfd_asymbol_bfd(sym) to access this field. */ + + /* The text of the symbol. The name is left alone, and not copied; the + application may not alter it. */ + CONST char *name; + + /* The value of the symbol. This really should be a union of a + numeric value with a pointer, since some flags indicate that + a pointer to another symbol is stored here. */ + symvalue value; + + /* Attributes of a symbol: */ + +#define BSF_NO_FLAGS 0x00 + + /* The symbol has local scope; <<static>> in <<C>>. The value + is the offset into the section of the data. */ +#define BSF_LOCAL 0x01 + + /* The symbol has global scope; initialized data in <<C>>. The + value is the offset into the section of the data. */ +#define BSF_GLOBAL 0x02 + + /* The symbol has global scope and is exported. The value is + the offset into the section of the data. */ +#define BSF_EXPORT BSF_GLOBAL /* no real difference */ + + /* A normal C symbol would be one of: + <<BSF_LOCAL>>, <<BSF_FORT_COMM>>, <<BSF_UNDEFINED>> or + <<BSF_GLOBAL>> */ + + /* The symbol is a debugging record. The value has an arbitrary + meaning, unless BSF_DEBUGGING_RELOC is also set. */ +#define BSF_DEBUGGING 0x08 + + /* The symbol denotes a function entry point. Used in ELF, + perhaps others someday. */ +#define BSF_FUNCTION 0x10 + + /* Used by the linker. */ +#define BSF_KEEP 0x20 +#define BSF_KEEP_G 0x40 + + /* A weak global symbol, overridable without warnings by + a regular global symbol of the same name. */ +#define BSF_WEAK 0x80 + + /* This symbol was created to point to a section, e.g. ELF's + STT_SECTION symbols. */ +#define BSF_SECTION_SYM 0x100 + + /* The symbol used to be a common symbol, but now it is + allocated. */ +#define BSF_OLD_COMMON 0x200 + + /* The default value for common data. */ +#define BFD_FORT_COMM_DEFAULT_VALUE 0 + + /* In some files the type of a symbol sometimes alters its + location in an output file - i.e., in coff a <<.file>> symbol + which is also a <<C_FILE>> symbol appears where it was + declared and not at the end of a section. This bit is set + by the target BFD part to convey this information. */ + +#define BSF_NOT_AT_END 0x400 + + /* Signal that the symbol is the label of a constructor section. */ +#define BSF_CONSTRUCTOR 0x800 + + /* Signal that the symbol is a warning symbol. The name is a + warning. The name of the next symbol is the one to warn about; + if a reference is made to a symbol with the same name as the next + symbol, a warning is issued by the linker. */ +#define BSF_WARNING 0x1000 + + /* Signal that the symbol is indirect. This symbol is an indirect + pointer to the symbol with the same name as the next symbol. */ +#define BSF_INDIRECT 0x2000 + + /* BSF_FILE marks symbols that contain a file name. This is used + for ELF STT_FILE symbols.
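+ + (A sketch of how the flags combine - hypothetical code, the real + classification being bfd_decode_symclass, declared below: + + char c = (sym->flags & BSF_LOCAL) ? 't' + : (sym->flags & BSF_WEAK) ? 'W' + : (sym->flags & BSF_GLOBAL) ? 'T' : '?'; + + which yields the familiar nm-style letters.)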
*/ +#define BSF_FILE 0x4000 + + /* Symbol is from dynamic linking information. */ +#define BSF_DYNAMIC 0x8000 + + /* The symbol denotes a data object. Used in ELF, and perhaps + others someday. */ +#define BSF_OBJECT 0x10000 + + /* This symbol is a debugging symbol. The value is the offset + into the section of the data. BSF_DEBUGGING should be set + as well. */ +#define BSF_DEBUGGING_RELOC 0x20000 + + flagword flags; + + /* A pointer to the section to which this symbol is + relative. This will always be non NULL, there are special + sections for undefined and absolute symbols. */ + struct sec *section; + + /* Back end special data. */ + union + { + PTR p; + bfd_vma i; + } udata; + +} asymbol; +#define bfd_get_symtab_upper_bound(abfd) \ + BFD_SEND (abfd, _bfd_get_symtab_upper_bound, (abfd)) +boolean +bfd_is_local_label PARAMS ((bfd *abfd, asymbol *sym)); + +boolean +bfd_is_local_label_name PARAMS ((bfd *abfd, const char *name)); + +#define bfd_is_local_label_name(abfd, name) \ + BFD_SEND (abfd, _bfd_is_local_label_name, (abfd, name)) +#define bfd_canonicalize_symtab(abfd, location) \ + BFD_SEND (abfd, _bfd_canonicalize_symtab,\ + (abfd, location)) +boolean +bfd_set_symtab PARAMS ((bfd *abfd, asymbol **location, unsigned int count)); + +void +bfd_print_symbol_vandf PARAMS ((PTR file, asymbol *symbol)); + +#define bfd_make_empty_symbol(abfd) \ + BFD_SEND (abfd, _bfd_make_empty_symbol, (abfd)) +#define bfd_make_debug_symbol(abfd,ptr,size) \ + BFD_SEND (abfd, _bfd_make_debug_symbol, (abfd, ptr, size)) +int +bfd_decode_symclass PARAMS ((asymbol *symbol)); + +void +bfd_symbol_info PARAMS ((asymbol *symbol, symbol_info *ret)); + +boolean +bfd_copy_private_symbol_data PARAMS ((bfd *ibfd, asymbol *isym, bfd *obfd, asymbol *osym)); + +#define bfd_copy_private_symbol_data(ibfd, isymbol, obfd, osymbol) \ + BFD_SEND (obfd, _bfd_copy_private_symbol_data, \ + (ibfd, isymbol, obfd, osymbol)) +struct _bfd +{ + /* The filename the application opened the BFD with. */ + CONST char *filename; + + /* A pointer to the target jump table. */ + const struct bfd_target *xvec; + + /* To avoid dragging too many header files into every file that + includes `<<bfd.h>>', IOSTREAM has been declared as a "char + *", and MTIME as a "long". Their correct types, to which they + are cast when used, are "FILE *" and "time_t". The iostream + is the result of an fopen on the filename. However, if the + BFD_IN_MEMORY flag is set, then iostream is actually a pointer + to a bfd_in_memory struct. */ + PTR iostream; + + /* Is the file descriptor being cached? That is, can it be closed as + needed, and re-opened when accessed later? */ + + boolean cacheable; + + /* Marks whether there was a default target specified when the + BFD was opened. This is used to select which matching algorithm + to use to choose the back end. */ + + boolean target_defaulted; + + /* The caching routines use these to maintain a + least-recently-used list of BFDs */ + + struct _bfd *lru_prev, *lru_next; + + /* When a file is closed by the caching routines, BFD retains + state information on the file here: */ + + file_ptr where; + + /* and here: (``once'' means at least once) */ + + boolean opened_once; + + /* Set if we have a locally maintained mtime value, rather than + getting it from the file each time: */ + + boolean mtime_set; + + /* File modified time, if mtime_set is true: */ + + long mtime; + + /* Reserved for an unimplemented file locking extension. */ + + int ifd; + + /* The format which belongs to the BFD. (object, core, etc.)
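+ + (Typical use, sketched: open the file with bfd_openr, then call + + bfd_check_format (abfd, bfd_object) + + and on success the format, flags and tdata fields below are valid; + bfd_check_format is declared near the end of this header.)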
*/ + + bfd_format format; + + /* The direction the BFD was opened with*/ + + enum bfd_direction {no_direction = 0, + read_direction = 1, + write_direction = 2, + both_direction = 3} direction; + + /* Format_specific flags*/ + + flagword flags; + + /* Currently my_archive is tested before adding origin to + anything. I believe that this can become always an add of + origin, with origin set to 0 for non archive files. */ + + file_ptr origin; + + /* Remember when output has begun, to stop strange things + from happening. */ + boolean output_has_begun; + + /* Pointer to linked list of sections*/ + struct sec *sections; + + /* The number of sections */ + unsigned int section_count; + + /* Stuff only useful for object files: + The start address. */ + bfd_vma start_address; + + /* Used for input and output*/ + unsigned int symcount; + + /* Symbol table for output BFD (with symcount entries) */ + struct symbol_cache_entry **outsymbols; + + /* Pointer to structure which contains architecture information*/ + const struct bfd_arch_info *arch_info; + + /* Stuff only useful for archives:*/ + PTR arelt_data; + struct _bfd *my_archive; /* The containing archive BFD. */ + struct _bfd *next; /* The next BFD in the archive. */ + struct _bfd *archive_head; /* The first BFD in the archive. */ + boolean has_armap; + + /* A chain of BFD structures involved in a link. */ + struct _bfd *link_next; + + /* A field used by _bfd_generic_link_add_archive_symbols. This will + be used only for archive elements. */ + int archive_pass; + + /* Used by the back end to hold private data. */ + + union + { + struct aout_data_struct *aout_data; + struct artdata *aout_ar_data; + struct _oasys_data *oasys_obj_data; + struct _oasys_ar_data *oasys_ar_data; + struct coff_tdata *coff_obj_data; + struct pe_tdata *pe_obj_data; + struct xcoff_tdata *xcoff_obj_data; + struct ecoff_tdata *ecoff_obj_data; + struct ieee_data_struct *ieee_data; + struct ieee_ar_data_struct *ieee_ar_data; + struct srec_data_struct *srec_data; + struct ihex_data_struct *ihex_data; + struct tekhex_data_struct *tekhex_data; + struct elf_obj_tdata *elf_obj_data; + struct nlm_obj_tdata *nlm_obj_data; + struct bout_data_struct *bout_data; + struct sun_core_struct *sun_core_data; + struct sco5_core_struct *sco5_core_data; + struct trad_core_struct *trad_core_data; + struct som_data_struct *som_data; + struct hpux_core_struct *hpux_core_data; + struct hppabsd_core_struct *hppabsd_core_data; + struct sgi_core_struct *sgi_core_data; + struct lynx_core_struct *lynx_core_data; + struct osf_core_struct *osf_core_data; + struct cisco_core_struct *cisco_core_data; + struct versados_data_struct *versados_data; + struct netbsd_core_struct *netbsd_core_data; + PTR any; + } tdata; + + /* Used by the application to hold private data*/ + PTR usrdata; + + /* Where all the allocated stuff under this BFD goes. This is a + struct objalloc *, but we use PTR to avoid requiring the inclusion of + objalloc.h. 
*/ + PTR memory; +}; + +typedef enum bfd_error +{ + bfd_error_no_error = 0, + bfd_error_system_call, + bfd_error_invalid_target, + bfd_error_wrong_format, + bfd_error_invalid_operation, + bfd_error_no_memory, + bfd_error_no_symbols, + bfd_error_no_armap, + bfd_error_no_more_archived_files, + bfd_error_malformed_archive, + bfd_error_file_not_recognized, + bfd_error_file_ambiguously_recognized, + bfd_error_no_contents, + bfd_error_nonrepresentable_section, + bfd_error_no_debug_section, + bfd_error_bad_value, + bfd_error_file_truncated, + bfd_error_file_too_big, + bfd_error_invalid_error_code +} bfd_error_type; + +bfd_error_type +bfd_get_error PARAMS ((void)); + +void +bfd_set_error PARAMS ((bfd_error_type error_tag)); + +CONST char * +bfd_errmsg PARAMS ((bfd_error_type error_tag)); + +void +bfd_perror PARAMS ((CONST char *message)); + +typedef void (*bfd_error_handler_type) PARAMS ((const char *, ...)); + +bfd_error_handler_type +bfd_set_error_handler PARAMS ((bfd_error_handler_type)); + +void +bfd_set_error_program_name PARAMS ((const char *)); + +bfd_error_handler_type +bfd_get_error_handler PARAMS ((void)); + +long +bfd_get_reloc_upper_bound PARAMS ((bfd *abfd, asection *sect)); + +long +bfd_canonicalize_reloc + PARAMS ((bfd *abfd, + asection *sec, + arelent **loc, + asymbol **syms)); + +void +bfd_set_reloc + PARAMS ((bfd *abfd, asection *sec, arelent **rel, unsigned int count) + + ); + +boolean +bfd_set_file_flags PARAMS ((bfd *abfd, flagword flags)); + +boolean +bfd_set_start_address PARAMS ((bfd *abfd, bfd_vma vma)); + +long +bfd_get_mtime PARAMS ((bfd *abfd)); + +long +bfd_get_size PARAMS ((bfd *abfd)); + +int +bfd_get_gp_size PARAMS ((bfd *abfd)); + +void +bfd_set_gp_size PARAMS ((bfd *abfd, int i)); + +bfd_vma +bfd_scan_vma PARAMS ((CONST char *string, CONST char **end, int base)); + +boolean +bfd_copy_private_bfd_data PARAMS ((bfd *ibfd, bfd *obfd)); + +#define bfd_copy_private_bfd_data(ibfd, obfd) \ + BFD_SEND (obfd, _bfd_copy_private_bfd_data, \ + (ibfd, obfd)) +boolean +bfd_merge_private_bfd_data PARAMS ((bfd *ibfd, bfd *obfd)); + +#define bfd_merge_private_bfd_data(ibfd, obfd) \ + BFD_SEND (obfd, _bfd_merge_private_bfd_data, \ + (ibfd, obfd)) +boolean +bfd_set_private_flags PARAMS ((bfd *abfd, flagword flags)); + +#define bfd_set_private_flags(abfd, flags) \ + BFD_SEND (abfd, _bfd_set_private_flags, \ + (abfd, flags)) +#define bfd_sizeof_headers(abfd, reloc) \ + BFD_SEND (abfd, _bfd_sizeof_headers, (abfd, reloc)) + +#define bfd_find_nearest_line(abfd, sec, syms, off, file, func, line) \ + BFD_SEND (abfd, _bfd_find_nearest_line, (abfd, sec, syms, off, file, func, line)) + + /* Do these three do anything useful at all, for any back end? 
*/ +#define bfd_debug_info_start(abfd) \ + BFD_SEND (abfd, _bfd_debug_info_start, (abfd)) + +#define bfd_debug_info_end(abfd) \ + BFD_SEND (abfd, _bfd_debug_info_end, (abfd)) + +#define bfd_debug_info_accumulate(abfd, section) \ + BFD_SEND (abfd, _bfd_debug_info_accumulate, (abfd, section)) + + +#define bfd_stat_arch_elt(abfd, stat) \ + BFD_SEND (abfd, _bfd_stat_arch_elt,(abfd, stat)) + +#define bfd_update_armap_timestamp(abfd) \ + BFD_SEND (abfd, _bfd_update_armap_timestamp, (abfd)) + +#define bfd_set_arch_mach(abfd, arch, mach)\ + BFD_SEND ( abfd, _bfd_set_arch_mach, (abfd, arch, mach)) + +#define bfd_relax_section(abfd, section, link_info, again) \ + BFD_SEND (abfd, _bfd_relax_section, (abfd, section, link_info, again)) + +#define bfd_gc_sections(abfd, link_info) \ + BFD_SEND (abfd, _bfd_gc_sections, (abfd, link_info)) + +#define bfd_link_hash_table_create(abfd) \ + BFD_SEND (abfd, _bfd_link_hash_table_create, (abfd)) + +#define bfd_link_add_symbols(abfd, info) \ + BFD_SEND (abfd, _bfd_link_add_symbols, (abfd, info)) + +#define bfd_final_link(abfd, info) \ + BFD_SEND (abfd, _bfd_final_link, (abfd, info)) + +#define bfd_free_cached_info(abfd) \ + BFD_SEND (abfd, _bfd_free_cached_info, (abfd)) + +#define bfd_get_dynamic_symtab_upper_bound(abfd) \ + BFD_SEND (abfd, _bfd_get_dynamic_symtab_upper_bound, (abfd)) + +#define bfd_print_private_bfd_data(abfd, file)\ + BFD_SEND (abfd, _bfd_print_private_bfd_data, (abfd, file)) + +#define bfd_canonicalize_dynamic_symtab(abfd, asymbols) \ + BFD_SEND (abfd, _bfd_canonicalize_dynamic_symtab, (abfd, asymbols)) + +#define bfd_get_dynamic_reloc_upper_bound(abfd) \ + BFD_SEND (abfd, _bfd_get_dynamic_reloc_upper_bound, (abfd)) + +#define bfd_canonicalize_dynamic_reloc(abfd, arels, asyms) \ + BFD_SEND (abfd, _bfd_canonicalize_dynamic_reloc, (abfd, arels, asyms)) + +extern bfd_byte *bfd_get_relocated_section_contents + PARAMS ((bfd *, struct bfd_link_info *, + struct bfd_link_order *, bfd_byte *, + boolean, asymbol **)); + +symindex +bfd_get_next_mapent PARAMS ((bfd *abfd, symindex previous, carsym **sym)); + +boolean +bfd_set_archive_head PARAMS ((bfd *output, bfd *new_head)); + +bfd * +bfd_openr_next_archived_file PARAMS ((bfd *archive, bfd *previous)); + +CONST char * +bfd_core_file_failing_command PARAMS ((bfd *abfd)); + +int +bfd_core_file_failing_signal PARAMS ((bfd *abfd)); + +boolean +core_file_matches_executable_p + PARAMS ((bfd *core_bfd, bfd *exec_bfd)); + +#define BFD_SEND(bfd, message, arglist) \ + ((*((bfd)->xvec->message)) arglist) + +#ifdef DEBUG_BFD_SEND +#undef BFD_SEND +#define BFD_SEND(bfd, message, arglist) \ + (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? \ + ((*((bfd)->xvec->message)) arglist) : \ + (bfd_assert (__FILE__,__LINE__), NULL)) +#endif +#define BFD_SEND_FMT(bfd, message, arglist) \ + (((bfd)->xvec->message[(int)((bfd)->format)]) arglist) + +#ifdef DEBUG_BFD_SEND +#undef BFD_SEND_FMT +#define BFD_SEND_FMT(bfd, message, arglist) \ + (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? 
\ + (((bfd)->xvec->message[(int)((bfd)->format)]) arglist) : \ + (bfd_assert (__FILE__,__LINE__), NULL)) +#endif +enum bfd_flavour { + bfd_target_unknown_flavour, + bfd_target_aout_flavour, + bfd_target_coff_flavour, + bfd_target_ecoff_flavour, + bfd_target_elf_flavour, + bfd_target_ieee_flavour, + bfd_target_nlm_flavour, + bfd_target_oasys_flavour, + bfd_target_tekhex_flavour, + bfd_target_srec_flavour, + bfd_target_ihex_flavour, + bfd_target_som_flavour, + bfd_target_os9k_flavour, + bfd_target_versados_flavour, + bfd_target_msdos_flavour, + bfd_target_ovax_flavour, + bfd_target_evax_flavour +}; + +enum bfd_endian { BFD_ENDIAN_BIG, BFD_ENDIAN_LITTLE, BFD_ENDIAN_UNKNOWN }; + + /* Forward declaration. */ +typedef struct bfd_link_info _bfd_link_info; + +typedef struct bfd_target +{ + char *name; + enum bfd_flavour flavour; + enum bfd_endian byteorder; + enum bfd_endian header_byteorder; + flagword object_flags; + flagword section_flags; + char symbol_leading_char; + char ar_pad_char; + unsigned short ar_max_namelen; + bfd_vma (*bfd_getx64) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_getx_signed_64) PARAMS ((const bfd_byte *)); + void (*bfd_putx64) PARAMS ((bfd_vma, bfd_byte *)); + bfd_vma (*bfd_getx32) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_getx_signed_32) PARAMS ((const bfd_byte *)); + void (*bfd_putx32) PARAMS ((bfd_vma, bfd_byte *)); + bfd_vma (*bfd_getx16) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_getx_signed_16) PARAMS ((const bfd_byte *)); + void (*bfd_putx16) PARAMS ((bfd_vma, bfd_byte *)); + bfd_vma (*bfd_h_getx64) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_h_getx_signed_64) PARAMS ((const bfd_byte *)); + void (*bfd_h_putx64) PARAMS ((bfd_vma, bfd_byte *)); + bfd_vma (*bfd_h_getx32) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_h_getx_signed_32) PARAMS ((const bfd_byte *)); + void (*bfd_h_putx32) PARAMS ((bfd_vma, bfd_byte *)); + bfd_vma (*bfd_h_getx16) PARAMS ((const bfd_byte *)); + bfd_signed_vma (*bfd_h_getx_signed_16) PARAMS ((const bfd_byte *)); + void (*bfd_h_putx16) PARAMS ((bfd_vma, bfd_byte *)); + const struct bfd_target *(*_bfd_check_format[bfd_type_end]) PARAMS ((bfd *)); + boolean (*_bfd_set_format[bfd_type_end]) PARAMS ((bfd *)); + boolean (*_bfd_write_contents[bfd_type_end]) PARAMS ((bfd *)); + + /* Generic entry points. */ +#define BFD_JUMP_TABLE_GENERIC(NAME)\ +CAT(NAME,_close_and_cleanup),\ +CAT(NAME,_bfd_free_cached_info),\ +CAT(NAME,_new_section_hook),\ +CAT(NAME,_get_section_contents),\ +CAT(NAME,_get_section_contents_in_window) + + /* Called when the BFD is being closed to do any necessary cleanup. */ + boolean (*_close_and_cleanup) PARAMS ((bfd *)); + /* Ask the BFD to free all cached information. */ + boolean (*_bfd_free_cached_info) PARAMS ((bfd *)); + /* Called when a new section is created. */ + boolean (*_new_section_hook) PARAMS ((bfd *, sec_ptr)); + /* Read the contents of a section. */ + boolean (*_bfd_get_section_contents) PARAMS ((bfd *, sec_ptr, PTR, + file_ptr, bfd_size_type)); + boolean (*_bfd_get_section_contents_in_window) + PARAMS ((bfd *, sec_ptr, bfd_window *, + file_ptr, bfd_size_type)); + + /* Entry points to copy private data. */ +#define BFD_JUMP_TABLE_COPY(NAME)\ +CAT(NAME,_bfd_copy_private_bfd_data),\ +CAT(NAME,_bfd_merge_private_bfd_data),\ +CAT(NAME,_bfd_copy_private_section_data),\ +CAT(NAME,_bfd_copy_private_symbol_data),\ +CAT(NAME,_bfd_set_private_flags),\ +CAT(NAME,_bfd_print_private_bfd_data)\ + /* Called to copy BFD general private data from one object file + to another. 
*/ + boolean (*_bfd_copy_private_bfd_data) PARAMS ((bfd *, bfd *)); + /* Called to merge BFD general private data from one object file + to a common output file when linking. */ + boolean (*_bfd_merge_private_bfd_data) PARAMS ((bfd *, bfd *)); + /* Called to copy BFD private section data from one object file + to another. */ + boolean (*_bfd_copy_private_section_data) PARAMS ((bfd *, sec_ptr, + bfd *, sec_ptr)); + /* Called to copy BFD private symbol data from one symbol + to another. */ + boolean (*_bfd_copy_private_symbol_data) PARAMS ((bfd *, asymbol *, + bfd *, asymbol *)); + /* Called to set private backend flags */ + boolean (*_bfd_set_private_flags) PARAMS ((bfd *, flagword)); + + /* Called to print private BFD data */ + boolean (*_bfd_print_private_bfd_data) PARAMS ((bfd *, PTR)); + + /* Core file entry points. */ +#define BFD_JUMP_TABLE_CORE(NAME)\ +CAT(NAME,_core_file_failing_command),\ +CAT(NAME,_core_file_failing_signal),\ +CAT(NAME,_core_file_matches_executable_p) + char * (*_core_file_failing_command) PARAMS ((bfd *)); + int (*_core_file_failing_signal) PARAMS ((bfd *)); + boolean (*_core_file_matches_executable_p) PARAMS ((bfd *, bfd *)); + + /* Archive entry points. */ +#define BFD_JUMP_TABLE_ARCHIVE(NAME)\ +CAT(NAME,_slurp_armap),\ +CAT(NAME,_slurp_extended_name_table),\ +CAT(NAME,_construct_extended_name_table),\ +CAT(NAME,_truncate_arname),\ +CAT(NAME,_write_armap),\ +CAT(NAME,_read_ar_hdr),\ +CAT(NAME,_openr_next_archived_file),\ +CAT(NAME,_get_elt_at_index),\ +CAT(NAME,_generic_stat_arch_elt),\ +CAT(NAME,_update_armap_timestamp) + boolean (*_bfd_slurp_armap) PARAMS ((bfd *)); + boolean (*_bfd_slurp_extended_name_table) PARAMS ((bfd *)); + boolean (*_bfd_construct_extended_name_table) + PARAMS ((bfd *, char **, bfd_size_type *, const char **)); + void (*_bfd_truncate_arname) PARAMS ((bfd *, CONST char *, char *)); + boolean (*write_armap) PARAMS ((bfd *arch, + unsigned int elength, + struct orl *map, + unsigned int orl_count, + int stridx)); + PTR (*_bfd_read_ar_hdr_fn) PARAMS ((bfd *)); + bfd * (*openr_next_archived_file) PARAMS ((bfd *arch, bfd *prev)); +#define bfd_get_elt_at_index(b,i) BFD_SEND(b, _bfd_get_elt_at_index, (b,i)) + bfd * (*_bfd_get_elt_at_index) PARAMS ((bfd *, symindex)); + int (*_bfd_stat_arch_elt) PARAMS ((bfd *, struct stat *)); + boolean (*_bfd_update_armap_timestamp) PARAMS ((bfd *)); + + /* Entry points used for symbols. 
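+ + (For a hypothetical backend prefix foo, BFD_JUMP_TABLE_SYMBOLS (foo) + pastes names with CAT, producing initializers such as + + foo_get_symtab_upper_bound, foo_get_symtab, foo_make_empty_symbol, ... + + so each target supplies one routine per slot below.)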
*/ +#define BFD_JUMP_TABLE_SYMBOLS(NAME)\ +CAT(NAME,_get_symtab_upper_bound),\ +CAT(NAME,_get_symtab),\ +CAT(NAME,_make_empty_symbol),\ +CAT(NAME,_print_symbol),\ +CAT(NAME,_get_symbol_info),\ +CAT(NAME,_bfd_is_local_label_name),\ +CAT(NAME,_get_lineno),\ +CAT(NAME,_find_nearest_line),\ +CAT(NAME,_bfd_make_debug_symbol),\ +CAT(NAME,_read_minisymbols),\ +CAT(NAME,_minisymbol_to_symbol) + long (*_bfd_get_symtab_upper_bound) PARAMS ((bfd *)); + long (*_bfd_canonicalize_symtab) PARAMS ((bfd *, + struct symbol_cache_entry **)); + struct symbol_cache_entry * + (*_bfd_make_empty_symbol) PARAMS ((bfd *)); + void (*_bfd_print_symbol) PARAMS ((bfd *, PTR, + struct symbol_cache_entry *, + bfd_print_symbol_type)); +#define bfd_print_symbol(b,p,s,e) BFD_SEND(b, _bfd_print_symbol, (b,p,s,e)) + void (*_bfd_get_symbol_info) PARAMS ((bfd *, + struct symbol_cache_entry *, + symbol_info *)); +#define bfd_get_symbol_info(b,p,e) BFD_SEND(b, _bfd_get_symbol_info, (b,p,e)) + boolean (*_bfd_is_local_label_name) PARAMS ((bfd *, const char *)); + + alent * (*_get_lineno) PARAMS ((bfd *, struct symbol_cache_entry *)); + boolean (*_bfd_find_nearest_line) PARAMS ((bfd *abfd, + struct sec *section, struct symbol_cache_entry **symbols, + bfd_vma offset, CONST char **file, CONST char **func, + unsigned int *line)); + /* Back-door to allow format-aware applications to create debug symbols + while using BFD for everything else. Currently used by the assembler + when creating COFF files. */ + asymbol * (*_bfd_make_debug_symbol) PARAMS (( + bfd *abfd, + void *ptr, + unsigned long size)); +#define bfd_read_minisymbols(b, d, m, s) \ + BFD_SEND (b, _read_minisymbols, (b, d, m, s)) + long (*_read_minisymbols) PARAMS ((bfd *, boolean, PTR *, + unsigned int *)); +#define bfd_minisymbol_to_symbol(b, d, m, f) \ + BFD_SEND (b, _minisymbol_to_symbol, (b, d, m, f)) + asymbol *(*_minisymbol_to_symbol) PARAMS ((bfd *, boolean, const PTR, + asymbol *)); + + /* Routines for relocs. */ +#define BFD_JUMP_TABLE_RELOCS(NAME)\ +CAT(NAME,_get_reloc_upper_bound),\ +CAT(NAME,_canonicalize_reloc),\ +CAT(NAME,_bfd_reloc_type_lookup) + long (*_get_reloc_upper_bound) PARAMS ((bfd *, sec_ptr)); + long (*_bfd_canonicalize_reloc) PARAMS ((bfd *, sec_ptr, arelent **, + struct symbol_cache_entry **)); + /* See documentation on reloc types. */ + reloc_howto_type * + (*reloc_type_lookup) PARAMS ((bfd *abfd, + bfd_reloc_code_real_type code)); + + /* Routines used when writing an object file. */ +#define BFD_JUMP_TABLE_WRITE(NAME)\ +CAT(NAME,_set_arch_mach),\ +CAT(NAME,_set_section_contents) + boolean (*_bfd_set_arch_mach) PARAMS ((bfd *, enum bfd_architecture, + unsigned long)); + boolean (*_bfd_set_section_contents) PARAMS ((bfd *, sec_ptr, PTR, + file_ptr, bfd_size_type)); + + /* Routines used by the linker. */ +#define BFD_JUMP_TABLE_LINK(NAME)\ +CAT(NAME,_sizeof_headers),\ +CAT(NAME,_bfd_get_relocated_section_contents),\ +CAT(NAME,_bfd_relax_section),\ +CAT(NAME,_bfd_link_hash_table_create),\ +CAT(NAME,_bfd_link_add_symbols),\ +CAT(NAME,_bfd_final_link),\ +CAT(NAME,_bfd_link_split_section),\ +CAT(NAME,_bfd_gc_sections) + int (*_bfd_sizeof_headers) PARAMS ((bfd *, boolean)); + bfd_byte * (*_bfd_get_relocated_section_contents) PARAMS ((bfd *, + struct bfd_link_info *, struct bfd_link_order *, + bfd_byte *data, boolean relocateable, + struct symbol_cache_entry **)); + + boolean (*_bfd_relax_section) PARAMS ((bfd *, struct sec *, + struct bfd_link_info *, boolean *again)); + + /* Create a hash table for the linker. 
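+     The table is created once for the output BFD;
+     _bfd_link_add_symbols below then populates it from each input
+     file, and _bfd_final_link performs the actual link and writes
+     the output.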
Different backends store + different information in this table. */ + struct bfd_link_hash_table *(*_bfd_link_hash_table_create) PARAMS ((bfd *)); + + /* Add symbols from this object file into the hash table. */ + boolean (*_bfd_link_add_symbols) PARAMS ((bfd *, struct bfd_link_info *)); + + /* Do a link based on the link_order structures attached to each + section of the BFD. */ + boolean (*_bfd_final_link) PARAMS ((bfd *, struct bfd_link_info *)); + + /* Should this section be split up into smaller pieces during linking. */ + boolean (*_bfd_link_split_section) PARAMS ((bfd *, struct sec *)); + + /* Remove sections that are not referenced from the output. */ + boolean (*_bfd_gc_sections) PARAMS ((bfd *, struct bfd_link_info *)); + + /* Routines to handle dynamic symbols and relocs. */ +#define BFD_JUMP_TABLE_DYNAMIC(NAME)\ +CAT(NAME,_get_dynamic_symtab_upper_bound),\ +CAT(NAME,_canonicalize_dynamic_symtab),\ +CAT(NAME,_get_dynamic_reloc_upper_bound),\ +CAT(NAME,_canonicalize_dynamic_reloc) + /* Get the amount of memory required to hold the dynamic symbols. */ + long (*_bfd_get_dynamic_symtab_upper_bound) PARAMS ((bfd *)); + /* Read in the dynamic symbols. */ + long (*_bfd_canonicalize_dynamic_symtab) + PARAMS ((bfd *, struct symbol_cache_entry **)); + /* Get the amount of memory required to hold the dynamic relocs. */ + long (*_bfd_get_dynamic_reloc_upper_bound) PARAMS ((bfd *)); + /* Read in the dynamic relocs. */ + long (*_bfd_canonicalize_dynamic_reloc) + PARAMS ((bfd *, arelent **, struct symbol_cache_entry **)); + + /* Opposite endian version of this target. */ + const struct bfd_target * alternative_target; + + PTR backend_data; + +} bfd_target; +boolean +bfd_set_default_target PARAMS ((const char *name)); + +const bfd_target * +bfd_find_target PARAMS ((CONST char *target_name, bfd *abfd)); + +const char ** +bfd_target_list PARAMS ((void)); + +const bfd_target * +bfd_search_for_target PARAMS ((int (* search_func)(const bfd_target *, void *), void *)); + +boolean +bfd_check_format PARAMS ((bfd *abfd, bfd_format format)); + +boolean +bfd_check_format_matches PARAMS ((bfd *abfd, bfd_format format, char ***matching)); + +boolean +bfd_set_format PARAMS ((bfd *abfd, bfd_format format)); + +CONST char * +bfd_format_string PARAMS ((bfd_format format)); + +#ifdef __cplusplus +} +#endif +#endif diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/i386-dis.c linux-2.4.17-rc2-wli2/arch/i386/kdb/i386-dis.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/i386-dis.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/i386-dis.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,3781 @@ +/* Print i386 instructions for GDB, the GNU debugger. + Copyright (C) 1988, 89, 91, 93, 94, 95, 96, 97, 98, 1999 + Free Software Foundation, Inc. + +This file is part of GDB. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/
+
+/*
+ * 80386 instruction printer by Pace Willisson (pace@prep.ai.mit.edu)
+ * July 1988
+ *  modified by John Hassey (hassey@dg-rtp.dg.com)
+ */
+
+/* Extracted from cygnus CVS and modified for kdb use.
+ * Keith Owens 30 Oct 2000
+ */
+
+/*
+ * The main tables describing the instructions are essentially a copy
+ * of the "Opcode Map" chapter (Appendix A) of the Intel 80386
+ * Programmer's Manual. Usually, there is a capital letter, followed
+ * by a small letter. The capital letter tells the addressing mode,
+ * and the small letter tells about the operand size. Refer to
+ * the Intel manual for details.
+ */
+
+#ifdef __KERNEL__
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/dis-asm.h>
+#include <linux/kdb.h>
+#else
+#include "dis-asm.h"
+#include "sysdep.h"
+#include "opintl.h"
+#endif
+
+#define MAXLEN 20
+
+#ifndef __KERNEL__
+#include <setjmp.h>
+#endif
+
+#ifndef UNIXWARE_COMPAT
+/* Set non-zero for broken, compatible instructions. Set to zero for
+   non-broken opcodes. */
+#define UNIXWARE_COMPAT 1
+#endif
+
+static int fetch_data PARAMS ((struct disassemble_info *, bfd_byte *));
+
+struct dis_private
+{
+  /* Points to first byte not fetched. */
+  bfd_byte *max_fetched;
+  bfd_byte the_buffer[MAXLEN];
+  bfd_vma insn_start;
+#ifndef __KERNEL__
+  jmp_buf bailout;
+#endif
+};
+
+/* The opcode for the fwait instruction, which we treat as a prefix
+   when we can. */
+#define FWAIT_OPCODE (0x9b)
+
+/* Flags for the prefixes for the current instruction. See below. */
+static int prefixes;
+
+/* Flags for prefixes which we somehow handled when printing the
+   current instruction. */
+static int used_prefixes;
+
+/* Flags stored in PREFIXES. */
+#define PREFIX_REPZ 1
+#define PREFIX_REPNZ 2
+#define PREFIX_LOCK 4
+#define PREFIX_CS 8
+#define PREFIX_SS 0x10
+#define PREFIX_DS 0x20
+#define PREFIX_ES 0x40
+#define PREFIX_FS 0x80
+#define PREFIX_GS 0x100
+#define PREFIX_DATA 0x200
+#define PREFIX_ADDR 0x400
+#define PREFIX_FWAIT 0x800
+
+/* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
+   to ADDR (exclusive) are valid. Returns 1 for success, longjmps
+   on error. */
+#define FETCH_DATA(info, addr) \
+  ((addr) <= ((struct dis_private *)(info->private_data))->max_fetched \
+   ? 1 : fetch_data ((info), (addr)))
+
+static int
+fetch_data (info, addr)
+     struct disassemble_info *info;
+     bfd_byte *addr;
+{
+  int status;
+  struct dis_private *priv = (struct dis_private *)info->private_data;
+  bfd_vma start = priv->insn_start + (priv->max_fetched - priv->the_buffer);
+
+  status = (*info->read_memory_func) (start,
+                                      priv->max_fetched,
+                                      addr - priv->max_fetched,
+                                      info);
+  if (status != 0)
+    {
+      /* If we did manage to read at least one byte, then
+         print_insn_i386 will do something sensible. Otherwise, print
+         an error. We do that here because this is where we know
+         STATUS. */
+      if (priv->max_fetched == priv->the_buffer)
+        (*info->memory_error_func) (status, start, info);
+#ifndef __KERNEL__
+      longjmp (priv->bailout, 1);
+#else
+      /* XXX - what to do? */
+      kdb_printf("Hmm. longjmp.\n");
+#endif
+    }
+  else
+    priv->max_fetched = addr;
+  return 1;
+}
+
+#define XX NULL, 0
+
+#define Eb OP_E, b_mode
+#define indirEb OP_indirE, b_mode
+#define Gb OP_G, b_mode
+#define Ev OP_E, v_mode
+#define Ed OP_E, d_mode
+#define indirEv OP_indirE, v_mode
+#define Ew OP_E, w_mode
+#define Ma OP_E, v_mode
+#define M OP_E, 0		/* lea */
+#define Mp OP_E, 0		/* 32 or 48 bit memory operand for LDS, LES etc */
+#define Gv OP_G, v_mode
+#define Gw OP_G, w_mode
+#define Rd OP_Rd, d_mode
+#define Ib OP_I, b_mode
+#define sIb OP_sI, b_mode	/* sign extended byte */
+#define Iv OP_I, v_mode
+#define Iw OP_I, w_mode
+#define Jb OP_J, b_mode
+#define Jv OP_J, v_mode
+#define Cd OP_C, d_mode
+#define Dd OP_D, d_mode
+#define Td OP_T, d_mode
+
+#define eAX OP_REG, eAX_reg
+#define eBX OP_REG, eBX_reg
+#define eCX OP_REG, eCX_reg
+#define eDX OP_REG, eDX_reg
+#define eSP OP_REG, eSP_reg
+#define eBP OP_REG, eBP_reg
+#define eSI OP_REG, eSI_reg
+#define eDI OP_REG, eDI_reg
+#define AL OP_REG, al_reg
+#define CL OP_REG, cl_reg
+#define DL OP_REG, dl_reg
+#define BL OP_REG, bl_reg
+#define AH OP_REG, ah_reg
+#define CH OP_REG, ch_reg
+#define DH OP_REG, dh_reg
+#define BH OP_REG, bh_reg
+#define AX OP_REG, ax_reg
+#define DX OP_REG, dx_reg
+#define indirDX OP_REG, indir_dx_reg
+
+#define Sw OP_SEG, w_mode
+#define Ap OP_DIR, 0
+#define Ob OP_OFF, b_mode
+#define Ov OP_OFF, v_mode
+#define Xb OP_DSreg, eSI_reg
+#define Xv OP_DSreg, eSI_reg
+#define Yb OP_ESreg, eDI_reg
+#define Yv OP_ESreg, eDI_reg
+#define DSBX OP_DSreg, eBX_reg
+
+#define es OP_REG, es_reg
+#define ss OP_REG, ss_reg
+#define cs OP_REG, cs_reg
+#define ds OP_REG, ds_reg
+#define fs OP_REG, fs_reg
+#define gs OP_REG, gs_reg
+
+#define MX OP_MMX, 0
+#define XM OP_XMM, 0
+#define EM OP_EM, v_mode
+#define EX OP_EX, v_mode
+#define MS OP_MS, v_mode
+#define None OP_E, 0
+#define OPSUF OP_3DNowSuffix, 0
+#define OPSIMD OP_SIMD_Suffix, 0
+
+/* bits in sizeflag */
+#if 0 /* leave undefined until someone adds the extra flag to objdump */
+#define SUFFIX_ALWAYS 4
+#endif
+#define AFLAG 2
+#define DFLAG 1
+
+typedef void (*op_rtn) PARAMS ((int bytemode, int sizeflag));
+
+static void OP_E PARAMS ((int, int));
+static void OP_G PARAMS ((int, int));
+static void OP_I PARAMS ((int, int));
+static void OP_indirE PARAMS ((int, int));
+static void OP_sI PARAMS ((int, int));
+static void OP_REG PARAMS ((int, int));
+static void OP_J PARAMS ((int, int));
+static void OP_DIR PARAMS ((int, int));
+static void OP_OFF PARAMS ((int, int));
+static void OP_ESreg PARAMS ((int, int));
+static void OP_DSreg PARAMS ((int, int));
+static void OP_SEG PARAMS ((int, int));
+static void OP_C PARAMS ((int, int));
+static void OP_D PARAMS ((int, int));
+static void OP_T PARAMS ((int, int));
+static void OP_Rd PARAMS ((int, int));
+static void OP_ST PARAMS ((int, int));
+static void OP_STi PARAMS ((int, int));
+static void OP_MMX PARAMS ((int, int));
+static void OP_XMM PARAMS ((int, int));
+static void OP_EM PARAMS ((int, int));
+static void OP_EX PARAMS ((int, int));
+static void OP_MS PARAMS ((int, int));
+static void OP_3DNowSuffix PARAMS ((int, int));
+static void OP_SIMD_Suffix PARAMS ((int, int));
+static void SIMD_Fixup PARAMS ((int, int));
+
+static void append_seg PARAMS ((void));
+static void set_op PARAMS ((unsigned int op));
+static void putop PARAMS ((const char *template, int sizeflag));
+static void dofloat PARAMS ((int sizeflag));
+static int get16 PARAMS ((void));
+static int get32 PARAMS ((void));
+static void ckprefix PARAMS ((void));
+static
const char *prefix_name PARAMS ((int, int)); +static void ptr_reg PARAMS ((int, int)); +static void BadOp PARAMS ((void)); + +#define b_mode 1 +#define v_mode 2 +#define w_mode 3 +#define d_mode 4 +#define x_mode 5 + +#define es_reg 100 +#define cs_reg 101 +#define ss_reg 102 +#define ds_reg 103 +#define fs_reg 104 +#define gs_reg 105 + +#define eAX_reg 108 +#define eCX_reg 109 +#define eDX_reg 110 +#define eBX_reg 111 +#define eSP_reg 112 +#define eBP_reg 113 +#define eSI_reg 114 +#define eDI_reg 115 + +#define al_reg 116 +#define cl_reg 117 +#define dl_reg 118 +#define bl_reg 119 +#define ah_reg 120 +#define ch_reg 121 +#define dh_reg 122 +#define bh_reg 123 + +#define ax_reg 124 +#define cx_reg 125 +#define dx_reg 126 +#define bx_reg 127 +#define sp_reg 128 +#define bp_reg 129 +#define si_reg 130 +#define di_reg 131 + +#define indir_dx_reg 150 + +#define USE_GROUPS 1 +#define USE_PREFIX_USER_TABLE 2 + +#define GRP1b NULL, NULL, 0, NULL, USE_GROUPS, NULL, 0 +#define GRP1S NULL, NULL, 1, NULL, USE_GROUPS, NULL, 0 +#define GRP1Ss NULL, NULL, 2, NULL, USE_GROUPS, NULL, 0 +#define GRP2b NULL, NULL, 3, NULL, USE_GROUPS, NULL, 0 +#define GRP2S NULL, NULL, 4, NULL, USE_GROUPS, NULL, 0 +#define GRP2b_one NULL, NULL, 5, NULL, USE_GROUPS, NULL, 0 +#define GRP2S_one NULL, NULL, 6, NULL, USE_GROUPS, NULL, 0 +#define GRP2b_cl NULL, NULL, 7, NULL, USE_GROUPS, NULL, 0 +#define GRP2S_cl NULL, NULL, 8, NULL, USE_GROUPS, NULL, 0 +#define GRP3b NULL, NULL, 9, NULL, USE_GROUPS, NULL, 0 +#define GRP3S NULL, NULL, 10, NULL, USE_GROUPS, NULL, 0 +#define GRP4 NULL, NULL, 11, NULL, USE_GROUPS, NULL, 0 +#define GRP5 NULL, NULL, 12, NULL, USE_GROUPS, NULL, 0 +#define GRP6 NULL, NULL, 13, NULL, USE_GROUPS, NULL, 0 +#define GRP7 NULL, NULL, 14, NULL, USE_GROUPS, NULL, 0 +#define GRP8 NULL, NULL, 15, NULL, USE_GROUPS, NULL, 0 +#define GRP9 NULL, NULL, 16, NULL, USE_GROUPS, NULL, 0 +#define GRP10 NULL, NULL, 17, NULL, USE_GROUPS, NULL, 0 +#define GRP11 NULL, NULL, 18, NULL, USE_GROUPS, NULL, 0 +#define GRP12 NULL, NULL, 19, NULL, USE_GROUPS, NULL, 0 +#define GRP13 NULL, NULL, 20, NULL, USE_GROUPS, NULL, 0 +#define GRP14 NULL, NULL, 21, NULL, USE_GROUPS, NULL, 0 +#define GRPAMD NULL, NULL, 22, NULL, USE_GROUPS, NULL, 0 + +#define PREGRP0 NULL, NULL, 0, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP1 NULL, NULL, 1, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP2 NULL, NULL, 2, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP3 NULL, NULL, 3, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP4 NULL, NULL, 4, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP5 NULL, NULL, 5, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP6 NULL, NULL, 6, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP7 NULL, NULL, 7, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP8 NULL, NULL, 8, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP9 NULL, NULL, 9, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP10 NULL, NULL, 10, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP11 NULL, NULL, 11, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP12 NULL, NULL, 12, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP13 NULL, NULL, 13, NULL, USE_PREFIX_USER_TABLE, NULL, 0 +#define PREGRP14 NULL, NULL, 14, NULL, USE_PREFIX_USER_TABLE, NULL, 0 + +#define FLOATCODE 50 +#define FLOAT NULL, NULL, FLOATCODE, NULL, 0, NULL, 0 + +struct dis386 { + const char *name; + op_rtn op1; + int bytemode1; + op_rtn op2; + int bytemode2; + op_rtn op3; + int bytemode3; +}; + +/* Upper case letters in the instruction names here are 
macros. + 'A' => print 'b' if no register operands or suffix_always is true + 'B' => print 'b' if suffix_always is true + 'E' => print 'e' if 32-bit form of jcxz + 'L' => print 'l' if suffix_always is true + 'N' => print 'n' if instruction has no wait "prefix" + 'P' => print 'w' or 'l' if instruction has an operand size prefix, + or suffix_always is true + 'Q' => print 'w' or 'l' if no register operands or suffix_always is true + 'R' => print 'w' or 'l' ("wd" or "dq" in intel mode) + 'S' => print 'w' or 'l' if suffix_always is true + 'W' => print 'b' or 'w' ("w" or "de" in intel mode) +*/ + +static const struct dis386 dis386_att[] = { + /* 00 */ + { "addB", Eb, Gb, XX }, + { "addS", Ev, Gv, XX }, + { "addB", Gb, Eb, XX }, + { "addS", Gv, Ev, XX }, + { "addB", AL, Ib, XX }, + { "addS", eAX, Iv, XX }, + { "pushP", es, XX, XX }, + { "popP", es, XX, XX }, + /* 08 */ + { "orB", Eb, Gb, XX }, + { "orS", Ev, Gv, XX }, + { "orB", Gb, Eb, XX }, + { "orS", Gv, Ev, XX }, + { "orB", AL, Ib, XX }, + { "orS", eAX, Iv, XX }, + { "pushP", cs, XX, XX }, + { "(bad)", XX, XX, XX }, /* 0x0f extended opcode escape */ + /* 10 */ + { "adcB", Eb, Gb, XX }, + { "adcS", Ev, Gv, XX }, + { "adcB", Gb, Eb, XX }, + { "adcS", Gv, Ev, XX }, + { "adcB", AL, Ib, XX }, + { "adcS", eAX, Iv, XX }, + { "pushP", ss, XX, XX }, + { "popP", ss, XX, XX }, + /* 18 */ + { "sbbB", Eb, Gb, XX }, + { "sbbS", Ev, Gv, XX }, + { "sbbB", Gb, Eb, XX }, + { "sbbS", Gv, Ev, XX }, + { "sbbB", AL, Ib, XX }, + { "sbbS", eAX, Iv, XX }, + { "pushP", ds, XX, XX }, + { "popP", ds, XX, XX }, + /* 20 */ + { "andB", Eb, Gb, XX }, + { "andS", Ev, Gv, XX }, + { "andB", Gb, Eb, XX }, + { "andS", Gv, Ev, XX }, + { "andB", AL, Ib, XX }, + { "andS", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG ES prefix */ + { "daa", XX, XX, XX }, + /* 28 */ + { "subB", Eb, Gb, XX }, + { "subS", Ev, Gv, XX }, + { "subB", Gb, Eb, XX }, + { "subS", Gv, Ev, XX }, + { "subB", AL, Ib, XX }, + { "subS", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG CS prefix */ + { "das", XX, XX, XX }, + /* 30 */ + { "xorB", Eb, Gb, XX }, + { "xorS", Ev, Gv, XX }, + { "xorB", Gb, Eb, XX }, + { "xorS", Gv, Ev, XX }, + { "xorB", AL, Ib, XX }, + { "xorS", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG SS prefix */ + { "aaa", XX, XX, XX }, + /* 38 */ + { "cmpB", Eb, Gb, XX }, + { "cmpS", Ev, Gv, XX }, + { "cmpB", Gb, Eb, XX }, + { "cmpS", Gv, Ev, XX }, + { "cmpB", AL, Ib, XX }, + { "cmpS", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG DS prefix */ + { "aas", XX, XX, XX }, + /* 40 */ + { "incS", eAX, XX, XX }, + { "incS", eCX, XX, XX }, + { "incS", eDX, XX, XX }, + { "incS", eBX, XX, XX }, + { "incS", eSP, XX, XX }, + { "incS", eBP, XX, XX }, + { "incS", eSI, XX, XX }, + { "incS", eDI, XX, XX }, + /* 48 */ + { "decS", eAX, XX, XX }, + { "decS", eCX, XX, XX }, + { "decS", eDX, XX, XX }, + { "decS", eBX, XX, XX }, + { "decS", eSP, XX, XX }, + { "decS", eBP, XX, XX }, + { "decS", eSI, XX, XX }, + { "decS", eDI, XX, XX }, + /* 50 */ + { "pushS", eAX, XX, XX }, + { "pushS", eCX, XX, XX }, + { "pushS", eDX, XX, XX }, + { "pushS", eBX, XX, XX }, + { "pushS", eSP, XX, XX }, + { "pushS", eBP, XX, XX }, + { "pushS", eSI, XX, XX }, + { "pushS", eDI, XX, XX }, + /* 58 */ + { "popS", eAX, XX, XX }, + { "popS", eCX, XX, XX }, + { "popS", eDX, XX, XX }, + { "popS", eBX, XX, XX }, + { "popS", eSP, XX, XX }, + { "popS", eBP, XX, XX }, + { "popS", eSI, XX, XX }, + { "popS", eDI, XX, XX }, + /* 60 */ + { "pushaP", XX, XX, XX }, + { "popaP", XX, XX, XX }, + { "boundS", Gv, Ma, XX }, + { "arpl", Ew, Gw, XX }, + 
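+  /* The next four opcodes (0x64-0x67) are prefix bytes: the fs/gs
+     segment overrides and the operand/address size toggles.
+     ckprefix() consumes them before the opcode lookup, so their
+     table slots are only placeholders marked (bad). */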
{ "(bad)", XX, XX, XX }, /* seg fs */ + { "(bad)", XX, XX, XX }, /* seg gs */ + { "(bad)", XX, XX, XX }, /* op size prefix */ + { "(bad)", XX, XX, XX }, /* adr size prefix */ + /* 68 */ + { "pushP", Iv, XX, XX }, /* 386 book wrong */ + { "imulS", Gv, Ev, Iv }, + { "pushP", sIb, XX, XX }, /* push of byte really pushes 2 or 4 bytes */ + { "imulS", Gv, Ev, sIb }, + { "insb", Yb, indirDX, XX }, + { "insR", Yv, indirDX, XX }, + { "outsb", indirDX, Xb, XX }, + { "outsR", indirDX, Xv, XX }, + /* 70 */ + { "jo", Jb, XX, XX }, + { "jno", Jb, XX, XX }, + { "jb", Jb, XX, XX }, + { "jae", Jb, XX, XX }, + { "je", Jb, XX, XX }, + { "jne", Jb, XX, XX }, + { "jbe", Jb, XX, XX }, + { "ja", Jb, XX, XX }, + /* 78 */ + { "js", Jb, XX, XX }, + { "jns", Jb, XX, XX }, + { "jp", Jb, XX, XX }, + { "jnp", Jb, XX, XX }, + { "jl", Jb, XX, XX }, + { "jge", Jb, XX, XX }, + { "jle", Jb, XX, XX }, + { "jg", Jb, XX, XX }, + /* 80 */ + { GRP1b }, + { GRP1S }, + { "(bad)", XX, XX, XX }, + { GRP1Ss }, + { "testB", Eb, Gb, XX }, + { "testS", Ev, Gv, XX }, + { "xchgB", Eb, Gb, XX }, + { "xchgS", Ev, Gv, XX }, + /* 88 */ + { "movB", Eb, Gb, XX }, + { "movS", Ev, Gv, XX }, + { "movB", Gb, Eb, XX }, + { "movS", Gv, Ev, XX }, + { "movQ", Ev, Sw, XX }, + { "leaS", Gv, M, XX }, + { "movQ", Sw, Ev, XX }, + { "popQ", Ev, XX, XX }, + /* 90 */ + { "nop", XX, XX, XX }, + { "xchgS", eCX, eAX, XX }, + { "xchgS", eDX, eAX, XX }, + { "xchgS", eBX, eAX, XX }, + { "xchgS", eSP, eAX, XX }, + { "xchgS", eBP, eAX, XX }, + { "xchgS", eSI, eAX, XX }, + { "xchgS", eDI, eAX, XX }, + /* 98 */ + { "cWtR", XX, XX, XX }, + { "cRtd", XX, XX, XX }, + { "lcallP", Ap, XX, XX }, + { "(bad)", XX, XX, XX }, /* fwait */ + { "pushfP", XX, XX, XX }, + { "popfP", XX, XX, XX }, + { "sahf", XX, XX, XX }, + { "lahf", XX, XX, XX }, + /* a0 */ + { "movB", AL, Ob, XX }, + { "movS", eAX, Ov, XX }, + { "movB", Ob, AL, XX }, + { "movS", Ov, eAX, XX }, + { "movsb", Yb, Xb, XX }, + { "movsR", Yv, Xv, XX }, + { "cmpsb", Xb, Yb, XX }, + { "cmpsR", Xv, Yv, XX }, + /* a8 */ + { "testB", AL, Ib, XX }, + { "testS", eAX, Iv, XX }, + { "stosB", Yb, AL, XX }, + { "stosS", Yv, eAX, XX }, + { "lodsB", AL, Xb, XX }, + { "lodsS", eAX, Xv, XX }, + { "scasB", AL, Yb, XX }, + { "scasS", eAX, Yv, XX }, + /* b0 */ + { "movB", AL, Ib, XX }, + { "movB", CL, Ib, XX }, + { "movB", DL, Ib, XX }, + { "movB", BL, Ib, XX }, + { "movB", AH, Ib, XX }, + { "movB", CH, Ib, XX }, + { "movB", DH, Ib, XX }, + { "movB", BH, Ib, XX }, + /* b8 */ + { "movS", eAX, Iv, XX }, + { "movS", eCX, Iv, XX }, + { "movS", eDX, Iv, XX }, + { "movS", eBX, Iv, XX }, + { "movS", eSP, Iv, XX }, + { "movS", eBP, Iv, XX }, + { "movS", eSI, Iv, XX }, + { "movS", eDI, Iv, XX }, + /* c0 */ + { GRP2b }, + { GRP2S }, + { "retP", Iw, XX, XX }, + { "retP", XX, XX, XX }, + { "lesS", Gv, Mp, XX }, + { "ldsS", Gv, Mp, XX }, + { "movA", Eb, Ib, XX }, + { "movQ", Ev, Iv, XX }, + /* c8 */ + { "enterP", Iw, Ib, XX }, + { "leaveP", XX, XX, XX }, + { "lretP", Iw, XX, XX }, + { "lretP", XX, XX, XX }, + { "int3", XX, XX, XX }, + { "int", Ib, XX, XX }, + { "into", XX, XX, XX}, + { "iretP", XX, XX, XX }, + /* d0 */ + { GRP2b_one }, + { GRP2S_one }, + { GRP2b_cl }, + { GRP2S_cl }, + { "aam", sIb, XX, XX }, + { "aad", sIb, XX, XX }, + { "(bad)", XX, XX, XX }, + { "xlat", DSBX, XX, XX }, + /* d8 */ + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + /* e0 */ + { "loopne", Jb, XX, XX }, + { "loope", Jb, XX, XX }, + { "loop", Jb, XX, XX }, + { "jEcxz", Jb, XX, XX }, + { "inB", AL, Ib, XX }, + { 
"inS", eAX, Ib, XX }, + { "outB", Ib, AL, XX }, + { "outS", Ib, eAX, XX }, + /* e8 */ + { "callP", Jv, XX, XX }, + { "jmpP", Jv, XX, XX }, + { "ljmpP", Ap, XX, XX }, + { "jmp", Jb, XX, XX }, + { "inB", AL, indirDX, XX }, + { "inS", eAX, indirDX, XX }, + { "outB", indirDX, AL, XX }, + { "outS", indirDX, eAX, XX }, + /* f0 */ + { "(bad)", XX, XX, XX }, /* lock prefix */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, /* repne */ + { "(bad)", XX, XX, XX }, /* repz */ + { "hlt", XX, XX, XX }, + { "cmc", XX, XX, XX }, + { GRP3b }, + { GRP3S }, + /* f8 */ + { "clc", XX, XX, XX }, + { "stc", XX, XX, XX }, + { "cli", XX, XX, XX }, + { "sti", XX, XX, XX }, + { "cld", XX, XX, XX }, + { "std", XX, XX, XX }, + { GRP4 }, + { GRP5 }, +}; + +static const struct dis386 dis386_intel[] = { + /* 00 */ + { "add", Eb, Gb, XX }, + { "add", Ev, Gv, XX }, + { "add", Gb, Eb, XX }, + { "add", Gv, Ev, XX }, + { "add", AL, Ib, XX }, + { "add", eAX, Iv, XX }, + { "push", es, XX, XX }, + { "pop", es, XX, XX }, + /* 08 */ + { "or", Eb, Gb, XX }, + { "or", Ev, Gv, XX }, + { "or", Gb, Eb, XX }, + { "or", Gv, Ev, XX }, + { "or", AL, Ib, XX }, + { "or", eAX, Iv, XX }, + { "push", cs, XX, XX }, + { "(bad)", XX, XX, XX }, /* 0x0f extended opcode escape */ + /* 10 */ + { "adc", Eb, Gb, XX }, + { "adc", Ev, Gv, XX }, + { "adc", Gb, Eb, XX }, + { "adc", Gv, Ev, XX }, + { "adc", AL, Ib, XX }, + { "adc", eAX, Iv, XX }, + { "push", ss, XX, XX }, + { "pop", ss, XX, XX }, + /* 18 */ + { "sbb", Eb, Gb, XX }, + { "sbb", Ev, Gv, XX }, + { "sbb", Gb, Eb, XX }, + { "sbb", Gv, Ev, XX }, + { "sbb", AL, Ib, XX }, + { "sbb", eAX, Iv, XX }, + { "push", ds, XX, XX }, + { "pop", ds, XX, XX }, + /* 20 */ + { "and", Eb, Gb, XX }, + { "and", Ev, Gv, XX }, + { "and", Gb, Eb, XX }, + { "and", Gv, Ev, XX }, + { "and", AL, Ib, XX }, + { "and", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG ES prefix */ + { "daa", XX, XX, XX }, + /* 28 */ + { "sub", Eb, Gb, XX }, + { "sub", Ev, Gv, XX }, + { "sub", Gb, Eb, XX }, + { "sub", Gv, Ev, XX }, + { "sub", AL, Ib, XX }, + { "sub", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG CS prefix */ + { "das", XX, XX, XX }, + /* 30 */ + { "xor", Eb, Gb, XX }, + { "xor", Ev, Gv, XX }, + { "xor", Gb, Eb, XX }, + { "xor", Gv, Ev, XX }, + { "xor", AL, Ib, XX }, + { "xor", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG SS prefix */ + { "aaa", XX, XX, XX }, + /* 38 */ + { "cmp", Eb, Gb, XX }, + { "cmp", Ev, Gv, XX }, + { "cmp", Gb, Eb, XX }, + { "cmp", Gv, Ev, XX }, + { "cmp", AL, Ib, XX }, + { "cmp", eAX, Iv, XX }, + { "(bad)", XX, XX, XX }, /* SEG DS prefix */ + { "aas", XX, XX, XX }, + /* 40 */ + { "inc", eAX, XX, XX }, + { "inc", eCX, XX, XX }, + { "inc", eDX, XX, XX }, + { "inc", eBX, XX, XX }, + { "inc", eSP, XX, XX }, + { "inc", eBP, XX, XX }, + { "inc", eSI, XX, XX }, + { "inc", eDI, XX, XX }, + /* 48 */ + { "dec", eAX, XX, XX }, + { "dec", eCX, XX, XX }, + { "dec", eDX, XX, XX }, + { "dec", eBX, XX, XX }, + { "dec", eSP, XX, XX }, + { "dec", eBP, XX, XX }, + { "dec", eSI, XX, XX }, + { "dec", eDI, XX, XX }, + /* 50 */ + { "push", eAX, XX, XX }, + { "push", eCX, XX, XX }, + { "push", eDX, XX, XX }, + { "push", eBX, XX, XX }, + { "push", eSP, XX, XX }, + { "push", eBP, XX, XX }, + { "push", eSI, XX, XX }, + { "push", eDI, XX, XX }, + /* 58 */ + { "pop", eAX, XX, XX }, + { "pop", eCX, XX, XX }, + { "pop", eDX, XX, XX }, + { "pop", eBX, XX, XX }, + { "pop", eSP, XX, XX }, + { "pop", eBP, XX, XX }, + { "pop", eSI, XX, XX }, + { "pop", eDI, XX, XX }, + /* 60 */ + { "pusha", XX, XX, XX }, + { "popa", XX, XX, XX }, 
+ { "bound", Gv, Ma, XX }, + { "arpl", Ew, Gw, XX }, + { "(bad)", XX, XX, XX }, /* seg fs */ + { "(bad)", XX, XX, XX }, /* seg gs */ + { "(bad)", XX, XX, XX }, /* op size prefix */ + { "(bad)", XX, XX, XX }, /* adr size prefix */ + /* 68 */ + { "push", Iv, XX, XX }, /* 386 book wrong */ + { "imul", Gv, Ev, Iv }, + { "push", sIb, XX, XX }, /* push of byte really pushes 2 or 4 bytes */ + { "imul", Gv, Ev, sIb }, + { "ins", Yb, indirDX, XX }, + { "ins", Yv, indirDX, XX }, + { "outs", indirDX, Xb, XX }, + { "outs", indirDX, Xv, XX }, + /* 70 */ + { "jo", Jb, XX, XX }, + { "jno", Jb, XX, XX }, + { "jb", Jb, XX, XX }, + { "jae", Jb, XX, XX }, + { "je", Jb, XX, XX }, + { "jne", Jb, XX, XX }, + { "jbe", Jb, XX, XX }, + { "ja", Jb, XX, XX }, + /* 78 */ + { "js", Jb, XX, XX }, + { "jns", Jb, XX, XX }, + { "jp", Jb, XX, XX }, + { "jnp", Jb, XX, XX }, + { "jl", Jb, XX, XX }, + { "jge", Jb, XX, XX }, + { "jle", Jb, XX, XX }, + { "jg", Jb, XX, XX }, + /* 80 */ + { GRP1b }, + { GRP1S }, + { "(bad)", XX, XX, XX }, + { GRP1Ss }, + { "test", Eb, Gb, XX }, + { "test", Ev, Gv, XX }, + { "xchg", Eb, Gb, XX }, + { "xchg", Ev, Gv, XX }, + /* 88 */ + { "mov", Eb, Gb, XX }, + { "mov", Ev, Gv, XX }, + { "mov", Gb, Eb, XX }, + { "mov", Gv, Ev, XX }, + { "mov", Ev, Sw, XX }, + { "lea", Gv, M, XX }, + { "mov", Sw, Ev, XX }, + { "pop", Ev, XX, XX }, + /* 90 */ + { "nop", XX, XX, XX }, + { "xchg", eCX, eAX, XX }, + { "xchg", eDX, eAX, XX }, + { "xchg", eBX, eAX, XX }, + { "xchg", eSP, eAX, XX }, + { "xchg", eBP, eAX, XX }, + { "xchg", eSI, eAX, XX }, + { "xchg", eDI, eAX, XX }, + /* 98 */ + { "cW", XX, XX, XX }, /* cwde and cbw */ + { "cR", XX, XX, XX }, /* cdq and cwd */ + { "lcall", Ap, XX, XX }, + { "(bad)", XX, XX, XX }, /* fwait */ + { "pushf", XX, XX, XX }, + { "popf", XX, XX, XX }, + { "sahf", XX, XX, XX }, + { "lahf", XX, XX, XX }, + /* a0 */ + { "mov", AL, Ob, XX }, + { "mov", eAX, Ov, XX }, + { "mov", Ob, AL, XX }, + { "mov", Ov, eAX, XX }, + { "movs", Yb, Xb, XX }, + { "movs", Yv, Xv, XX }, + { "cmps", Xb, Yb, XX }, + { "cmps", Xv, Yv, XX }, + /* a8 */ + { "test", AL, Ib, XX }, + { "test", eAX, Iv, XX }, + { "stos", Yb, AL, XX }, + { "stos", Yv, eAX, XX }, + { "lods", AL, Xb, XX }, + { "lods", eAX, Xv, XX }, + { "scas", AL, Yb, XX }, + { "scas", eAX, Yv, XX }, + /* b0 */ + { "mov", AL, Ib, XX }, + { "mov", CL, Ib, XX }, + { "mov", DL, Ib, XX }, + { "mov", BL, Ib, XX }, + { "mov", AH, Ib, XX }, + { "mov", CH, Ib, XX }, + { "mov", DH, Ib, XX }, + { "mov", BH, Ib, XX }, + /* b8 */ + { "mov", eAX, Iv, XX }, + { "mov", eCX, Iv, XX }, + { "mov", eDX, Iv, XX }, + { "mov", eBX, Iv, XX }, + { "mov", eSP, Iv, XX }, + { "mov", eBP, Iv, XX }, + { "mov", eSI, Iv, XX }, + { "mov", eDI, Iv, XX }, + /* c0 */ + { GRP2b }, + { GRP2S }, + { "ret", Iw, XX, XX }, + { "ret", XX, XX, XX }, + { "les", Gv, Mp, XX }, + { "lds", Gv, Mp, XX }, + { "mov", Eb, Ib, XX }, + { "mov", Ev, Iv, XX }, + /* c8 */ + { "enter", Iw, Ib, XX }, + { "leave", XX, XX, XX }, + { "lret", Iw, XX, XX }, + { "lret", XX, XX, XX }, + { "int3", XX, XX, XX }, + { "int", Ib, XX, XX }, + { "into", XX, XX, XX }, + { "iret", XX, XX, XX }, + /* d0 */ + { GRP2b_one }, + { GRP2S_one }, + { GRP2b_cl }, + { GRP2S_cl }, + { "aam", sIb, XX, XX }, + { "aad", sIb, XX, XX }, + { "(bad)", XX, XX, XX }, + { "xlat", DSBX, XX, XX }, + /* d8 */ + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + { FLOAT }, + /* e0 */ + { "loopne", Jb, XX, XX }, + { "loope", Jb, XX, XX }, + { "loop", Jb, XX, XX }, + { "jEcxz", Jb, XX, XX }, + { "in", AL, Ib, 
XX }, + { "in", eAX, Ib, XX }, + { "out", Ib, AL, XX }, + { "out", Ib, eAX, XX }, + /* e8 */ + { "call", Jv, XX, XX }, + { "jmp", Jv, XX, XX }, + { "ljmp", Ap, XX, XX }, + { "jmp", Jb, XX, XX }, + { "in", AL, indirDX, XX }, + { "in", eAX, indirDX, XX }, + { "out", indirDX, AL, XX }, + { "out", indirDX, eAX, XX }, + /* f0 */ + { "(bad)", XX, XX, XX }, /* lock prefix */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, /* repne */ + { "(bad)", XX, XX, XX }, /* repz */ + { "hlt", XX, XX, XX }, + { "cmc", XX, XX, XX }, + { GRP3b }, + { GRP3S }, + /* f8 */ + { "clc", XX, XX, XX }, + { "stc", XX, XX, XX }, + { "cli", XX, XX, XX }, + { "sti", XX, XX, XX }, + { "cld", XX, XX, XX }, + { "std", XX, XX, XX }, + { GRP4 }, + { GRP5 }, +}; + +static const struct dis386 dis386_twobyte_att[] = { + /* 00 */ + { GRP6 }, + { GRP7 }, + { "larS", Gv, Ew, XX }, + { "lslS", Gv, Ew, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "clts", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 08 */ + { "invd", XX, XX, XX }, + { "wbinvd", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "ud2a", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { GRPAMD }, + { "femms", XX, XX, XX }, + { "", MX, EM, OPSUF }, /* See OP_3DNowSuffix */ + /* 10 */ + { PREGRP8 }, + { PREGRP9 }, + { "movlps", XM, EX, SIMD_Fixup, 'h' }, /* really only 2 operands */ + { "movlps", EX, XM, SIMD_Fixup, 'h' }, + { "unpcklps", XM, EX, XX }, + { "unpckhps", XM, EX, XX }, + { "movhps", XM, EX, SIMD_Fixup, 'l' }, + { "movhps", EX, XM, SIMD_Fixup, 'l' }, + /* 18 */ + { GRP14 }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 20 */ + /* these are all backward in appendix A of the intel book */ + { "movL", Rd, Cd, XX }, + { "movL", Rd, Dd, XX }, + { "movL", Cd, Rd, XX }, + { "movL", Dd, Rd, XX }, + { "movL", Rd, Td, XX }, + { "(bad)", XX, XX, XX }, + { "movL", Td, Rd, XX }, + { "(bad)", XX, XX, XX }, + /* 28 */ + { "movaps", XM, EX, XX }, + { "movaps", EX, XM, XX }, + { PREGRP2 }, + { "movntps", Ev, XM, XX }, + { PREGRP4 }, + { PREGRP3 }, + { "ucomiss", XM, EX, XX }, + { "comiss", XM, EX, XX }, + /* 30 */ + { "wrmsr", XX, XX, XX }, + { "rdtsc", XX, XX, XX }, + { "rdmsr", XX, XX, XX }, + { "rdpmc", XX, XX, XX }, + { "sysenter", XX, XX, XX }, + { "sysexit", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 38 */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 40 */ + { "cmovo", Gv, Ev, XX }, + { "cmovno", Gv, Ev, XX }, + { "cmovb", Gv, Ev, XX }, + { "cmovae", Gv, Ev, XX }, + { "cmove", Gv, Ev, XX }, + { "cmovne", Gv, Ev, XX }, + { "cmovbe", Gv, Ev, XX }, + { "cmova", Gv, Ev, XX }, + /* 48 */ + { "cmovs", Gv, Ev, XX }, + { "cmovns", Gv, Ev, XX }, + { "cmovp", Gv, Ev, XX }, + { "cmovnp", Gv, Ev, XX }, + { "cmovl", Gv, Ev, XX }, + { "cmovge", Gv, Ev, XX }, + { "cmovle", Gv, Ev, XX }, + { "cmovg", Gv, Ev, XX }, + /* 50 */ + { "movmskps", Gv, EX, XX }, + { PREGRP13 }, + { PREGRP12 }, + { PREGRP11 }, + { "andps", XM, EX, XX }, + { "andnps", XM, EX, XX }, + { "orps", XM, EX, XX }, + { "xorps", XM, EX, XX }, + /* 58 */ + { PREGRP0 }, + { PREGRP10 }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { PREGRP14 }, + { PREGRP7 }, + { PREGRP5 }, + { PREGRP6 }, + /* 60 */ + { "punpcklbw", MX, EM, XX }, + { "punpcklwd", MX, 
EM, XX }, + { "punpckldq", MX, EM, XX }, + { "packsswb", MX, EM, XX }, + { "pcmpgtb", MX, EM, XX }, + { "pcmpgtw", MX, EM, XX }, + { "pcmpgtd", MX, EM, XX }, + { "packuswb", MX, EM, XX }, + /* 68 */ + { "punpckhbw", MX, EM, XX }, + { "punpckhwd", MX, EM, XX }, + { "punpckhdq", MX, EM, XX }, + { "packssdw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "movd", MX, Ed, XX }, + { "movq", MX, EM, XX }, + /* 70 */ + { "pshufw", MX, EM, Ib }, + { GRP10 }, + { GRP11 }, + { GRP12 }, + { "pcmpeqb", MX, EM, XX }, + { "pcmpeqw", MX, EM, XX }, + { "pcmpeqd", MX, EM, XX }, + { "emms", XX, XX, XX }, + /* 78 */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "movd", Ed, MX, XX }, + { "movq", EM, MX, XX }, + /* 80 */ + { "jo", Jv, XX, XX }, + { "jno", Jv, XX, XX }, + { "jb", Jv, XX, XX }, + { "jae", Jv, XX, XX }, + { "je", Jv, XX, XX }, + { "jne", Jv, XX, XX }, + { "jbe", Jv, XX, XX }, + { "ja", Jv, XX, XX }, + /* 88 */ + { "js", Jv, XX, XX }, + { "jns", Jv, XX, XX }, + { "jp", Jv, XX, XX }, + { "jnp", Jv, XX, XX }, + { "jl", Jv, XX, XX }, + { "jge", Jv, XX, XX }, + { "jle", Jv, XX, XX }, + { "jg", Jv, XX, XX }, + /* 90 */ + { "seto", Eb, XX, XX }, + { "setno", Eb, XX, XX }, + { "setb", Eb, XX, XX }, + { "setae", Eb, XX, XX }, + { "sete", Eb, XX, XX }, + { "setne", Eb, XX, XX }, + { "setbe", Eb, XX, XX }, + { "seta", Eb, XX, XX }, + /* 98 */ + { "sets", Eb, XX, XX }, + { "setns", Eb, XX, XX }, + { "setp", Eb, XX, XX }, + { "setnp", Eb, XX, XX }, + { "setl", Eb, XX, XX }, + { "setge", Eb, XX, XX }, + { "setle", Eb, XX, XX }, + { "setg", Eb, XX, XX }, + /* a0 */ + { "pushP", fs, XX, XX }, + { "popP", fs, XX, XX }, + { "cpuid", XX, XX, XX }, + { "btS", Ev, Gv, XX }, + { "shldS", Ev, Gv, Ib }, + { "shldS", Ev, Gv, CL }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* a8 */ + { "pushP", gs, XX, XX }, + { "popP", gs, XX, XX }, + { "rsm", XX, XX, XX }, + { "btsS", Ev, Gv, XX }, + { "shrdS", Ev, Gv, Ib }, + { "shrdS", Ev, Gv, CL }, + { GRP13 }, + { "imulS", Gv, Ev, XX }, + /* b0 */ + { "cmpxchgB", Eb, Gb, XX }, + { "cmpxchgS", Ev, Gv, XX }, + { "lssS", Gv, Mp, XX }, + { "btrS", Ev, Gv, XX }, + { "lfsS", Gv, Mp, XX }, + { "lgsS", Gv, Mp, XX }, + { "movzbR", Gv, Eb, XX }, + { "movzwR", Gv, Ew, XX }, /* yes, there really is movzww ! */ + /* b8 */ + { "(bad)", XX, XX, XX }, + { "ud2b", XX, XX, XX }, + { GRP8 }, + { "btcS", Ev, Gv, XX }, + { "bsfS", Gv, Ev, XX }, + { "bsrS", Gv, Ev, XX }, + { "movsbR", Gv, Eb, XX }, + { "movswR", Gv, Ew, XX }, /* yes, there really is movsww ! 
*/ + /* c0 */ + { "xaddB", Eb, Gb, XX }, + { "xaddS", Ev, Gv, XX }, + { PREGRP1 }, + { "(bad)", XX, XX, XX }, + { "pinsrw", MX, Ev, Ib }, + { "pextrw", Ev, MX, Ib }, + { "shufps", XM, EX, Ib }, + { GRP9 }, + /* c8 */ + { "bswap", eAX, XX, XX }, /* bswap doesn't support 16 bit regs */ + { "bswap", eCX, XX, XX }, + { "bswap", eDX, XX, XX }, + { "bswap", eBX, XX, XX }, + { "bswap", eSP, XX, XX }, + { "bswap", eBP, XX, XX }, + { "bswap", eSI, XX, XX }, + { "bswap", eDI, XX, XX }, + /* d0 */ + { "(bad)", XX, XX, XX }, + { "psrlw", MX, EM, XX }, + { "psrld", MX, EM, XX }, + { "psrlq", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmullw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmovmskb", Ev, MX, XX }, + /* d8 */ + { "psubusb", MX, EM, XX }, + { "psubusw", MX, EM, XX }, + { "pminub", MX, EM, XX }, + { "pand", MX, EM, XX }, + { "paddusb", MX, EM, XX }, + { "paddusw", MX, EM, XX }, + { "pmaxub", MX, EM, XX }, + { "pandn", MX, EM, XX }, + /* e0 */ + { "pavgb", MX, EM, XX }, + { "psraw", MX, EM, XX }, + { "psrad", MX, EM, XX }, + { "pavgw", MX, EM, XX }, + { "pmulhuw", MX, EM, XX }, + { "pmulhw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "movntq", Ev, MX, XX }, + /* e8 */ + { "psubsb", MX, EM, XX }, + { "psubsw", MX, EM, XX }, + { "pminsw", MX, EM, XX }, + { "por", MX, EM, XX }, + { "paddsb", MX, EM, XX }, + { "paddsw", MX, EM, XX }, + { "pmaxsw", MX, EM, XX }, + { "pxor", MX, EM, XX }, + /* f0 */ + { "(bad)", XX, XX, XX }, + { "psllw", MX, EM, XX }, + { "pslld", MX, EM, XX }, + { "psllq", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmaddwd", MX, EM, XX }, + { "psadbw", MX, EM, XX }, + { "maskmovq", MX, EM, XX }, + /* f8 */ + { "psubb", MX, EM, XX }, + { "psubw", MX, EM, XX }, + { "psubd", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "paddb", MX, EM, XX }, + { "paddw", MX, EM, XX }, + { "paddd", MX, EM, XX }, + { "(bad)", XX, XX, XX } +}; + +static const struct dis386 dis386_twobyte_intel[] = { + /* 00 */ + { GRP6 }, + { GRP7 }, + { "lar", Gv, Ew, XX }, + { "lsl", Gv, Ew, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "clts", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 08 */ + { "invd", XX, XX, XX }, + { "wbinvd", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "ud2a", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { GRPAMD }, + { "femms" , XX, XX, XX}, + { "", MX, EM, OPSUF }, /* See OP_3DNowSuffix */ + /* 10 */ + { PREGRP8 }, + { PREGRP9 }, + { "movlps", XM, EX, SIMD_Fixup, 'h' }, /* really only 2 operands */ + { "movlps", EX, XM, SIMD_Fixup, 'h' }, + { "unpcklps", XM, EX, XX }, + { "unpckhps", XM, EX, XX }, + { "movhps", XM, EX, SIMD_Fixup, 'l' }, + { "movhps", EX, XM, SIMD_Fixup, 'l' }, + /* 18 */ + { GRP14 }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 20 */ + /* these are all backward in appendix A of the intel book */ + { "mov", Rd, Cd, XX }, + { "mov", Rd, Dd, XX }, + { "mov", Cd, Rd, XX }, + { "mov", Dd, Rd, XX }, + { "mov", Rd, Td, XX }, + { "(bad)", XX, XX, XX }, + { "mov", Td, Rd, XX }, + { "(bad)", XX, XX, XX }, + /* 28 */ + { "movaps", XM, EX, XX }, + { "movaps", EX, XM, XX }, + { PREGRP2 }, + { "movntps", Ev, XM, XX }, + { PREGRP4 }, + { PREGRP3 }, + { "ucomiss", XM, EX, XX }, + { "comiss", XM, EX, XX }, + /* 30 */ + { "wrmsr", XX, XX, XX }, + { "rdtsc", XX, XX, XX }, + { "rdmsr", XX, XX, XX }, + { "rdpmc", XX, XX, XX }, + { "sysenter", XX, XX, XX }, + { "sysexit", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { 
"(bad)", XX, XX, XX }, + /* 38 */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* 40 */ + { "cmovo", Gv, Ev, XX }, + { "cmovno", Gv, Ev, XX }, + { "cmovb", Gv, Ev, XX }, + { "cmovae", Gv, Ev, XX }, + { "cmove", Gv, Ev, XX }, + { "cmovne", Gv, Ev, XX }, + { "cmovbe", Gv, Ev, XX }, + { "cmova", Gv, Ev, XX }, + /* 48 */ + { "cmovs", Gv, Ev, XX }, + { "cmovns", Gv, Ev, XX }, + { "cmovp", Gv, Ev, XX }, + { "cmovnp", Gv, Ev, XX }, + { "cmovl", Gv, Ev, XX }, + { "cmovge", Gv, Ev, XX }, + { "cmovle", Gv, Ev, XX }, + { "cmovg", Gv, Ev, XX }, + /* 50 */ + { "movmskps", Gv, EX, XX }, + { PREGRP13 }, + { PREGRP12 }, + { PREGRP11 }, + { "andps", XM, EX, XX }, + { "andnps", XM, EX, XX }, + { "orps", XM, EX, XX }, + { "xorps", XM, EX, XX }, + /* 58 */ + { PREGRP0 }, + { PREGRP10 }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { PREGRP14 }, + { PREGRP7 }, + { PREGRP5 }, + { PREGRP6 }, + /* 60 */ + { "punpcklbw", MX, EM, XX }, + { "punpcklwd", MX, EM, XX }, + { "punpckldq", MX, EM, XX }, + { "packsswb", MX, EM, XX }, + { "pcmpgtb", MX, EM, XX }, + { "pcmpgtw", MX, EM, XX }, + { "pcmpgtd", MX, EM, XX }, + { "packuswb", MX, EM, XX }, + /* 68 */ + { "punpckhbw", MX, EM, XX }, + { "punpckhwd", MX, EM, XX }, + { "punpckhdq", MX, EM, XX }, + { "packssdw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "movd", MX, Ed, XX }, + { "movq", MX, EM, XX }, + /* 70 */ + { "pshufw", MX, EM, Ib }, + { GRP10 }, + { GRP11 }, + { GRP12 }, + { "pcmpeqb", MX, EM, XX }, + { "pcmpeqw", MX, EM, XX }, + { "pcmpeqd", MX, EM, XX }, + { "emms", XX, XX, XX }, + /* 78 */ + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "movd", Ed, MX, XX }, + { "movq", EM, MX, XX }, + /* 80 */ + { "jo", Jv, XX, XX }, + { "jno", Jv, XX, XX }, + { "jb", Jv, XX, XX }, + { "jae", Jv, XX, XX }, + { "je", Jv, XX, XX }, + { "jne", Jv, XX, XX }, + { "jbe", Jv, XX, XX }, + { "ja", Jv, XX, XX }, + /* 88 */ + { "js", Jv, XX, XX }, + { "jns", Jv, XX, XX }, + { "jp", Jv, XX, XX }, + { "jnp", Jv, XX, XX }, + { "jl", Jv, XX, XX }, + { "jge", Jv, XX, XX }, + { "jle", Jv, XX, XX }, + { "jg", Jv, XX, XX }, + /* 90 */ + { "seto", Eb, XX, XX }, + { "setno", Eb, XX, XX }, + { "setb", Eb, XX, XX }, + { "setae", Eb, XX, XX }, + { "sete", Eb, XX, XX }, + { "setne", Eb, XX, XX }, + { "setbe", Eb, XX, XX }, + { "seta", Eb, XX, XX }, + /* 98 */ + { "sets", Eb, XX, XX }, + { "setns", Eb, XX, XX }, + { "setp", Eb, XX, XX }, + { "setnp", Eb, XX, XX }, + { "setl", Eb, XX, XX }, + { "setge", Eb, XX, XX }, + { "setle", Eb, XX, XX }, + { "setg", Eb, XX, XX }, + /* a0 */ + { "push", fs, XX, XX }, + { "pop", fs, XX, XX }, + { "cpuid", XX, XX, XX }, + { "bt", Ev, Gv, XX }, + { "shld", Ev, Gv, Ib }, + { "shld", Ev, Gv, CL }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + /* a8 */ + { "push", gs, XX, XX }, + { "pop", gs, XX, XX }, + { "rsm" , XX, XX, XX}, + { "bts", Ev, Gv, XX }, + { "shrd", Ev, Gv, Ib }, + { "shrd", Ev, Gv, CL }, + { GRP13 }, + { "imul", Gv, Ev, XX }, + /* b0 */ + { "cmpxchg", Eb, Gb, XX }, + { "cmpxchg", Ev, Gv, XX }, + { "lss", Gv, Mp, XX }, + { "btr", Ev, Gv, XX }, + { "lfs", Gv, Mp, XX }, + { "lgs", Gv, Mp, XX }, + { "movzx", Gv, Eb, XX }, + { "movzx", Gv, Ew, XX }, + /* b8 */ + { "(bad)", XX, XX, XX }, + { "ud2b", XX, XX, XX }, + { GRP8 }, 
+ { "btc", Ev, Gv, XX }, + { "bsf", Gv, Ev, XX }, + { "bsr", Gv, Ev, XX }, + { "movsx", Gv, Eb, XX }, + { "movsx", Gv, Ew, XX }, + /* c0 */ + { "xadd", Eb, Gb, XX }, + { "xadd", Ev, Gv, XX }, + { PREGRP1 }, + { "(bad)", XX, XX, XX }, + { "pinsrw", MX, Ev, Ib }, + { "pextrw", Ev, MX, Ib }, + { "shufps", XM, EX, Ib }, + { GRP9 }, + /* c8 */ + { "bswap", eAX, XX, XX }, /* bswap doesn't support 16 bit regs */ + { "bswap", eCX, XX, XX }, + { "bswap", eDX, XX, XX }, + { "bswap", eBX, XX, XX }, + { "bswap", eSP, XX, XX }, + { "bswap", eBP, XX, XX }, + { "bswap", eSI, XX, XX }, + { "bswap", eDI, XX, XX }, + /* d0 */ + { "(bad)", XX, XX, XX }, + { "psrlw", MX, EM, XX }, + { "psrld", MX, EM, XX }, + { "psrlq", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmullw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmovmskb", Ev, MX, XX }, + /* d8 */ + { "psubusb", MX, EM, XX }, + { "psubusw", MX, EM, XX }, + { "pminub", MX, EM, XX }, + { "pand", MX, EM, XX }, + { "paddusb", MX, EM, XX }, + { "paddusw", MX, EM, XX }, + { "pmaxub", MX, EM, XX }, + { "pandn", MX, EM, XX }, + /* e0 */ + { "pavgb", MX, EM, XX }, + { "psraw", MX, EM, XX }, + { "psrad", MX, EM, XX }, + { "pavgw", MX, EM, XX }, + { "pmulhuw", MX, EM, XX }, + { "pmulhw", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "movntq", Ev, MX, XX }, + /* e8 */ + { "psubsb", MX, EM, XX }, + { "psubsw", MX, EM, XX }, + { "pminsw", MX, EM, XX }, + { "por", MX, EM, XX }, + { "paddsb", MX, EM, XX }, + { "paddsw", MX, EM, XX }, + { "pmaxsw", MX, EM, XX }, + { "pxor", MX, EM, XX }, + /* f0 */ + { "(bad)", XX, XX, XX }, + { "psllw", MX, EM, XX }, + { "pslld", MX, EM, XX }, + { "psllq", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "pmaddwd", MX, EM, XX }, + { "psadbw", MX, EM, XX }, + { "maskmovq", MX, EM, XX }, + /* f8 */ + { "psubb", MX, EM, XX }, + { "psubw", MX, EM, XX }, + { "psubd", MX, EM, XX }, + { "(bad)", XX, XX, XX }, + { "paddb", MX, EM, XX }, + { "paddw", MX, EM, XX }, + { "paddd", MX, EM, XX }, + { "(bad)", XX, XX, XX } +}; + +static const unsigned char onebyte_has_modrm[256] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ------------------------------- */ + /* 00 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 00 */ + /* 10 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 10 */ + /* 20 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 20 */ + /* 30 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 30 */ + /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40 */ + /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 50 */ + /* 60 */ 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0, /* 60 */ + /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 70 */ + /* 80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80 */ + /* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 90 */ + /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0 */ + /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* b0 */ + /* c0 */ 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* c0 */ + /* d0 */ 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1, /* d0 */ + /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0 */ + /* f0 */ 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1 /* f0 */ + /* ------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; + +static const unsigned char twobyte_has_modrm[256] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ------------------------------- */ + /* 00 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1, /* 0f */ + /* 10 */ 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0, /* 1f */ + /* 20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 2f */ + /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */ + /* 40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4f */ + /* 50 */ 1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, /* 5f */ + /* 60 */ 
1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1, /* 6f */ + /* 70 */ 1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, /* 7f */ + /* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 8f */ + /* 90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 9f */ + /* a0 */ 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1, /* af */ + /* b0 */ 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1, /* bf */ + /* c0 */ 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, /* cf */ + /* d0 */ 0,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1, /* df */ + /* e0 */ 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1, /* ef */ + /* f0 */ 0,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0 /* ff */ + /* ------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; + +static const unsigned char twobyte_uses_f3_prefix[256] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ------------------------------- */ + /* 00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0f */ + /* 10 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1f */ + /* 20 */ 0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0, /* 2f */ + /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */ + /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ + /* 50 */ 0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,1, /* 5f */ + /* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */ + /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */ + /* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 8f */ + /* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 9f */ + /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */ + /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */ + /* c0 */ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */ + /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */ + /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */ + /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* ff */ + /* ------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; + +static char obuf[100]; +static char *obufp; +static char scratchbuf[100]; +static unsigned char *start_codep; +static unsigned char *insn_codep; +static unsigned char *codep; +static disassemble_info *the_info; +static int mod; +static int rm; +static int reg; +static void oappend PARAMS ((const char *s)); + +static const char *names32[]={ + "%eax","%ecx","%edx","%ebx", "%esp","%ebp","%esi","%edi", +}; +static const char *names16[] = { + "%ax","%cx","%dx","%bx","%sp","%bp","%si","%di", +}; +static const char *names8[] = { + "%al","%cl","%dl","%bl","%ah","%ch","%dh","%bh", +}; +static const char *names_seg[] = { + "%es","%cs","%ss","%ds","%fs","%gs","%?","%?", +}; +static const char *index16[] = { + "%bx,%si","%bx,%di","%bp,%si","%bp,%di","%si","%di","%bp","%bx" +}; + +static const struct dis386 grps[][8] = { + /* GRP1b */ + { + { "addA", Eb, Ib, XX }, + { "orA", Eb, Ib, XX }, + { "adcA", Eb, Ib, XX }, + { "sbbA", Eb, Ib, XX }, + { "andA", Eb, Ib, XX }, + { "subA", Eb, Ib, XX }, + { "xorA", Eb, Ib, XX }, + { "cmpA", Eb, Ib, XX } + }, + /* GRP1S */ + { + { "addQ", Ev, Iv, XX }, + { "orQ", Ev, Iv, XX }, + { "adcQ", Ev, Iv, XX }, + { "sbbQ", Ev, Iv, XX }, + { "andQ", Ev, Iv, XX }, + { "subQ", Ev, Iv, XX }, + { "xorQ", Ev, Iv, XX }, + { "cmpQ", Ev, Iv, XX } + }, + /* GRP1Ss */ + { + { "addQ", Ev, sIb, XX }, + { "orQ", Ev, sIb, XX }, + { "adcQ", Ev, sIb, XX }, + { "sbbQ", Ev, sIb, XX }, + { "andQ", Ev, sIb, XX }, + { "subQ", Ev, sIb, XX }, + { "xorQ", Ev, sIb, XX }, + { "cmpQ", Ev, sIb, XX } + }, + /* GRP2b */ + { + { "rolA", Eb, Ib, XX }, + { "rorA", Eb, Ib, XX }, + { "rclA", Eb, Ib, XX }, + { "rcrA", Eb, Ib, XX }, + { "shlA", Eb, Ib, XX }, + { "shrA", Eb, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "sarA", Eb, Ib, XX }, + }, + /* GRP2S */ + { + { "rolQ", Ev, Ib, XX }, + { "rorQ", Ev, Ib, XX }, + { "rclQ", Ev, Ib, XX }, + { "rcrQ", Ev, Ib, XX }, + { 
"shlQ", Ev, Ib, XX }, + { "shrQ", Ev, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "sarQ", Ev, Ib, XX }, + }, + /* GRP2b_one */ + { + { "rolA", Eb, XX, XX }, + { "rorA", Eb, XX, XX }, + { "rclA", Eb, XX, XX }, + { "rcrA", Eb, XX, XX }, + { "shlA", Eb, XX, XX }, + { "shrA", Eb, XX, XX }, + { "(bad)", XX, XX, XX }, + { "sarA", Eb, XX, XX }, + }, + /* GRP2S_one */ + { + { "rolQ", Ev, XX, XX }, + { "rorQ", Ev, XX, XX }, + { "rclQ", Ev, XX, XX }, + { "rcrQ", Ev, XX, XX }, + { "shlQ", Ev, XX, XX }, + { "shrQ", Ev, XX, XX }, + { "(bad)", XX, XX, XX}, + { "sarQ", Ev, XX, XX }, + }, + /* GRP2b_cl */ + { + { "rolA", Eb, CL, XX }, + { "rorA", Eb, CL, XX }, + { "rclA", Eb, CL, XX }, + { "rcrA", Eb, CL, XX }, + { "shlA", Eb, CL, XX }, + { "shrA", Eb, CL, XX }, + { "(bad)", XX, XX, XX }, + { "sarA", Eb, CL, XX }, + }, + /* GRP2S_cl */ + { + { "rolQ", Ev, CL, XX }, + { "rorQ", Ev, CL, XX }, + { "rclQ", Ev, CL, XX }, + { "rcrQ", Ev, CL, XX }, + { "shlQ", Ev, CL, XX }, + { "shrQ", Ev, CL, XX }, + { "(bad)", XX, XX, XX }, + { "sarQ", Ev, CL, XX } + }, + /* GRP3b */ + { + { "testA", Eb, Ib, XX }, + { "(bad)", Eb, XX, XX }, + { "notA", Eb, XX, XX }, + { "negA", Eb, XX, XX }, + { "mulB", AL, Eb, XX }, + { "imulB", AL, Eb, XX }, + { "divB", AL, Eb, XX }, + { "idivB", AL, Eb, XX } + }, + /* GRP3S */ + { + { "testQ", Ev, Iv, XX }, + { "(bad)", XX, XX, XX }, + { "notQ", Ev, XX, XX }, + { "negQ", Ev, XX, XX }, + { "mulS", eAX, Ev, XX }, + { "imulS", eAX, Ev, XX }, + { "divS", eAX, Ev, XX }, + { "idivS", eAX, Ev, XX }, + }, + /* GRP4 */ + { + { "incA", Eb, XX, XX }, + { "decA", Eb, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP5 */ + { + { "incQ", Ev, XX, XX }, + { "decQ", Ev, XX, XX }, + { "callP", indirEv, XX, XX }, + { "lcallP", indirEv, XX, XX }, + { "jmpP", indirEv, XX, XX }, + { "ljmpP", indirEv, XX, XX }, + { "pushQ", Ev, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP6 */ + { + { "sldt", Ew, XX, XX }, + { "str", Ew, XX, XX }, + { "lldt", Ew, XX, XX }, + { "ltr", Ew, XX, XX }, + { "verr", Ew, XX, XX }, + { "verw", Ew, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX } + }, + /* GRP7 */ + { + { "sgdt", Ew, XX, XX }, + { "sidt", Ew, XX, XX }, + { "lgdt", Ew, XX, XX }, + { "lidt", Ew, XX, XX }, + { "smsw", Ew, XX, XX }, + { "(bad)", XX, XX, XX }, + { "lmsw", Ew, XX, XX }, + { "invlpg", Ew, XX, XX }, + }, + /* GRP8 */ + { + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "btQ", Ev, Ib, XX }, + { "btsQ", Ev, Ib, XX }, + { "btrQ", Ev, Ib, XX }, + { "btcQ", Ev, Ib, XX }, + }, + /* GRP9 */ + { + { "(bad)", XX, XX, XX }, + { "cmpxchg8b", Ev, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP10 */ + { + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "psrlw", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "psraw", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "psllw", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP11 */ + { + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "psrld", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "psrad", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + { "pslld", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP12 */ + { + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "psrlq", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + 
{ "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "psllq", MS, Ib, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRP13 */ + { + { "fxsave", Ev, XX, XX }, + { "fxrstor", Ev, XX, XX }, + { "ldmxcsr", Ev, XX, XX }, + { "stmxcsr", Ev, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "sfence", None, XX, XX }, + }, + /* GRP14 */ + { + { "prefetchnta", Ev, XX, XX }, + { "prefetcht0", Ev, XX, XX }, + { "prefetcht1", Ev, XX, XX }, + { "prefetcht2", Ev, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* GRPAMD */ + { + { "prefetch", Eb, XX, XX }, + { "prefetchw", Eb, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + } + +}; + +static const struct dis386 prefix_user_table[][2] = { + /* PREGRP0 */ + { + { "addps", XM, EX, XX }, + { "addss", XM, EX, XX }, + }, + /* PREGRP1 */ + { + { "", XM, EX, OPSIMD }, /* See OP_SIMD_SUFFIX */ + { "", XM, EX, OPSIMD }, + }, + /* PREGRP2 */ + { + { "cvtpi2ps", XM, EM, XX }, + { "cvtsi2ss", XM, Ev, XX }, + }, + /* PREGRP3 */ + { + { "cvtps2pi", MX, EX, XX }, + { "cvtss2si", Gv, EX, XX }, + }, + /* PREGRP4 */ + { + { "cvttps2pi", MX, EX, XX }, + { "cvttss2si", Gv, EX, XX }, + }, + /* PREGRP5 */ + { + { "divps", XM, EX, XX }, + { "divss", XM, EX, XX }, + }, + /* PREGRP6 */ + { + { "maxps", XM, EX, XX }, + { "maxss", XM, EX, XX }, + }, + /* PREGRP7 */ + { + { "minps", XM, EX, XX }, + { "minss", XM, EX, XX }, + }, + /* PREGRP8 */ + { + { "movups", XM, EX, XX }, + { "movss", XM, EX, XX }, + }, + /* PREGRP9 */ + { + { "movups", EX, XM, XX }, + { "movss", EX, XM, XX }, + }, + /* PREGRP10 */ + { + { "mulps", XM, EX, XX }, + { "mulss", XM, EX, XX }, + }, + /* PREGRP11 */ + { + { "rcpps", XM, EX, XX }, + { "rcpss", XM, EX, XX }, + }, + /* PREGRP12 */ + { + { "rsqrtps", XM, EX, XX }, + { "rsqrtss", XM, EX, XX }, + }, + /* PREGRP13 */ + { + { "sqrtps", XM, EX, XX }, + { "sqrtss", XM, EX, XX }, + }, + /* PREGRP14 */ + { + { "subps", XM, EX, XX }, + { "subss", XM, EX, XX }, + } +}; + +#define INTERNAL_DISASSEMBLER_ERROR "" + +static void +ckprefix () +{ + prefixes = 0; + used_prefixes = 0; + while (1) + { + FETCH_DATA (the_info, codep + 1); + switch (*codep) + { + case 0xf3: + prefixes |= PREFIX_REPZ; + break; + case 0xf2: + prefixes |= PREFIX_REPNZ; + break; + case 0xf0: + prefixes |= PREFIX_LOCK; + break; + case 0x2e: + prefixes |= PREFIX_CS; + break; + case 0x36: + prefixes |= PREFIX_SS; + break; + case 0x3e: + prefixes |= PREFIX_DS; + break; + case 0x26: + prefixes |= PREFIX_ES; + break; + case 0x64: + prefixes |= PREFIX_FS; + break; + case 0x65: + prefixes |= PREFIX_GS; + break; + case 0x66: + prefixes |= PREFIX_DATA; + break; + case 0x67: + prefixes |= PREFIX_ADDR; + break; + case FWAIT_OPCODE: + /* fwait is really an instruction. If there are prefixes + before the fwait, they belong to the fwait, *not* to the + following instruction. */ + if (prefixes) + { + prefixes |= PREFIX_FWAIT; + codep++; + return; + } + prefixes = PREFIX_FWAIT; + break; + default: + return; + } + codep++; + } +} + +/* Return the name of the prefix byte PREF, or NULL if PREF is not a + prefix byte. 
*/ + +static const char * +prefix_name (pref, sizeflag) + int pref; + int sizeflag; +{ + switch (pref) + { + case 0xf3: + return "repz"; + case 0xf2: + return "repnz"; + case 0xf0: + return "lock"; + case 0x2e: + return "cs"; + case 0x36: + return "ss"; + case 0x3e: + return "ds"; + case 0x26: + return "es"; + case 0x64: + return "fs"; + case 0x65: + return "gs"; + case 0x66: + return (sizeflag & DFLAG) ? "data16" : "data32"; + case 0x67: + return (sizeflag & AFLAG) ? "addr16" : "addr32"; + case FWAIT_OPCODE: + return "fwait"; + default: + return NULL; + } +} + +static char op1out[100], op2out[100], op3out[100]; +static int op_ad, op_index[3]; +static unsigned int op_address[3]; +static unsigned int start_pc; + + +/* + * On the 386's of 1988, the maximum length of an instruction is 15 bytes. + * (see topic "Redundant prefixes" in the "Differences from 8086" + * section of the "Virtual 8086 Mode" chapter.) + * 'pc' should be the address of this instruction, it will + * be used to print the target address if this is a relative jump or call + * The function returns the length of this instruction in bytes. + */ + +static int print_insn_i386 + PARAMS ((bfd_vma pc, disassemble_info *info)); + +static char intel_syntax; +static char open_char; +static char close_char; +static char separator_char; +static char scale_char; + +int +print_insn_i386_att (pc, info) + bfd_vma pc; + disassemble_info *info; +{ + intel_syntax = 0; + open_char = '('; + close_char = ')'; + separator_char = ','; + scale_char = ','; + + return print_insn_i386 (pc, info); +} + +int +print_insn_i386_intel (pc, info) + bfd_vma pc; + disassemble_info *info; +{ + intel_syntax = 1; + open_char = '['; + close_char = ']'; + separator_char = '+'; + scale_char = '*'; + + return print_insn_i386 (pc, info); +} + +static int +print_insn_i386 (pc, info) + bfd_vma pc; + disassemble_info *info; +{ + const struct dis386 *dp; + int i; + int two_source_ops; + char *first, *second, *third; + int needcomma; + unsigned char need_modrm; + unsigned char uses_f3_prefix; + VOLATILE int sizeflag; + VOLATILE int orig_sizeflag; + + struct dis_private priv; + bfd_byte *inbuf = priv.the_buffer; + + if (info->mach == bfd_mach_i386_i386 + || info->mach == bfd_mach_i386_i386_intel_syntax) + sizeflag = AFLAG|DFLAG; + else if (info->mach == bfd_mach_i386_i8086) + sizeflag = 0; + else + abort (); + orig_sizeflag = sizeflag; + + /* The output looks better if we put 7 bytes on a line, since that + puts most long word instructions on a single line. */ + info->bytes_per_line = 7; + + info->private_data = (PTR) &priv; + priv.max_fetched = priv.the_buffer; + priv.insn_start = pc; + + obuf[0] = 0; + op1out[0] = 0; + op2out[0] = 0; + op3out[0] = 0; + + op_index[0] = op_index[1] = op_index[2] = -1; + + the_info = info; + start_pc = pc; + start_codep = inbuf; + codep = inbuf; + +#ifndef __KERNEL__ + if (setjmp (priv.bailout) != 0) + { + const char *name; + + /* Getting here means we tried for data but didn't get it. That + means we have an incomplete instruction of some sort. Just + print the first byte as a prefix or a .byte pseudo-op. */ + if (codep > inbuf) + { + name = prefix_name (inbuf[0], orig_sizeflag); + if (name != NULL) + (*info->fprintf_func) (info->stream, "%s", name); + else + { + /* Just print the first byte as a .byte instruction. 
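
The two public entry points above differ only in the punctuation characters they install (open_char, close_char, separator_char, scale_char) before calling the common print_insn_i386(): AT&T renders a scaled memory operand as disp(base,index,scale), Intel as [base+index*scale+disp]. A hedged, self-contained illustration of that convention -- not the patch's actual code path, just the same four-character idea:

	#include <stdio.h>

	/* Render base+index*scale+disp in either syntax; the punctuation
	 * mirrors open_char/close_char/separator_char/scale_char above. */
	static void render(int intel, const char *base, const char *index,
			   int scale, int disp)
	{
		if (intel)
			printf("[%s+%s*%d+%d]\n", base, index, scale, disp);
		else
			printf("%d(%%%s,%%%s,%d)\n", disp, base, index, scale);
	}

	int main(void)
	{
		render(0, "ebx", "esi", 4, 8);	/* AT&T:  8(%ebx,%esi,4) */
		render(1, "ebx", "esi", 4, 8);	/* Intel: [ebx+esi*4+8]  */
		return 0;
	}
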
*/ + (*info->fprintf_func) (info->stream, ".byte 0x%x", + (unsigned int) inbuf[0]); + } + + return 1; + } + + return -1; + } +#endif + + ckprefix (); + + insn_codep = codep; + + FETCH_DATA (info, codep + 1); + two_source_ops = (*codep == 0x62) || (*codep == 0xc8); + + obufp = obuf; + + if ((prefixes & PREFIX_FWAIT) + && ((*codep < 0xd8) || (*codep > 0xdf))) + { + const char *name; + + /* fwait not followed by floating point instruction. Print the + first prefix, which is probably fwait itself. */ + name = prefix_name (inbuf[0], orig_sizeflag); + if (name == NULL) + name = INTERNAL_DISASSEMBLER_ERROR; + (*info->fprintf_func) (info->stream, "%s", name); + return 1; + } + + if (*codep == 0x0f) + { + FETCH_DATA (info, codep + 2); + if (intel_syntax) + dp = &dis386_twobyte_intel[*++codep]; + else + dp = &dis386_twobyte_att[*++codep]; + need_modrm = twobyte_has_modrm[*codep]; + uses_f3_prefix = twobyte_uses_f3_prefix[*codep]; + } + else + { + if (intel_syntax) + dp = &dis386_intel[*codep]; + else + dp = &dis386_att[*codep]; + need_modrm = onebyte_has_modrm[*codep]; + uses_f3_prefix = 0; + } + codep++; + + if (!uses_f3_prefix && (prefixes & PREFIX_REPZ)) + { + oappend ("repz "); + used_prefixes |= PREFIX_REPZ; + } + if (prefixes & PREFIX_REPNZ) + { + oappend ("repnz "); + used_prefixes |= PREFIX_REPNZ; + } + if (prefixes & PREFIX_LOCK) + { + oappend ("lock "); + used_prefixes |= PREFIX_LOCK; + } + + if (prefixes & PREFIX_DATA) + sizeflag ^= DFLAG; + + if (prefixes & PREFIX_ADDR) + { + sizeflag ^= AFLAG; + if (sizeflag & AFLAG) + oappend ("addr32 "); + else + oappend ("addr16 "); + used_prefixes |= PREFIX_ADDR; + } + + if (need_modrm) + { + FETCH_DATA (info, codep + 1); + mod = (*codep >> 6) & 3; + reg = (*codep >> 3) & 7; + rm = *codep & 7; + } + + if (dp->name == NULL && dp->bytemode1 == FLOATCODE) + { + dofloat (sizeflag); + } + else + { + if (dp->name == NULL) + { + switch(dp->bytemode2) + { + case USE_GROUPS: + dp = &grps[dp->bytemode1][reg]; + break; + case USE_PREFIX_USER_TABLE: + dp = &prefix_user_table[dp->bytemode1][prefixes & PREFIX_REPZ ? 1 : 0]; + used_prefixes |= (prefixes & PREFIX_REPZ); + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + break; + } + } + + putop (dp->name, sizeflag); + + obufp = op1out; + op_ad = 2; + if (dp->op1) + (*dp->op1)(dp->bytemode1, sizeflag); + + obufp = op2out; + op_ad = 1; + if (dp->op2) + (*dp->op2)(dp->bytemode2, sizeflag); + + obufp = op3out; + op_ad = 0; + if (dp->op3) + (*dp->op3)(dp->bytemode3, sizeflag); + } + + /* See if any prefixes were not used. If so, print the first one + separately. If we don't do this, we'll wind up printing an + instruction stream which does not precisely correspond to the + bytes we are disassembling. */ + if ((prefixes & ~used_prefixes) != 0) + { + const char *name; + + name = prefix_name (inbuf[0], orig_sizeflag); + if (name == NULL) + name = INTERNAL_DISASSEMBLER_ERROR; + (*info->fprintf_func) (info->stream, "%s", name); + return 1; + } + + obufp = obuf + strlen (obuf); + for (i = strlen (obuf); i < 6; i++) + oappend (" "); + oappend (" "); + (*info->fprintf_func) (info->stream, "%s", obuf); + + /* The enter and bound instructions are printed with operands in the same + order as the intel book; everything else is printed in reverse order. 
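
One detail of the dispatch above worth spelling out: the USE_PREFIX_USER_TABLE arm picks column 0 or 1 of prefix_user_table according to whether an 0xf3 (repz) prefix preceded the opcode -- that is the entire difference between, say, addps and addss. A standalone sketch of the selection, reusing PREGRP0's two mnemonics:

	#include <stdio.h>

	#define PREFIX_REPZ 1

	static const char *pregrp0[2] = { "addps", "addss" };	/* as in PREGRP0 */

	int main(void)
	{
		unsigned char code[] = { 0xf3, 0x0f, 0x58 };	/* repz; 0f 58 */
		unsigned prefixes = (code[0] == 0xf3) ? PREFIX_REPZ : 0;
		unsigned base = prefixes ? 1 : 0;	/* skip the prefix byte */

		if (code[base] == 0x0f && code[base + 1] == 0x58)
			printf("%s\n", pregrp0[(prefixes & PREFIX_REPZ) ? 1 : 0]);
		return 0;
	}
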
*/ + if (intel_syntax || two_source_ops) + { + first = op1out; + second = op2out; + third = op3out; + op_ad = op_index[0]; + op_index[0] = op_index[2]; + op_index[2] = op_ad; + } + else + { + first = op3out; + second = op2out; + third = op1out; + } + needcomma = 0; + if (*first) + { + if (op_index[0] != -1) + (*info->print_address_func) ((bfd_vma) op_address[op_index[0]], info); + else + (*info->fprintf_func) (info->stream, "%s", first); + needcomma = 1; + } + if (*second) + { + if (needcomma) + (*info->fprintf_func) (info->stream, ","); + if (op_index[1] != -1) + (*info->print_address_func) ((bfd_vma) op_address[op_index[1]], info); + else + (*info->fprintf_func) (info->stream, "%s", second); + needcomma = 1; + } + if (*third) + { + if (needcomma) + (*info->fprintf_func) (info->stream, ","); + if (op_index[2] != -1) + (*info->print_address_func) ((bfd_vma) op_address[op_index[2]], info); + else + (*info->fprintf_func) (info->stream, "%s", third); + } + return codep - inbuf; +} + +static const char *float_mem_att[] = { + /* d8 */ + "fadds", + "fmuls", + "fcoms", + "fcomps", + "fsubs", + "fsubrs", + "fdivs", + "fdivrs", + /* d9 */ + "flds", + "(bad)", + "fsts", + "fstps", + "fldenv", + "fldcw", + "fNstenv", + "fNstcw", + /* da */ + "fiaddl", + "fimull", + "ficoml", + "ficompl", + "fisubl", + "fisubrl", + "fidivl", + "fidivrl", + /* db */ + "fildl", + "(bad)", + "fistl", + "fistpl", + "(bad)", + "fldt", + "(bad)", + "fstpt", + /* dc */ + "faddl", + "fmull", + "fcoml", + "fcompl", + "fsubl", + "fsubrl", + "fdivl", + "fdivrl", + /* dd */ + "fldl", + "(bad)", + "fstl", + "fstpl", + "frstor", + "(bad)", + "fNsave", + "fNstsw", + /* de */ + "fiadd", + "fimul", + "ficom", + "ficomp", + "fisub", + "fisubr", + "fidiv", + "fidivr", + /* df */ + "fild", + "(bad)", + "fist", + "fistp", + "fbld", + "fildll", + "fbstp", + "fistpll", +}; + +static const char *float_mem_intel[] = { + /* d8 */ + "fadd", + "fmul", + "fcom", + "fcomp", + "fsub", + "fsubr", + "fdiv", + "fdivr", + /* d9 */ + "fld", + "(bad)", + "fst", + "fstp", + "fldenv", + "fldcw", + "fNstenv", + "fNstcw", + /* da */ + "fiadd", + "fimul", + "ficom", + "ficomp", + "fisub", + "fisubr", + "fidiv", + "fidivr", + /* db */ + "fild", + "(bad)", + "fist", + "fistp", + "(bad)", + "fld", + "(bad)", + "fstp", + /* dc */ + "fadd", + "fmul", + "fcom", + "fcomp", + "fsub", + "fsubr", + "fdiv", + "fdivr", + /* dd */ + "fld", + "(bad)", + "fst", + "fstp", + "frstor", + "(bad)", + "fNsave", + "fNstsw", + /* de */ + "fiadd", + "fimul", + "ficom", + "ficomp", + "fisub", + "fisubr", + "fidiv", + "fidivr", + /* df */ + "fild", + "(bad)", + "fist", + "fistp", + "fbld", + "fild", + "fbstp", + "fistpll", +}; + +#define ST OP_ST, 0 +#define STi OP_STi, 0 + +#define FGRPd9_2 NULL, NULL, 0, NULL, 0, NULL, 0 +#define FGRPd9_4 NULL, NULL, 1, NULL, 0, NULL, 0 +#define FGRPd9_5 NULL, NULL, 2, NULL, 0, NULL, 0 +#define FGRPd9_6 NULL, NULL, 3, NULL, 0, NULL, 0 +#define FGRPd9_7 NULL, NULL, 4, NULL, 0, NULL, 0 +#define FGRPda_5 NULL, NULL, 5, NULL, 0, NULL, 0 +#define FGRPdb_4 NULL, NULL, 6, NULL, 0, NULL, 0 +#define FGRPde_3 NULL, NULL, 7, NULL, 0, NULL, 0 +#define FGRPdf_4 NULL, NULL, 8, NULL, 0, NULL, 0 + +static const struct dis386 float_reg[][8] = { + /* d8 */ + { + { "fadd", ST, STi, XX }, + { "fmul", ST, STi, XX }, + { "fcom", STi, XX, XX }, + { "fcomp", STi, XX, XX }, + { "fsub", ST, STi, XX }, + { "fsubr", ST, STi, XX }, + { "fdiv", ST, STi, XX }, + { "fdivr", ST, STi, XX }, + }, + /* d9 */ + { + { "fld", STi, XX, XX }, + { "fxch", STi, XX, XX }, + { FGRPd9_2 }, + { 
"(bad)", XX, XX, XX }, + { FGRPd9_4 }, + { FGRPd9_5 }, + { FGRPd9_6 }, + { FGRPd9_7 }, + }, + /* da */ + { + { "fcmovb", ST, STi, XX }, + { "fcmove", ST, STi, XX }, + { "fcmovbe",ST, STi, XX }, + { "fcmovu", ST, STi, XX }, + { "(bad)", XX, XX, XX }, + { FGRPda_5 }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* db */ + { + { "fcmovnb",ST, STi, XX }, + { "fcmovne",ST, STi, XX }, + { "fcmovnbe",ST, STi, XX }, + { "fcmovnu",ST, STi, XX }, + { FGRPdb_4 }, + { "fucomi", ST, STi, XX }, + { "fcomi", ST, STi, XX }, + { "(bad)", XX, XX, XX }, + }, + /* dc */ + { + { "fadd", STi, ST, XX }, + { "fmul", STi, ST, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, +#if UNIXWARE_COMPAT + { "fsub", STi, ST, XX }, + { "fsubr", STi, ST, XX }, + { "fdiv", STi, ST, XX }, + { "fdivr", STi, ST, XX }, +#else + { "fsubr", STi, ST, XX }, + { "fsub", STi, ST, XX }, + { "fdivr", STi, ST, XX }, + { "fdiv", STi, ST, XX }, +#endif + }, + /* dd */ + { + { "ffree", STi, XX, XX }, + { "(bad)", XX, XX, XX }, + { "fst", STi, XX, XX }, + { "fstp", STi, XX, XX }, + { "fucom", STi, XX, XX }, + { "fucomp", STi, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + }, + /* de */ + { + { "faddp", STi, ST, XX }, + { "fmulp", STi, ST, XX }, + { "(bad)", XX, XX, XX }, + { FGRPde_3 }, +#if UNIXWARE_COMPAT + { "fsubp", STi, ST, XX }, + { "fsubrp", STi, ST, XX }, + { "fdivp", STi, ST, XX }, + { "fdivrp", STi, ST, XX }, +#else + { "fsubrp", STi, ST, XX }, + { "fsubp", STi, ST, XX }, + { "fdivrp", STi, ST, XX }, + { "fdivp", STi, ST, XX }, +#endif + }, + /* df */ + { + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { "(bad)", XX, XX, XX }, + { FGRPdf_4 }, + { "fucomip",ST, STi, XX }, + { "fcomip", ST, STi, XX }, + { "(bad)", XX, XX, XX }, + }, +}; + + +static char *fgrps[][8] = { + /* d9_2 0 */ + { + "fnop","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)", + }, + + /* d9_4 1 */ + { + "fchs","fabs","(bad)","(bad)","ftst","fxam","(bad)","(bad)", + }, + + /* d9_5 2 */ + { + "fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz","(bad)", + }, + + /* d9_6 3 */ + { + "f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp", + }, + + /* d9_7 4 */ + { + "fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos", + }, + + /* da_5 5 */ + { + "(bad)","fucompp","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)", + }, + + /* db_4 6 */ + { + "feni(287 only)","fdisi(287 only)","fNclex","fNinit", + "fNsetpm(287 only)","(bad)","(bad)","(bad)", + }, + + /* de_3 7 */ + { + "(bad)","fcompp","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)", + }, + + /* df_4 8 */ + { + "fNstsw","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)","(bad)", + }, +}; + +static void +dofloat (sizeflag) + int sizeflag; +{ + const struct dis386 *dp; + unsigned char floatop; + + floatop = codep[-1]; + + if (mod != 3) + { + if (intel_syntax) + putop (float_mem_intel[(floatop - 0xd8 ) * 8 + reg], sizeflag); + else + putop (float_mem_att[(floatop - 0xd8 ) * 8 + reg], sizeflag); + obufp = op1out; + if (floatop == 0xdb) + OP_E (x_mode, sizeflag); + else if (floatop == 0xdd) + OP_E (d_mode, sizeflag); + else + OP_E (v_mode, sizeflag); + return; + } + codep++; + + dp = &float_reg[floatop - 0xd8][reg]; + if (dp->name == NULL) + { + putop (fgrps[dp->bytemode1][rm], sizeflag); + + /* instruction fnstsw is only one with strange arg */ + if (floatop == 0xdf && codep[-1] == 0xe0) + strcpy (op1out, names16[0]); + } + else + { + putop (dp->name, sizeflag); + + obufp = op1out; + if (dp->op1) + 
(*dp->op1)(dp->bytemode1, sizeflag); + obufp = op2out; + if (dp->op2) + (*dp->op2)(dp->bytemode2, sizeflag); + } +} + +/* ARGSUSED */ +static void +OP_ST (ignore, sizeflag) + int ignore ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + oappend ("%st"); +} + +/* ARGSUSED */ +static void +OP_STi (ignore, sizeflag) + int ignore ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%st(%d)", rm); + oappend (scratchbuf); +} + + +/* capital letters in template are macros */ +static void +putop (template, sizeflag) + const char *template; + int sizeflag; +{ + const char *p; + + for (p = template; *p; p++) + { + switch (*p) + { + default: + *obufp++ = *p; + break; + case 'A': + if (intel_syntax) + break; + if (mod != 3 +#ifdef SUFFIX_ALWAYS + || (sizeflag & SUFFIX_ALWAYS) +#endif + ) + *obufp++ = 'b'; + break; + case 'B': + if (intel_syntax) + break; +#ifdef SUFFIX_ALWAYS + if (sizeflag & SUFFIX_ALWAYS) + *obufp++ = 'b'; +#endif + break; + case 'E': /* For jcxz/jecxz */ + if (sizeflag & AFLAG) + *obufp++ = 'e'; + break; + case 'L': + if (intel_syntax) + break; +#ifdef SUFFIX_ALWAYS + if (sizeflag & SUFFIX_ALWAYS) + *obufp++ = 'l'; +#endif + break; + case 'N': + if ((prefixes & PREFIX_FWAIT) == 0) + *obufp++ = 'n'; + else + used_prefixes |= PREFIX_FWAIT; + break; + case 'P': + if (intel_syntax) + break; + if ((prefixes & PREFIX_DATA) +#ifdef SUFFIX_ALWAYS + || (sizeflag & SUFFIX_ALWAYS) +#endif + ) + { + if (sizeflag & DFLAG) + *obufp++ = 'l'; + else + *obufp++ = 'w'; + used_prefixes |= (prefixes & PREFIX_DATA); + } + break; + case 'Q': + if (intel_syntax) + break; + if (mod != 3 +#ifdef SUFFIX_ALWAYS + || (sizeflag & SUFFIX_ALWAYS) +#endif + ) + { + if (sizeflag & DFLAG) + *obufp++ = 'l'; + else + *obufp++ = 'w'; + used_prefixes |= (prefixes & PREFIX_DATA); + } + break; + case 'R': + if (intel_syntax) + { + if (sizeflag & DFLAG) + { + *obufp++ = 'd'; + *obufp++ = 'q'; + } + else + { + *obufp++ = 'w'; + *obufp++ = 'd'; + } + } + else + { + if (sizeflag & DFLAG) + *obufp++ = 'l'; + else + *obufp++ = 'w'; + } + used_prefixes |= (prefixes & PREFIX_DATA); + break; + case 'S': + if (intel_syntax) + break; +#ifdef SUFFIX_ALWAYS + if (sizeflag & SUFFIX_ALWAYS) + { + if (sizeflag & DFLAG) + *obufp++ = 'l'; + else + *obufp++ = 'w'; + used_prefixes |= (prefixes & PREFIX_DATA); + } +#endif + break; + case 'W': + /* operand size flag for cwtl, cbtw */ + if (sizeflag & DFLAG) + *obufp++ = 'w'; + else + *obufp++ = 'b'; + if (intel_syntax) + { + if (sizeflag & DFLAG) + { + *obufp++ = 'd'; + *obufp++ = 'e'; + } + else + { + *obufp++ = 'w'; + } + } + used_prefixes |= (prefixes & PREFIX_DATA); + break; + } + } + *obufp = 0; +} + +static void +oappend (s) + const char *s; +{ + strcpy (obufp, s); + obufp += strlen (s); +} + +static void +append_seg () +{ + if (prefixes & PREFIX_CS) + { + oappend ("%cs:"); + used_prefixes |= PREFIX_CS; + } + if (prefixes & PREFIX_DS) + { + oappend ("%ds:"); + used_prefixes |= PREFIX_DS; + } + if (prefixes & PREFIX_SS) + { + oappend ("%ss:"); + used_prefixes |= PREFIX_SS; + } + if (prefixes & PREFIX_ES) + { + oappend ("%es:"); + used_prefixes |= PREFIX_ES; + } + if (prefixes & PREFIX_FS) + { + oappend ("%fs:"); + used_prefixes |= PREFIX_FS; + } + if (prefixes & PREFIX_GS) + { + oappend ("%gs:"); + used_prefixes |= PREFIX_GS; + } +} + +static void +OP_indirE (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + if (!intel_syntax) + oappend ("*"); + OP_E (bytemode, sizeflag); +} + +static void +OP_E (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ 
+ int disp; + + /* skip mod/rm byte */ + codep++; + + if (mod == 3) + { + switch (bytemode) + { + case b_mode: + oappend (names8[rm]); + break; + case w_mode: + oappend (names16[rm]); + break; + case d_mode: + oappend (names32[rm]); + break; + case v_mode: + if (sizeflag & DFLAG) + oappend (names32[rm]); + else + oappend (names16[rm]); + used_prefixes |= (prefixes & PREFIX_DATA); + break; + case 0: + if ( !(codep[-2] == 0xAE && codep[-1] == 0xF8 /* sfence */)) + BadOp(); /* bad sfence,lea,lds,les,lfs,lgs,lss modrm */ + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + break; + } + return; + } + + disp = 0; + append_seg (); + + if (sizeflag & AFLAG) /* 32 bit address mode */ + { + int havesib; + int havebase; + int base; + int index = 0; + int scale = 0; + + havesib = 0; + havebase = 1; + base = rm; + + if (base == 4) + { + havesib = 1; + FETCH_DATA (the_info, codep + 1); + scale = (*codep >> 6) & 3; + index = (*codep >> 3) & 7; + base = *codep & 7; + codep++; + } + + switch (mod) + { + case 0: + if (base == 5) + { + havebase = 0; + disp = get32 (); + } + break; + case 1: + FETCH_DATA (the_info, codep + 1); + disp = *codep++; + if ((disp & 0x80) != 0) + disp -= 0x100; + break; + case 2: + disp = get32 (); + break; + } + + if (!intel_syntax) + if (mod != 0 || base == 5) + { + sprintf (scratchbuf, "0x%x", disp); + oappend (scratchbuf); + } + + if (havebase || (havesib && (index != 4 || scale != 0))) + { + if (intel_syntax) + { + switch (bytemode) + { + case b_mode: + oappend("BYTE PTR "); + break; + case w_mode: + oappend("WORD PTR "); + break; + case v_mode: + oappend("DWORD PTR "); + break; + case d_mode: + oappend("QWORD PTR "); + break; + case x_mode: + oappend("XWORD PTR "); + break; + default: + break; + } + } + *obufp++ = open_char; + *obufp = '\0'; + if (havebase) + oappend (names32[base]); + if (havesib) + { + if (index != 4) + { + if (intel_syntax) + { + if (havebase) + { + *obufp++ = separator_char; + *obufp = '\0'; + } + sprintf (scratchbuf, "%s", names32[index]); + } + else + sprintf (scratchbuf, ",%s", names32[index]); + oappend (scratchbuf); + } + if (!intel_syntax + || (intel_syntax + && bytemode != b_mode + && bytemode != w_mode + && bytemode != v_mode)) + { + *obufp++ = scale_char; + *obufp = '\0'; + sprintf (scratchbuf, "%d", 1 << scale); + oappend (scratchbuf); + } + } + if (intel_syntax) + if (mod != 0 || base == 5) + { + /* Don't print zero displacements */ + if (disp > 0) + { + sprintf (scratchbuf, "+%d", disp); + oappend (scratchbuf); + } + else if (disp < 0) + { + sprintf (scratchbuf, "%d", disp); + oappend (scratchbuf); + } + } + + *obufp++ = close_char; + *obufp = '\0'; + } + else if (intel_syntax) + { + if (mod != 0 || base == 5) + { + if (prefixes & (PREFIX_CS | PREFIX_SS | PREFIX_DS + | PREFIX_ES | PREFIX_FS | PREFIX_GS)) + ; + else + { + oappend (names_seg[3]); + oappend (":"); + } + sprintf (scratchbuf, "0x%x", disp); + oappend (scratchbuf); + } + } + } + else + { /* 16 bit address mode */ + switch (mod) + { + case 0: + if (rm == 6) + { + disp = get16 (); + if ((disp & 0x8000) != 0) + disp -= 0x10000; + } + break; + case 1: + FETCH_DATA (the_info, codep + 1); + disp = *codep++; + if ((disp & 0x80) != 0) + disp -= 0x100; + break; + case 2: + disp = get16 (); + if ((disp & 0x8000) != 0) + disp -= 0x10000; + break; + } + + if (!intel_syntax) + if (mod != 0 || rm == 6) + { + sprintf (scratchbuf, "%d", disp); + oappend (scratchbuf); + } + + if (mod != 0 || rm != 6) + { + *obufp++ = open_char; + *obufp = '\0'; + oappend (index16[rm]); + *obufp++ = 
close_char; + *obufp = '\0'; + } + } +} + +static void +OP_G (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + switch (bytemode) + { + case b_mode: + oappend (names8[reg]); + break; + case w_mode: + oappend (names16[reg]); + break; + case d_mode: + oappend (names32[reg]); + break; + case v_mode: + if (sizeflag & DFLAG) + oappend (names32[reg]); + else + oappend (names16[reg]); + used_prefixes |= (prefixes & PREFIX_DATA); + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + break; + } +} + +static int +get32 () +{ + int x = 0; + + FETCH_DATA (the_info, codep + 4); + x = *codep++ & 0xff; + x |= (*codep++ & 0xff) << 8; + x |= (*codep++ & 0xff) << 16; + x |= (*codep++ & 0xff) << 24; + return x; +} + +static int +get16 () +{ + int x = 0; + + FETCH_DATA (the_info, codep + 2); + x = *codep++ & 0xff; + x |= (*codep++ & 0xff) << 8; + return x; +} + +static void +set_op (op) + unsigned int op; +{ + op_index[op_ad] = op_ad; + op_address[op_ad] = op; +} + +static void +OP_REG (code, sizeflag) + int code; + int sizeflag; +{ + const char *s; + + switch (code) + { + case indir_dx_reg: + s = "(%dx)"; + break; + case ax_reg: case cx_reg: case dx_reg: case bx_reg: + case sp_reg: case bp_reg: case si_reg: case di_reg: + s = names16[code - ax_reg]; + break; + case es_reg: case ss_reg: case cs_reg: + case ds_reg: case fs_reg: case gs_reg: + s = names_seg[code - es_reg]; + break; + case al_reg: case ah_reg: case cl_reg: case ch_reg: + case dl_reg: case dh_reg: case bl_reg: case bh_reg: + s = names8[code - al_reg]; + break; + case eAX_reg: case eCX_reg: case eDX_reg: case eBX_reg: + case eSP_reg: case eBP_reg: case eSI_reg: case eDI_reg: + if (sizeflag & DFLAG) + s = names32[code - eAX_reg]; + else + s = names16[code - eAX_reg]; + used_prefixes |= (prefixes & PREFIX_DATA); + break; + default: + s = INTERNAL_DISASSEMBLER_ERROR; + break; + } + oappend (s); +} + +static void +OP_I (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + int op; + + switch (bytemode) + { + case b_mode: + FETCH_DATA (the_info, codep + 1); + op = *codep++ & 0xff; + break; + case v_mode: + if (sizeflag & DFLAG) + op = get32 (); + else + op = get16 (); + used_prefixes |= (prefixes & PREFIX_DATA); + break; + case w_mode: + op = get16 (); + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + return; + } + + if (intel_syntax) + sprintf (scratchbuf, "0x%x", op); + else + sprintf (scratchbuf, "$0x%x", op); + oappend (scratchbuf); + scratchbuf[0] = '\0'; +} + +static void +OP_sI (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + int op; + + switch (bytemode) + { + case b_mode: + FETCH_DATA (the_info, codep + 1); + op = *codep++; + if ((op & 0x80) != 0) + op -= 0x100; + break; + case v_mode: + if (sizeflag & DFLAG) + op = get32 (); + else + { + op = get16(); + if ((op & 0x8000) != 0) + op -= 0x10000; + } + used_prefixes |= (prefixes & PREFIX_DATA); + break; + case w_mode: + op = get16 (); + if ((op & 0x8000) != 0) + op -= 0x10000; + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + return; + } + if (intel_syntax) + sprintf (scratchbuf, "%d", op); + else + sprintf (scratchbuf, "$0x%x", op); + oappend (scratchbuf); +} + +static void +OP_J (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + int disp; + int mask = -1; + + switch (bytemode) + { + case b_mode: + FETCH_DATA (the_info, codep + 1); + disp = *codep++; + if ((disp & 0x80) != 0) + disp -= 0x100; + break; + case v_mode: + if (sizeflag & DFLAG) + disp = get32 (); + else + { + disp = get16 (); + /* for some reason, a data16 prefix on a jump 
instruction + means that the pc is masked to 16 bits after the + displacement is added! */ + mask = 0xffff; + } + used_prefixes |= (prefixes & PREFIX_DATA); + break; + default: + oappend (INTERNAL_DISASSEMBLER_ERROR); + return; + } + disp = (start_pc + codep - start_codep + disp) & mask; + set_op (disp); + sprintf (scratchbuf, "0x%x", disp); + oappend (scratchbuf); +} + +/* ARGSUSED */ +static void +OP_SEG (dummy, sizeflag) + int dummy ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + static char *sreg[] = { + "%es","%cs","%ss","%ds","%fs","%gs","%?","%?", + }; + + oappend (sreg[reg]); +} + +/* ARGSUSED */ +static void +OP_DIR (dummy, sizeflag) + int dummy ATTRIBUTE_UNUSED; + int sizeflag; +{ + int seg, offset; + + if (sizeflag & DFLAG) + { + offset = get32 (); + seg = get16 (); + } + else + { + offset = get16 (); + seg = get16 (); + } + used_prefixes |= (prefixes & PREFIX_DATA); + sprintf (scratchbuf, "$0x%x,$0x%x", seg, offset); + oappend (scratchbuf); +} + +/* ARGSUSED */ +static void +OP_OFF (ignore, sizeflag) + int ignore ATTRIBUTE_UNUSED; + int sizeflag; +{ + int off; + + append_seg (); + + if (sizeflag & AFLAG) + off = get32 (); + else + off = get16 (); + + if (intel_syntax) + { + if (!(prefixes & (PREFIX_CS | PREFIX_SS | PREFIX_DS + | PREFIX_ES | PREFIX_FS | PREFIX_GS))) + { + oappend (names_seg[3]); + oappend (":"); + } + } + sprintf (scratchbuf, "0x%x", off); + oappend (scratchbuf); +} + +static void +ptr_reg (code, sizeflag) + int code; + int sizeflag; +{ + const char *s; + oappend ("("); + if (sizeflag & AFLAG) + s = names32[code - eAX_reg]; + else + s = names16[code - eAX_reg]; + oappend (s); + oappend (")"); +} + +static void +OP_ESreg (code, sizeflag) + int code; + int sizeflag; +{ + oappend ("%es:"); + ptr_reg (code, sizeflag); +} + +static void +OP_DSreg (code, sizeflag) + int code; + int sizeflag; +{ + if ((prefixes + & (PREFIX_CS + | PREFIX_DS + | PREFIX_SS + | PREFIX_ES + | PREFIX_FS + | PREFIX_GS)) == 0) + prefixes |= PREFIX_DS; + append_seg(); + ptr_reg (code, sizeflag); +} + +/* ARGSUSED */ +static void +OP_C (dummy, sizeflag) + int dummy ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%cr%d", reg); + oappend (scratchbuf); +} + +/* ARGSUSED */ +static void +OP_D (dummy, sizeflag) + int dummy ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%db%d", reg); + oappend (scratchbuf); +} + +/* ARGSUSED */ +static void +OP_T (dummy, sizeflag) + int dummy ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%tr%d", reg); + oappend (scratchbuf); +} + +static void +OP_Rd (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + if (mod == 3) + OP_E (bytemode, sizeflag); + else + BadOp(); +} + +static void +OP_MMX (ignore, sizeflag) + int ignore ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%mm%d", reg); + oappend (scratchbuf); +} + +static void +OP_XMM (bytemode, sizeflag) + int bytemode ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + sprintf (scratchbuf, "%%xmm%d", reg); + oappend (scratchbuf); +} + +static void +OP_EM (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + if (mod != 3) + { + OP_E (bytemode, sizeflag); + return; + } + + codep++; + sprintf (scratchbuf, "%%mm%d", rm); + oappend (scratchbuf); +} + +static void +OP_EX (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + if (mod != 3) + { + OP_E (bytemode, sizeflag); + return; + } + + codep++; + sprintf (scratchbuf, "%%xmm%d", rm); + oappend (scratchbuf); +} + +static void 
+OP_MS (bytemode, sizeflag) + int bytemode; + int sizeflag; +{ + if (mod == 3) + OP_EM (bytemode, sizeflag); + else + BadOp(); +} + +static const char *Suffix3DNow[] = { +/* 00 */ NULL, NULL, NULL, NULL, +/* 04 */ NULL, NULL, NULL, NULL, +/* 08 */ NULL, NULL, NULL, NULL, +/* 0C */ "pi2fw", "pi2fd", NULL, NULL, +/* 10 */ NULL, NULL, NULL, NULL, +/* 14 */ NULL, NULL, NULL, NULL, +/* 18 */ NULL, NULL, NULL, NULL, +/* 1C */ "pf2iw", "pf2id", NULL, NULL, +/* 20 */ NULL, NULL, NULL, NULL, +/* 24 */ NULL, NULL, NULL, NULL, +/* 28 */ NULL, NULL, NULL, NULL, +/* 2C */ NULL, NULL, NULL, NULL, +/* 30 */ NULL, NULL, NULL, NULL, +/* 34 */ NULL, NULL, NULL, NULL, +/* 38 */ NULL, NULL, NULL, NULL, +/* 3C */ NULL, NULL, NULL, NULL, +/* 40 */ NULL, NULL, NULL, NULL, +/* 44 */ NULL, NULL, NULL, NULL, +/* 48 */ NULL, NULL, NULL, NULL, +/* 4C */ NULL, NULL, NULL, NULL, +/* 50 */ NULL, NULL, NULL, NULL, +/* 54 */ NULL, NULL, NULL, NULL, +/* 58 */ NULL, NULL, NULL, NULL, +/* 5C */ NULL, NULL, NULL, NULL, +/* 60 */ NULL, NULL, NULL, NULL, +/* 64 */ NULL, NULL, NULL, NULL, +/* 68 */ NULL, NULL, NULL, NULL, +/* 6C */ NULL, NULL, NULL, NULL, +/* 70 */ NULL, NULL, NULL, NULL, +/* 74 */ NULL, NULL, NULL, NULL, +/* 78 */ NULL, NULL, NULL, NULL, +/* 7C */ NULL, NULL, NULL, NULL, +/* 80 */ NULL, NULL, NULL, NULL, +/* 84 */ NULL, NULL, NULL, NULL, +/* 88 */ NULL, NULL, "pfnacc", NULL, +/* 8C */ NULL, NULL, "pfpnacc", NULL, +/* 90 */ "pfcmpge", NULL, NULL, NULL, +/* 94 */ "pfmin", NULL, "pfrcp", "pfrsqrt", +/* 98 */ NULL, NULL, "pfsub", NULL, +/* 9C */ NULL, NULL, "pfadd", NULL, +/* A0 */ "pfcmpgt", NULL, NULL, NULL, +/* A4 */ "pfmax", NULL, "pfrcpit1", "pfrsqit1", +/* A8 */ NULL, NULL, "pfsubr", NULL, +/* AC */ NULL, NULL, "pfacc", NULL, +/* B0 */ "pfcmpeq", NULL, NULL, NULL, +/* B4 */ "pfmul", NULL, "pfrcpit2", "pfmulhrw", +/* B8 */ NULL, NULL, NULL, "pswapd", +/* BC */ NULL, NULL, NULL, "pavgusb", +/* C0 */ NULL, NULL, NULL, NULL, +/* C4 */ NULL, NULL, NULL, NULL, +/* C8 */ NULL, NULL, NULL, NULL, +/* CC */ NULL, NULL, NULL, NULL, +/* D0 */ NULL, NULL, NULL, NULL, +/* D4 */ NULL, NULL, NULL, NULL, +/* D8 */ NULL, NULL, NULL, NULL, +/* DC */ NULL, NULL, NULL, NULL, +/* E0 */ NULL, NULL, NULL, NULL, +/* E4 */ NULL, NULL, NULL, NULL, +/* E8 */ NULL, NULL, NULL, NULL, +/* EC */ NULL, NULL, NULL, NULL, +/* F0 */ NULL, NULL, NULL, NULL, +/* F4 */ NULL, NULL, NULL, NULL, +/* F8 */ NULL, NULL, NULL, NULL, +/* FC */ NULL, NULL, NULL, NULL, +}; + +static void +OP_3DNowSuffix (bytemode, sizeflag) + int bytemode ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + const char *mnemonic; + + FETCH_DATA (the_info, codep + 1); + /* AMD 3DNow! instructions are specified by an opcode suffix in the + place where an 8-bit immediate would normally go. ie. the last + byte of the instruction. */ + obufp = obuf + strlen(obuf); + mnemonic = Suffix3DNow[*codep++ & 0xff]; + if (mnemonic) + oappend (mnemonic); + else + { + /* Since a variable sized modrm/sib chunk is between the start + of the opcode (0x0f0f) and the opcode suffix, we need to do + all the modrm processing first, and don't know until now that + we have a bad opcode. This necessitates some cleaning up. 
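
Because the 3DNow! selector byte trails the ModRM/SIB bytes (it sits where an immediate would), the decoder cannot tell the instruction is bad until the very end, which is what forces the cleanup described in the comment above. A reduced sketch of the suffix lookup itself, with a few entries copied from Suffix3DNow; the encoding in the comment is illustrative, not authoritative:

	#include <stdio.h>

	/* Sparse suffix->mnemonic map; entries copied from Suffix3DNow above. */
	static const char *suffix3dnow[256] = {
		[0x94] = "pfmin", [0x9e] = "pfadd",
		[0xb4] = "pfmul", [0xbf] = "pavgusb",
	};

	int main(void)
	{
		/* 0f 0f /modrm/ suffix -- e.g. pfadd %mm1,%mm0 as 0f 0f c1 9e */
		unsigned char insn[] = { 0x0f, 0x0f, 0xc1, 0x9e };
		const char *m = suffix3dnow[insn[3]];

		printf("%s\n", m ? m : "(bad)");
		return 0;
	}
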
*/ + op1out[0] = '\0'; + op2out[0] = '\0'; + BadOp(); + } +} + + +static const char *simd_cmp_op [] = { + "eq", + "lt", + "le", + "unord", + "neq", + "nlt", + "nle", + "ord" +}; + +static void +OP_SIMD_Suffix (bytemode, sizeflag) + int bytemode ATTRIBUTE_UNUSED; + int sizeflag ATTRIBUTE_UNUSED; +{ + unsigned int cmp_type; + + FETCH_DATA (the_info, codep + 1); + obufp = obuf + strlen(obuf); + cmp_type = *codep++ & 0xff; + if (cmp_type < 8) + { + sprintf (scratchbuf, "cmp%s%cs", + simd_cmp_op[cmp_type], + prefixes & PREFIX_REPZ ? 's' : 'p'); + used_prefixes |= (prefixes & PREFIX_REPZ); + oappend (scratchbuf); + } + else + { + /* We have a bad extension byte. Clean up. */ + op1out[0] = '\0'; + op2out[0] = '\0'; + BadOp(); + } +} + +static void +SIMD_Fixup (extrachar, sizeflag) + int extrachar; + int sizeflag ATTRIBUTE_UNUSED; +{ + /* Change movlps/movhps to movhlps/movlhps for 2 register operand + forms of these instructions. */ + if (mod == 3) + { + char *p = obuf + strlen(obuf); + *(p+1) = '\0'; + *p = *(p-1); + *(p-1) = *(p-2); + *(p-2) = *(p-3); + *(p-3) = extrachar; + } +} + +static void BadOp (void) +{ + codep = insn_codep + 1; /* throw away prefixes and 1st. opcode byte */ + oappend ("(bad)"); +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_bp.c linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_bp.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_bp.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_bp.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,777 @@ +/* + * Kernel Debugger Architecture Dependent Breakpoint Handling + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + */ + +#include +#include +#include +#include +#include +#include +#include + + +static char *kdba_rwtypes[] = { "Instruction(Register)", "Data Write", + "I/O", "Data Access"}; + +/* + * Table describing processor architecture hardware + * breakpoint registers. + */ + +kdbhard_bp_t kdb_hardbreaks[KDB_MAXHARDBPT]; + +/* + * kdba_db_trap + * + * Perform breakpoint processing upon entry to the + * processor debugger fault. Determine and print + * the active breakpoint. + * + * Parameters: + * ef Exception frame containing machine register state + * error Error number passed to kdb. + * Outputs: + * None. + * Returns: + * KDB_DB_BPT Standard instruction or data breakpoint encountered + * KDB_DB_SS Single Step fault ('ss' command or end of 'ssb' command) + * KDB_DB_SSB Single Step fault, caller should continue ('ssb' command) + * KDB_DB_SSBPT Single step over breakpoint + * KDB_DB_NOBPT No existing kdb breakpoint matches this debug exception + * Locking: + * None. + * Remarks: + * Yup, there be goto's here. + * + * If multiple processors receive debug exceptions simultaneously, + * one may be waiting at the kdb fence in kdb() while the user + * issues a 'bc' command to clear the breakpoint the processor + * which is waiting has already encountered. If this is the case, + * the debug registers will no longer match any entry in the + * breakpoint table, and we'll return the value KDB_DB_NOBPT. 
+ * This can cause a panic in die_if_kernel(). It is safer to + * disable the breakpoint (bd), go until all processors are past + * the breakpoint then clear the breakpoint (bc). This code + * recognises a breakpoint even when disabled but not when it has + * been cleared. + * + * WARNING: This routine clears the debug state. It should be called + * once per debug and the result cached. + */ + +kdb_dbtrap_t +kdba_db_trap(kdb_eframe_t ef, int error_unused) +{ + kdb_machreg_t dr6; + kdb_machreg_t dr7; + int rw, reg; + int i; + kdb_dbtrap_t rv = KDB_DB_BPT; + kdb_bp_t *bp; + + dr6 = kdba_getdr6(); + dr7 = kdba_getdr7(); + + if (KDB_DEBUG(BP)) + kdb_printf("kdb: dr6 0x%lx dr7 0x%lx\n", dr6, dr7); + if (dr6 & DR6_BS) { + if (KDB_STATE(SSBPT)) { + if (KDB_DEBUG(BP)) + kdb_printf("ssbpt\n"); + KDB_STATE_CLEAR(SSBPT); + for(i=0,bp=kdb_breakpoints; + i < KDB_MAXBPT; + i++, bp++) { + if (KDB_DEBUG(BP)) + kdb_printf("bp 0x%p enabled %d delayed %d global %d cpu %d\n", + bp, bp->bp_enabled, bp->bp_delayed, bp->bp_global, bp->bp_cpu); + if (!bp->bp_enabled) + continue; + if (!bp->bp_global && bp->bp_cpu != smp_processor_id()) + continue; + if (KDB_DEBUG(BP)) + kdb_printf("bp for this cpu\n"); + if (bp->bp_delayed) { + bp->bp_delayed = 0; + if (KDB_DEBUG(BP)) + kdb_printf("kdba_installbp\n"); + kdba_installbp(ef, bp); + if (!KDB_STATE(DOING_SS)) { + ef->eflags &= ~EF_TF; + return(KDB_DB_SSBPT); + } + break; + } + } + if (i == KDB_MAXBPT) { + kdb_printf("kdb: Unable to find delayed breakpoint\n"); + } + if (!KDB_STATE(DOING_SS)) { + ef->eflags &= ~EF_TF; + return(KDB_DB_NOBPT); + } + /* FALLTHROUGH */ + } + + /* + * KDB_STATE_DOING_SS is set when the kernel debugger is using + * the processor trap flag to single-step a processor. If a + * single step trap occurs and this flag is clear, the SS trap + * will be ignored by KDB and the kernel will be allowed to deal + * with it as necessary (e.g. for ptrace). + */ + if (!KDB_STATE(DOING_SS)) + goto unknown; + + /* single step */ + rv = KDB_DB_SS; /* Indicate single step */ + if (KDB_STATE(DOING_SSB)) { + unsigned char op1, op2 = 0; + + kdb_id1(ef->eip); + op1 = (unsigned char)kdba_getword(ef->eip, sizeof(op1)); + if (op1 == 0x0f) { + op2 = (unsigned char)kdba_getword(ef->eip+1, sizeof(op2)); + } + if (((op1&0xf0) == 0xe0) /* short disp jumps */ + || ((op1&0xf0) == 0x70) /* Misc. jumps */ + || (op1 == 0xc2) /* ret */ + || (op1 == 0x9a) /* call */ + || ((op1&0xf8) == 0xc8) /* enter, leave, iret, int, */ + || ((op1 == 0x0f) + && ((op2&0xf0)== 0x80))) { + /* + * End the ssb command here. + */ + KDB_STATE_CLEAR(DOING_SSB); + KDB_STATE_CLEAR(DOING_SS); + } else { + rv = KDB_DB_SSB; /* Indicate ssb - dismiss immediately */ + } + } else { + /* + * Print current insn + */ + kdb_printf("SS trap at "); + kdb_symbol_print(ef->eip, NULL, KDB_SP_DEFAULT|KDB_SP_NEWLINE); + kdb_id1(ef->eip); + KDB_STATE_CLEAR(DOING_SS); + } + + if (rv != KDB_DB_SSB) + ef->eflags &= ~EF_TF; + } + + if (dr6 & DR6_B0) { + rw = DR7_RW0(dr7); + reg = 0; + goto handle; + } + + if (dr6 & DR6_B1) { + rw = DR7_RW1(dr7); + reg = 1; + goto handle; + } + + if (dr6 & DR6_B2) { + rw = DR7_RW2(dr7); + reg = 2; + goto handle; + } + + if (dr6 & DR6_B3) { + rw = DR7_RW3(dr7); + reg = 3; + goto handle; + } + + if (rv > 0) + goto handled; + + goto unknown; /* dismiss */ + +handle: + /* + * Set Resume Flag + */ + ef->eflags |= EF_RF; + + /* + * Determine which breakpoint was encountered. 
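
The DR7_RWn() macros used above extract the two-bit breakpoint-type field for debug register n from %dr7; assuming the standard i386 layout, that field sits at bits 16+4n, and its value indexes the kdba_rwtypes[] strings at the top of the file. A small standalone sketch of the extraction:

	#include <stdio.h>

	static const char *rwtypes[] = {	/* mirrors kdba_rwtypes[] */
		"Instruction(Register)", "Data Write", "I/O", "Data Access"
	};

	/* Type field for debug register n lives at bits 16+4n..17+4n of dr7. */
	#define DR7_RW(dr7, n)	(((dr7) >> (16 + 4 * (n))) & 3)

	int main(void)
	{
		unsigned long dr7 = 1UL << 16;	/* pretend dr0 is a data-write bp */

		printf("dr0 type: %s\n", rwtypes[DR7_RW(dr7, 0)]);
		return 0;
	}
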
+ */
+	for(i=0, bp=kdb_breakpoints; i<KDB_MAXBPT; i++, bp++) {
+		if ((!bp->bp_free)
+		 && (bp->bp_global || bp->bp_cpu == smp_processor_id())
+		 && (bp->bp_hard)
+		 && (bp->bp_hard->bph_reg == reg)) {
+			/*
+			 * Hit this breakpoint.
+			 */
+			kdb_printf("%s breakpoint #%d at " kdb_bfd_vma_fmt "\n",
+				   kdba_rwtypes[rw],
+				   i, bp->bp_addr);
+
+			/*
+			 * For an instruction breakpoint, disassemble
+			 * the current instruction.
+			 */
+			if (rw == 0) {
+				kdb_id1(ef->eip);
+			}
+
+			goto handled;
+		}
+	}
+
+unknown:
+	ef->eflags |= EF_RF;	/* Suppress further faults */
+	rv = KDB_DB_NOBPT;	/* Cause kdb() to return */
+
+handled:
+
+	/*
+	 * Clear the pending exceptions.
+	 */
+	kdba_putdr6(0);
+
+	return rv;
+}
+
+/*
+ * kdba_bp_trap
+ *
+ *	Perform breakpoint processing upon entry to the
+ *	processor breakpoint instruction fault.  Determine and print
+ *	the active breakpoint.
+ *
+ * Parameters:
+ *	ef	Exception frame containing machine register state
+ *	error	Error number passed to kdb.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	0	Standard instruction or data breakpoint encountered
+ *	1	Single Step fault ('ss' command)
+ *	2	Single Step fault, caller should continue ('ssb' command)
+ *	3	No existing kdb breakpoint matches this debug exception
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ *	If multiple processors receive debug exceptions simultaneously,
+ *	one may be waiting at the kdb fence in kdb() while the user
+ *	issues a 'bc' command to clear the breakpoint the processor which
+ *	is waiting has already encountered.  If this is the case, the
+ *	debug registers will no longer match any entry in the breakpoint
+ *	table, and we'll return the value '3'.  This can cause a panic
+ *	in die_if_kernel().  It is safer to disable the breakpoint (bd),
+ *	'go' until all processors are past the breakpoint then clear the
+ *	breakpoint (bc).  This code recognises a breakpoint even when
+ *	disabled but not when it has been cleared.
+ *
+ * WARNING: This routine resets the eip.  It should be called
+ *	once per breakpoint and the result cached.
+ */
+
+kdb_dbtrap_t
+kdba_bp_trap(kdb_eframe_t ef, int error_unused)
+{
+	int i;
+	kdb_dbtrap_t rv;
+	kdb_bp_t *bp;
+
+	/*
+	 * Determine which breakpoint was encountered.
+	 */
+	if (KDB_DEBUG(BP))
+		kdb_printf("kdba_bp_trap: eip=0x%lx (not adjusted) "
+			   "eflags=0x%lx ef=0x%p esp=0x%lx\n",
+			   ef->eip, ef->eflags, ef, ef->esp);
+
+	rv = KDB_DB_NOBPT;	/* Cause kdb() to return */
+
+	for(i=0, bp=kdb_breakpoints; i<KDB_MAXBPT; i++, bp++) {
+		if (bp->bp_free)
+			continue;
+		if (!bp->bp_global && bp->bp_cpu != smp_processor_id())
+			continue;
+		if ((void *)bp->bp_addr == (void *)(ef->eip - bp->bp_adjust)) {
+			/* Hit this breakpoint.  */
+			ef->eip -= bp->bp_adjust;
+			kdb_printf("Instruction(i) breakpoint #%d at 0x%lx (adjusted)\n",
+				   i, ef->eip);
+			kdb_id1(ef->eip);
+			rv = KDB_DB_BPT;
+			bp->bp_delay = 1;
+			break;
+		}
+	}
+
+	return rv;
+}
+
+/*
+ * kdba_handle_bp
+ *
+ *	Handle an instruction-breakpoint trap.  Called when re-installing
+ *	an enabled breakpoint which has the bp_delay bit set.
+ *
+ * Parameters:
+ * Returns:
+ * Locking:
+ * Remarks:
+ *
+ * Ok, we really need to:
+ *	1) Restore the original instruction byte
+ *	2) Single Step
+ *	3) Restore breakpoint instruction
+ *	4) Continue.
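
kdba_bp_trap() above relies on int 3 being a trap: by the time kdb runs, eip already points one byte past the planted 0xcc, so bp_adjust backs it up both for matching and for resuming. The arithmetic in isolation, with made-up addresses:

	#include <stdio.h>

	int main(void)
	{
		unsigned long bp_addr = 0xc0101234;	/* where 0xcc was planted */
		unsigned long bp_adjust = 1;		/* int 3 is one byte */
		unsigned long trap_eip = 0xc0101235;	/* eip after the trap */

		if (bp_addr == trap_eip - bp_adjust) {
			trap_eip -= bp_adjust;		/* back up onto the insn */
			printf("hit breakpoint, resume at 0x%lx\n", trap_eip);
		}
		return 0;
	}
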
+ *
+ *
+ */
+
+static void
+kdba_handle_bp(kdb_eframe_t ef, kdb_bp_t *bp)
+{
+	if (!ef) {
+		kdb_printf("kdba_handle_bp: ef == NULL\n");
+		return;
+	}
+
+	if (KDB_DEBUG(BP))
+		kdb_printf("ef->eip = 0x%lx\n", ef->eip);
+
+	/*
+	 * Setup single step
+	 */
+	kdba_setsinglestep(ef);
+
+	/* KDB_STATE_SSBPT is set when the kernel debugger must single step
+	 * a task in order to re-establish an instruction breakpoint which
+	 * uses the instruction replacement mechanism.
+	 */
+	KDB_STATE_SET(SSBPT);
+
+	/*
+	 * Reset delay attribute
+	 */
+	bp->bp_delay = 0;
+	bp->bp_delayed = 1;
+}
+
+
+/*
+ * kdba_bptype
+ *
+ *	Return a string describing type of breakpoint.
+ *
+ * Parameters:
+ *	bph	Pointer to hardware breakpoint description
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Character string.
+ * Locking:
+ *	None.
+ * Remarks:
+ */
+
+char *
+kdba_bptype(kdbhard_bp_t *bph)
+{
+	char *mode;
+
+	mode = kdba_rwtypes[bph->bph_mode];
+
+	return mode;
+}
+
+/*
+ * kdba_printbpreg
+ *
+ *	Print register name assigned to breakpoint
+ *
+ * Parameters:
+ *	bph	Pointer hardware breakpoint structure
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ */
+
+void
+kdba_printbpreg(kdbhard_bp_t *bph)
+{
+	kdb_printf(" in dr%ld", bph->bph_reg);
+}
+
+/*
+ * kdba_printbp
+ *
+ *	Print string describing hardware breakpoint.
+ *
+ * Parameters:
+ *	bph	Pointer to hardware breakpoint description
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ */
+
+void
+kdba_printbp(kdb_bp_t *bp)
+{
+	kdb_printf("\n is enabled");
+	if (bp->bp_hardtype) {
+		kdba_printbpreg(bp->bp_hard);
+		if (bp->bp_hard->bph_mode != 0) {
+			kdb_printf(" for %d bytes",
+				   bp->bp_hard->bph_length+1);
+		}
+	}
+}
+
+/*
+ * kdba_parsebp
+ *
+ *	Parse architecture dependent portion of the
+ *	breakpoint command.
+ *
+ * Parameters:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Zero for success, a kdb diagnostic for failure
+ * Locking:
+ *	None.
+ * Remarks:
+ *	for the IA32 architecture, data access, data write and
+ *	I/O breakpoints are supported in addition to instruction
+ *	breakpoints.
+ *
+ *	{datar|dataw|io|inst} [length]
+ */
+
+int
+kdba_parsebp(int argc, const char **argv, int *nextargp, kdb_bp_t *bp)
+{
+	int nextarg = *nextargp;
+	int diag;
+	kdbhard_bp_t *bph = &bp->bp_template;
+
+	bph->bph_mode = 0;	/* Default to instruction breakpoint */
+	bph->bph_length = 0;	/* Length must be zero for insn bp */
+	if ((argc + 1) != nextarg) {
+		if (strnicmp(argv[nextarg], "datar", sizeof("datar")) == 0) {
+			bph->bph_mode = 3;
+		} else if (strnicmp(argv[nextarg], "dataw", sizeof("dataw")) == 0) {
+			bph->bph_mode = 1;
+		} else if (strnicmp(argv[nextarg], "io", sizeof("io")) == 0) {
+			bph->bph_mode = 2;
+		} else if (strnicmp(argv[nextarg], "inst", sizeof("inst")) == 0) {
+			bph->bph_mode = 0;
+		} else {
+			return KDB_ARGCOUNT;
+		}
+
+		bph->bph_length = 3;	/* Default to 4 bytes */
+
+		nextarg++;
+
+		if ((argc + 1) != nextarg) {
+			unsigned long len;
+
+			diag = kdbgetularg((char *)argv[nextarg],
+					   &len);
+			if (diag)
+				return diag;
+
+
+			if ((len > 4) || (len == 3))
+				return KDB_BADLENGTH;
+
+			bph->bph_length = len;
+			bph->bph_length--;	/* Normalize for debug register */
+			nextarg++;
+		}
+
+		if ((argc + 1) != nextarg)
+			return KDB_ARGCOUNT;
+
+		/*
+		 * Indicate to architecture independent level that
+		 * a hardware register assignment is required to enable
+		 * this breakpoint.
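
The off-by-one dance above (bph_length--) mirrors the debug registers' LEN encoding, where the hardware field holds length-1 and a 3-byte range simply does not exist, hence the rejection of len == 3. A sketch of the same validation rules; the explicit len == 0 rejection is an added safety check, not in the patch:

	#include <stdio.h>

	/* Return the hardware LEN encoding for a watchpoint size, or -1. */
	static int dr_len_encoding(unsigned long len)
	{
		if (len == 0 || len > 4 || len == 3)
			return -1;	/* only 1, 2 and 4 byte ranges exist */
		return (int)len - 1;	/* 1->0, 2->1, 4->3 */
	}

	int main(void)
	{
		unsigned long len;

		for (len = 1; len <= 4; len++)
			printf("len %lu -> encoding %d\n", len,
			       dr_len_encoding(len));
		return 0;
	}
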
+		 */
+
+		bph->bph_free = 0;
+	} else {
+		if (KDB_DEBUG(BP))
+			kdb_printf("kdba_bp: no args, forcehw is %d\n", bp->bp_forcehw);
+		if (bp->bp_forcehw) {
+			/*
+			 * We are forced to use a hardware register for this
+			 * breakpoint because either the bph or bpha
+			 * commands were used to establish this breakpoint.
+			 */
+			bph->bph_free = 0;
+		} else {
+			/*
+			 * Indicate to architecture dependent level that
+			 * the instruction replacement breakpoint technique
+			 * should be used for this breakpoint.
+			 */
+			bph->bph_free = 1;
+			bp->bp_adjust = 1;	/* software, int 3 is one byte */
+		}
+	}
+
+	*nextargp = nextarg;
+	return 0;
+}
+
+/*
+ * kdba_allocbp
+ *
+ *	Associate a hardware register with a breakpoint.
+ *
+ * Parameters:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	A pointer to the allocated register kdbhard_bp_t structure for
+ *	success, NULL and a non-zero diagnostic for failure.
+ * Locking:
+ *	None.
+ * Remarks:
+ */
+
+kdbhard_bp_t *
+kdba_allocbp(kdbhard_bp_t *bph, int *diagp)
+{
+	int i;
+	kdbhard_bp_t *newbph;
+
+	for(i=0,newbph=kdb_hardbreaks; i < KDB_MAXHARDBPT; i++, newbph++) {
+		if (newbph->bph_free) {
+			break;
+		}
+	}
+
+	if (i == KDB_MAXHARDBPT) {
+		*diagp = KDB_TOOMANYDBREGS;
+		return NULL;
+	}
+
+	*diagp = 0;
+
+	/*
+	 * Copy data from template.  Can't just copy the entire template
+	 * here because the register number in kdb_hardbreaks must be
+	 * preserved.
+	 */
+	newbph->bph_data = bph->bph_data;
+	newbph->bph_write = bph->bph_write;
+	newbph->bph_mode = bph->bph_mode;
+	newbph->bph_length = bph->bph_length;
+
+	/*
+	 * Mark entry allocated.
+	 */
+	newbph->bph_free = 0;
+
+	return newbph;
+}
+
+/*
+ * kdba_freebp
+ *
+ *	Deallocate a hardware breakpoint
+ *
+ * Parameters:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ */
+
+void
+kdba_freebp(kdbhard_bp_t *bph)
+{
+	bph->bph_free = 1;
+}
+
+/*
+ * kdba_initbp
+ *
+ *	Initialize the breakpoint table for the hardware breakpoint
+ *	register.
+ *
+ * Parameters:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ *	There is one entry per register.  On the ia32 architecture
+ *	all the registers are interchangeable, so no special allocation
+ *	criteria are required.
+ */
+
+void
+kdba_initbp(void)
+{
+	int i;
+	kdbhard_bp_t *bph;
+
+	/*
+	 * Clear the hardware breakpoint table
+	 */
+
+	memset(kdb_hardbreaks, '\0', sizeof(kdb_hardbreaks));
+
+	for(i=0,bph=kdb_hardbreaks; i<KDB_MAXHARDBPT; i++, bph++) {
+		bph->bph_reg = i;
+		bph->bph_free = 1;
+	}
+}
+
+/*
+ * kdba_installbp
+ *
+ *	Install a breakpoint
+ *
+ * Parameters:
+ *	ef	Exception frame
+ *	bp	Breakpoint structure for the breakpoint to be installed
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ *	For hardware breakpoints, a debug register is allocated
+ *	and assigned to the breakpoint.  If no debug register is
+ *	available, a warning message is printed and the breakpoint
+ *	is disabled.
+ *
+ *	For instruction replacement breakpoints, we must single-step
+ *	over the replaced instruction at this point so we can re-install
+ *	the breakpoint instruction after the single-step.
+ *
+ *	Install the breakpoint, if it is not already installed.
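
The instruction-replacement path that follows reduces to: save the original byte, write int 3 (0xcc) over it, and restore the byte again in kdba_removebp(). A user-space sketch of that round trip on an ordinary buffer, with trivial stand-ins for kdba_getword()/kdba_putword():

	#include <stdio.h>

	#define IA32_BP_INSTRUCTION 0xcc	/* int 3 */

	struct soft_bp {
		unsigned char *addr;	/* patched location */
		unsigned char inst;	/* saved original byte */
		int installed;
	};

	static void install(struct soft_bp *bp)
	{
		if (!bp->installed) {
			bp->inst = *bp->addr;		 /* kdba_getword() */
			*bp->addr = IA32_BP_INSTRUCTION; /* kdba_putword() */
			bp->installed = 1;
		}
	}

	static void removebp(struct soft_bp *bp)
	{
		if (bp->installed) {
			*bp->addr = bp->inst;	/* restore the stream */
			bp->installed = 0;
		}
	}

	int main(void)
	{
		unsigned char text[] = { 0x90, 0x90 };	/* pretend code */
		struct soft_bp bp = { text, 0, 0 };

		install(&bp);
		printf("patched:  0x%02x\n", text[0]);
		removebp(&bp);
		printf("restored: 0x%02x\n", text[0]);
		return 0;
	}
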
+ */ + + if (KDB_DEBUG(BP)) { + kdb_printf("kdba_installbp bp_installed %d\n", bp->bp_installed); + } + if (!bp->bp_installed) { + if (bp->bp_hardtype) { + kdba_installdbreg(bp); + bp->bp_installed = 1; + if (KDB_DEBUG(BP)) { + kdb_printf("kdba_installbp hardware reg %ld at " kdb_bfd_vma_fmt "\n", + bp->bp_hard->bph_reg, bp->bp_addr); + } + } else if (bp->bp_delay) { + if (KDB_DEBUG(BP)) + kdb_printf("kdba_installbp delayed bp\n"); + kdba_handle_bp(ef, bp); + } else { + bp->bp_inst = kdba_getword(bp->bp_addr, 1); + kdba_putword(bp->bp_addr, 1, IA32_BREAKPOINT_INSTRUCTION); + bp->bp_instvalid = 1; + if (KDB_DEBUG(BP)) + kdb_printf("kdba_installbp instruction 0x%x at " kdb_bfd_vma_fmt "\n", + IA32_BREAKPOINT_INSTRUCTION, bp->bp_addr); + bp->bp_installed = 1; + } + } +} + +/* + * kdba_removebp + * + * Make a breakpoint ineffective. + * + * Parameters: + * None. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + */ + +void +kdba_removebp(kdb_bp_t *bp) +{ + /* + * For hardware breakpoints, remove it from the active register, + * for software breakpoints, restore the instruction stream. + */ + if (KDB_DEBUG(BP)) { + kdb_printf("kdba_removebp bp_installed %d\n", bp->bp_installed); + } + if (bp->bp_installed) { + if (bp->bp_hardtype) { + if (KDB_DEBUG(BP)) { + kdb_printf("kdb: removing hardware reg %ld at " kdb_bfd_vma_fmt "\n", + bp->bp_hard->bph_reg, bp->bp_addr); + } + kdba_removedbreg(bp); + } else if (bp->bp_instvalid) { + if (KDB_DEBUG(BP)) + kdb_printf("kdb: restoring instruction 0x%x at " kdb_bfd_vma_fmt "\n", + bp->bp_inst, bp->bp_addr); + kdba_putword(bp->bp_addr, 1, bp->bp_inst); + bp->bp_instvalid = 0; + } + bp->bp_installed = 0; + } +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_bt.c linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_bt.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_bt.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_bt.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,345 @@ +/* + * Minimalist Kernel Debugger - Architecture Dependent Stack Traceback + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + * Masahiro Adegawa 1999/12/01 + * 'sr' command, active flag in 'ps' + * Scott Lurndal 1999/12/12 + * Significantly restructure for linux2.3 + * Keith Owens 2000/05/23 + * KDB v1.2 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * bt_print_one + * + * Print one back trace entry. + * + * Inputs: + * ebp Previous frame pointer, 0 if not valid. + * eip Current program counter. + * symtab Information about symbol that eip falls within. + * ar Activation record for this frame. + * argcount Maximum number of arguments to print. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * None. 
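
bt_print_one(), which follows, divides ar->args (a byte count) by 4 because each word-sized argument occupies one 4-byte stack slot in the i386 calling convention, then reads each slot back with kdba_getword(). A simplified standalone version of that slot walk; the real code computes each slot as ar->arg0 - ar->args + 4*i, while this sketch just walks an array:

	#include <stdio.h>

	int main(void)
	{
		/* Fake stack: return address followed by three 4-byte arg slots. */
		unsigned int stack[] = { 0xc0100000u, 1, 2, 3 };
		unsigned char *arg0 = (unsigned char *)&stack[1];
		int args = 3 * 4;		/* byte count, as in ar->args */
		int i, argc = args / 4;		/* 4 bytes per i386 slot */

		for (i = 1; i <= argc; i++)
			printf("arg%d = 0x%x\n", i,
			       *(unsigned int *)(arg0 + 4 * (i - 1)));
		return 0;
	}
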
+ */
+
+static void
+bt_print_one(kdb_machreg_t eip, kdb_machreg_t ebp, const kdb_ar_t *ar,
+	     const kdb_symtab_t *symtab, int argcount)
+{
+	int btsymarg = 0;
+	int nosect = 0;
+
+	kdbgetintenv("BTSYMARG", &btsymarg);
+	kdbgetintenv("NOSECT", &nosect);
+
+	if (ebp)
+		kdb_printf("0x%08lx", ebp);
+	else
+		kdb_printf("          ");
+	kdb_symbol_print(eip, symtab, KDB_SP_SPACEB|KDB_SP_VALUE);
+	if (argcount && ar->args) {
+		int i, argc = ar->args / 4;
+
+		kdb_printf(" (");
+		if (argc > argcount)
+			argc = argcount;
+
+		for(i=1; i<=argc; i++){
+			kdb_machreg_t argp = ar->arg0 - ar->args + 4*i;
+
+			if (i != 1)
+				kdb_printf(", ");
+			kdb_printf("0x%lx",
+				   kdba_getword(argp, sizeof(kdb_machreg_t)));
+		}
+		kdb_printf(")");
+	}
+	if (symtab->sym_name) {
+		if (!nosect) {
+			kdb_printf("\n");
+			kdb_printf("    %s %s 0x%lx 0x%lx 0x%lx",
+				   symtab->mod_name,
+				   symtab->sec_name,
+				   symtab->sec_start,
+				   symtab->sym_start,
+				   symtab->sym_end);
+		}
+	}
+	kdb_printf("\n");
+	if (argcount && ar->args && btsymarg) {
+		int i, argc = ar->args / 4;
+		kdb_symtab_t arg_symtab;
+		kdb_machreg_t arg;
+		for(i=1; i<=argc; i++){
+			kdb_machreg_t argp = ar->arg0 - ar->args + 4*i;
+			arg = kdba_getword(argp, sizeof(kdb_machreg_t));
+			if (kdbnearsym(arg, &arg_symtab)) {
+				kdb_printf("        ");
+				kdb_symbol_print(arg, &arg_symtab, KDB_SP_DEFAULT|KDB_SP_NEWLINE);
+			}
+		}
+	}
+}
+
+/*
+ * kdba_bt_stack_i386
+ *
+ *	kdba_bt_stack with i386 specific parameters.
+ *	Specification as kdba_bt_stack plus :-
+ *
+ * Inputs:
+ *	As kdba_bt_stack plus
+ *	regs_esp If 1 get esp from the registers (exception frame), if 0
+ *		 get esp from kdba_getregcontents.
+ */
+
+static int
+kdba_bt_stack_i386(struct pt_regs *regs, kdb_machreg_t *addr, int argcount,
+		   struct task_struct *p, int regs_esp)
+{
+	kdb_ar_t ar;
+	kdb_machreg_t eip, esp, ebp, ss, cs;
+	kdb_symtab_t symtab;
+
+	/*
+	 * The caller may have supplied an address at which the
+	 * stack traceback operation should begin.  This address
+	 * is assumed by this code to point to a return-address
+	 * on the stack to be traced back.
+	 *
+	 * The end result of this will make it appear as if a function
+	 * entitled '<unknown>' was called from the function which
+	 * contains return-address.
+	 */
+	if (addr) {
+		eip = 0;
+		ebp = 0;
+		esp = *addr;
+		cs = __KERNEL_CS;	/* have to assume kernel space */
+	} else {
+		eip = regs->eip;
+		ebp = regs->ebp;
+		if (regs_esp)
+			esp = regs->esp;
+		else
+			kdba_getregcontents("esp", regs, &esp);
+		kdba_getregcontents("xcs", regs, &cs);
+	}
+	ss = esp & -8192;
+
+	if ((cs & 0xffff) != __KERNEL_CS) {
+		kdb_printf("Stack is not in kernel space, backtrace not available\n");
+		return 0;
+	}
+
+	kdb_printf("    EBP       EIP         Function(args)\n");
+
+	/*
+	 * Run through the activation records and print them.
+	 */
+
+	while (1) {
+		kdbnearsym(eip, &symtab);
+		if (!kdb_get_next_ar(esp, symtab.sym_start, eip, ebp, ss,
+				     &ar, &symtab)) {
+			break;
+		}
+
+		if (strcmp(".text.lock", symtab.sec_name) == 0) {
+			/*
+			 * Instructions in the .text.lock area are generated by
+			 * the out of line code in lock handling, see
+			 * include/asm-i386/semaphore.h and rwlock.h.  There can
+			 * be multiple instructions which eventually end with a
+			 * jump back to the mainline code.  Use the disassembler
+			 * to silently step through the code until we find the
+			 * jump, resolve its destination and translate it to a
+			 * symbol.  Replace '.text.lock' with the symbol.
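
Stepping through .text.lock as described above only needs enough decoding to spot the final jmp: opcode 0xeb carries a signed 8-bit displacement, 0xe9 a 32-bit one, and either is relative to the end of the jump instruction, which is why the code below computes realeip += 1 + offsize + offset. The displacement arithmetic in isolation:

	#include <stdio.h>

	int main(void)
	{
		/* jmp rel8 back by 16 bytes: eb f0 */
		unsigned char code[] = { 0xeb, 0xf0 };
		unsigned long eip = 0xc0200000;
		long off;

		if (code[0] == 0xeb)		/* 1-byte displacement */
			off = (signed char)code[1];
		else if (code[0] == 0xe9)	/* 4-byte displacement */
			off = 0;	/* would read 4 little-endian bytes */
		else
			return 1;

		/* Target is relative to the end of the jmp instruction. */
		printf("jmp target: 0x%lx\n", eip + 2 + off);
		return 0;
	}
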
+ */ + unsigned char inst; + kdb_machreg_t offset = 0, realeip = eip; + int length, offsize = 0; + kdb_symtab_t lock_symtab; + /* Dummy out the disassembler print function */ + fprintf_ftype save_fprintf_func = kdb_di.fprintf_func; + + kdb_di.fprintf_func = &kdb_dis_fprintf_dummy; + while((length = kdba_id_printinsn(realeip, &kdb_di)) > 0) { + inst = kdba_getword(realeip, 1); + offsize = 0; + switch (inst) { + case 0xeb: /* jmp with 1 byte offset */ + offsize = 1; + offset = kdba_getword(realeip+1, offsize); + break; + case 0xe9: /* jmp with 4 byte offset */ + offsize = 4; + offset = kdba_getword(realeip+1, offsize); + break; + default: + realeip += length; /* next instruction */ + break; + } + if (offsize) + break; + } + kdb_di.fprintf_func = save_fprintf_func; + + if (offsize) { + realeip += 1 + offsize + offset; + if (kdbnearsym(realeip, &lock_symtab)) { + /* Print the stext entry without args */ + bt_print_one(eip, 0, &ar, &symtab, 0); + /* Point to mainline code */ + eip = realeip; + continue; + } + } + } + + if (strcmp("ret_from_intr", symtab.sym_name) == 0 || + strcmp("error_code", symtab.sym_name) == 0) { + if (strcmp("ret_from_intr", symtab.sym_name) == 0) { + /* + * Non-standard frame. ret_from_intr is + * preceded by 9 registers (ebx, ecx, edx, esi, + * edi, ebp, eax, ds, cs), original eax and the + * return address for a total of 11 words. + */ + ar.start = ar.end + 11*4; + } + if (strcmp("error_code", symtab.sym_name) == 0) { + /* + * Non-standard frame. error_code is preceded + * by two parameters (-> registers, error code), + * 9 registers (ebx, ecx, edx, esi, edi, ebp, + * eax, ds, cs), original eax and the return + * address for a total of 13 words. + */ + ar.start = ar.end + 13*4; + } + /* Print the non-standard entry without args */ + bt_print_one(eip, 0, &ar, &symtab, 0); + kdb_printf("Interrupt registers:\n"); + kdba_dumpregs((struct pt_regs *)(ar.end), NULL, NULL); + /* Step the frame to the interrupted code */ + eip = kdba_getword(ar.start-4, 4); + ebp = 0; + esp = ar.start; + if ((((struct pt_regs *)(ar.end))->xcs & 0xffff) != __KERNEL_CS) { + kdb_printf("Interrupt from user space, end of kernel trace\n"); + break; + } + continue; + } + + bt_print_one(eip, ebp, &ar, &symtab, argcount); + + if (ar.ret == 0) + break; /* End of frames */ + eip = ar.ret; + ebp = ar.oldfp; + esp = ar.start; + } + + return 0; +} + +/* + * kdba_bt_stack + * + * This function implements the 'bt' command. Print a stack + * traceback. + * + * bt [] (addr-exp is for alternate stacks) + * btp (Kernel stack for ) + * + * address expression refers to a return address on the stack. It + * may be preceeded by a frame pointer. + * + * Inputs: + * regs registers at time kdb was entered. + * addr Pointer to Address provided to 'bt' command, if any. + * argcount + * p Pointer to task for 'btp' command. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * mds comes in handy when examining the stack to do a manual + * traceback. 
+ */ + +int +kdba_bt_stack(struct pt_regs *regs, kdb_machreg_t *addr, int argcount, + struct task_struct *p) +{ + return(kdba_bt_stack_i386(regs, addr, argcount, p, 0)); +} + +int +kdba_bt_process(struct task_struct *p, int argcount) +{ + struct pt_regs taskregs; + + memset(&taskregs, 0, sizeof(taskregs)); + taskregs.eip = p->thread.eip; + taskregs.esp = p->thread.esp; + + /* + * Since we don't really use the TSS + * to store register between task switches, + * attempt to locate real ebp (should be + * top of stack if task is in schedule) + */ + taskregs.ebp = *(kdb_machreg_t *)(taskregs.esp); + + taskregs.xcs = __KERNEL_CS; /* have to assume kernel space */ + + if (taskregs.esp < (unsigned long)p || + taskregs.esp >= (unsigned long)p + 8192) { + kdb_printf("Stack is not in task_struct, backtrace not available\n"); + return(0); + } + + return kdba_bt_stack_i386(&taskregs, NULL, argcount, p, 1); + +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_id.c linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_id.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_id.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_id.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,270 @@ +/* + * Minimalist Kernel Debugger - Architecture Dependent Instruction Disassembly + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + * Masahiro Adegawa 1999/12/01 + * 'sr' command, active flag in 'ps' + * Scott Lurndal 1999/12/12 + * Significantly restructure for linux2.3 + * Keith Owens 2000/05/23 + * KDB v1.2 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * kdba_dis_getsym + * + * Get a symbol for the disassembler. + * + * Parameters: + * addr Address for which to get symbol + * dip Pointer to disassemble_info + * Returns: + * 0 + * Locking: + * Remarks: + * Not used for kdb. + */ + +/* ARGSUSED */ +static int +kdba_dis_getsym(bfd_vma addr, disassemble_info *dip) +{ + + return 0; +} + +/* + * kdba_printaddress + * + * Print (symbolically) an address. + * + * Parameters: + * addr Address for which to get symbol + * dip Pointer to disassemble_info + * flag True if a ":" sequence should follow the address + * Returns: + * 0 + * Locking: + * Remarks: + * + */ + +/* ARGSUSED */ +void +kdba_printaddress(kdb_machreg_t addr, disassemble_info *dip, int flag) +{ + kdb_symtab_t symtab; + + /* + * Print a symbol name or address as necessary. + */ + kdbnearsym(addr, &symtab); + if (symtab.sym_name) { + /* Do not use kdb_symbol_print here, it always does + * kdb_printf but we want dip->fprintf_func. + */ + dip->fprintf_func(dip->stream, + "0x%0*lx %s", + 2*sizeof(addr), addr, symtab.sym_name); + if (addr != symtab.sym_start) + dip->fprintf_func(dip->stream, "+0x%x", addr - symtab.sym_start); + + } else { + dip->fprintf_func(dip->stream, "0x%x", addr); + } + + if (flag) + dip->fprintf_func(dip->stream, ": "); +} + +/* + * kdba_dis_printaddr + * + * Print (symbolically) an address. Called by GNU disassembly + * code via disassemble_info structure. + * + * Parameters: + * addr Address for which to get symbol + * dip Pointer to disassemble_info + * Returns: + * 0 + * Locking: + * Remarks: + * This function will always append ":" to the printed + * symbolic address. 
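+ *	For example (address hypothetical), an address 0x50 bytes into
+ *	schedule() would print as "0xc0114a30 schedule+0x50: ".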
+ */ + +static void +kdba_dis_printaddr(bfd_vma addr, disassemble_info *dip) +{ + kdba_printaddress(addr, dip, 1); +} + +/* + * kdba_dis_getmem + * + * Fetch 'length' bytes from 'addr' into 'buf'. + * + * Parameters: + * addr Address for which to get symbol + * buf Address of buffer to fill with bytes from 'addr' + * length Number of bytes to fetch + * dip Pointer to disassemble_info + * Returns: + * 0 + * Locking: + * Remarks: + * + */ + +/* ARGSUSED */ +static int +kdba_dis_getmem(bfd_vma addr, bfd_byte *buf, unsigned int length, disassemble_info *dip) +{ + bfd_byte *bp = buf; + int i; + + /* + * Fill the provided buffer with bytes from + * memory, starting at address 'addr' for 'length bytes. + * + */ + + for(i=0; imach = bfd_mach_i386_i386; + } else if (strcmp(mode, "8086") == 0) { + dip->mach = bfd_mach_i386_i8086; + } else { + return KDB_BADMODE; + } + } + + return 0; +} + +/* + * kdba_check_pc + * + * Check that the pc is satisfactory. + * + * Parameters: + * pc Program Counter Value. + * Returns: + * None + * Locking: + * None. + * Remarks: + * Can change pc. + */ + +void +kdba_check_pc(kdb_machreg_t *pc) +{ + /* No action */ +} + +/* + * kdba_id_printinsn + * + * Format and print a single instruction at 'pc'. Return the + * length of the instruction. + * + * Parameters: + * pc Program Counter Value. + * dip Disassemble_info structure pointer + * Returns: + * Length of instruction, -1 for error. + * Locking: + * None. + * Remarks: + * Depends on 'IDMODE' environment variable. + */ + +int +kdba_id_printinsn(kdb_machreg_t pc, disassemble_info *dip) +{ + kdba_dis_printaddr(pc, dip); + return print_insn_i386_att(pc, dip); +} + +/* + * kdba_id_init + * + * Initialize the architecture dependent elements of + * the disassembly information structure + * for the GNU disassembler. + * + * Parameters: + * None. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + */ + +void __init +kdba_id_init(disassemble_info *dip) +{ + dip->read_memory_func = kdba_dis_getmem; + dip->print_address_func = kdba_dis_printaddr; + dip->symbol_at_address_func = kdba_dis_getsym; + + dip->flavour = bfd_target_elf_flavour; + dip->arch = bfd_arch_i386; + dip->mach = bfd_mach_i386_i386; + dip->endian = BFD_ENDIAN_LITTLE; + + dip->display_endian = BFD_ENDIAN_LITTLE; +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_io.c linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_io.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/kdba_io.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/kdba_io.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,375 @@ +/* + * Kernel Debugger Console I/O handler + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Chuck Fleckenstein 1999/07/20 + * Move kdb_info struct declaration to this file + * for cases where serial support is not compiled into + * the kernel. + * + * Masahiro Adegawa 1999/07/20 + * Handle some peculiarities of japanese 86/106 + * keyboards. + * + * marc@mucom.co.il 1999/07/20 + * Catch buffer overflow for serial input. 
+ * + * Scott Foehner + * Port to ia64 + * + * Scott Lurndal 2000/01/03 + * Restructure for v1.0 + * + * Keith Owens 2000/05/23 + * KDB v1.2 + * + * Andi Kleen 2000/03/19 + * Support simultaneous input from serial line and keyboard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define KDB_BLINK_LED 1 + +int kdb_port; + +/* + * This module contains code to read characters from the keyboard or a serial + * port. + * + * It is used by the kernel debugger, and is polled, not interrupt driven. + * + */ + +#ifdef KDB_BLINK_LED +/* + * send: Send a byte to the keyboard controller. Used primarily to + * alter LED settings. + */ + +static void +kdb_kbdsend(unsigned char byte) +{ + while (inb(KBD_STATUS_REG) & KBD_STAT_IBF) + ; + outb(byte, KBD_DATA_REG); +} + +static void +kdb_toggleled(int led) +{ + static int leds; + + leds ^= led; + + kdb_kbdsend(KBD_CMD_SET_LEDS); + kdb_kbdsend((unsigned char)leds); +} +#endif /* KDB_BLINK_LED */ + +void +kdb_resetkeyboard(void) +{ +#if 0 + kdb_kbdsend(KBD_CMD_ENABLE); +#endif +} + +#if defined(CONFIG_SERIAL_CONSOLE) +/* Check if there is a byte ready at the serial port */ +static int get_serial_char(void) +{ + unsigned char ch; + int status; +#define serial_inp(info, offset) inb((info) + (offset)) + + if (kdb_port == 0) + return -1; + + if ((status = serial_inp(kdb_port, UART_LSR)) & UART_LSR_DR) { + ch = serial_inp(kdb_port, UART_RX); + if (ch == 0x7f) + ch = 8; + if (ch == '\t') + ch = ' '; + if (ch == 8) { /* BS */ + ; + } else if (ch == 13) { /* Enter */ + kdb_printf("\n"); + } else { + if (!isprint(ch)) + return(-1); + kdb_printf("%c", ch); + } + return ch; + } + return -1; +} +#endif /* CONFIG_SERIAL_CONSOLE */ + +#if defined(CONFIG_VT) +/* + * Check if the keyboard controller has a keypress for us. + * Some parts (Enter Release, LED change) are still blocking polled here, + * but hopefully they are all short. + */ +static int get_kbd_char(void) +{ + int scancode, scanstatus; + static int shift_lock; /* CAPS LOCK state (0-off, 1-on) */ + static int shift_key; /* Shift next keypress */ + static int ctrl_key; + u_short keychar; + extern u_short plain_map[], shift_map[], ctrl_map[]; + + if ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) + return -1; + + /* + * Fetch the scancode + */ + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + + /* + * Ignore mouse events. + */ + if (scanstatus & KBD_STAT_MOUSE_OBF) + return -1; + + /* + * Ignore release, trigger on make + * (except for shift keys, where we want to + * keep the shift state so long as the key is + * held down). + */ + + if (((scancode&0x7f) == 0x2a) || ((scancode&0x7f) == 0x36)) { + /* + * Next key may use shift table + */ + if ((scancode & 0x80) == 0) { + shift_key=1; + } else { + shift_key=0; + } + return -1; + } + + if ((scancode&0x7f) == 0x1d) { + /* + * Left ctrl key + */ + if ((scancode & 0x80) == 0) { + ctrl_key = 1; + } else { + ctrl_key = 0; + } + return -1; + } + + if ((scancode & 0x80) != 0) + return -1; + + scancode &= 0x7f; + + /* + * Translate scancode + */ + + if (scancode == 0x3a) { + /* + * Toggle caps lock + */ + shift_lock ^= 1; + + kdb_toggleled(0x4); + return -1; + } + + if (scancode == 0x0e) { + /* + * Backspace + */ + return 8; + } + + if (scancode == 0xe0) { + return -1; + } + + /* + * For Japanese 86/106 keyboards + * See comment in drivers/char/pc_keyb.c. 
+ * - Masahiro Adegawa + */ + if (scancode == 0x73) { + scancode = 0x59; + } else if (scancode == 0x7d) { + scancode = 0x7c; + } + + if (!shift_lock && !shift_key && !ctrl_key) { + keychar = plain_map[scancode]; + } else if (shift_lock || shift_key) { + keychar = shift_map[scancode]; + } else if (ctrl_key) { + keychar = ctrl_map[scancode]; + } else { + keychar = 0x0020; + kdb_printf("Unknown state/scancode (%d)\n", scancode); + } + keychar &= 0x0fff; + if (keychar == '\t') + keychar = ' '; + switch (KTYP(keychar)) { + case KT_LETTER: + case KT_LATIN: + if (isprint(keychar)) + break; /* printable characters */ + /* drop through */ + case KT_SPEC: + if (keychar == K_ENTER) + break; + /* drop through */ + default: + return(-1); /* ignore unprintables */ + } + + if ((scancode & 0x7f) == 0x1c) { + /* + * enter key. All done. Absorb the release scancode. + */ + while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) + ; + + /* + * Fetch the scancode + */ + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + + while (scanstatus & KBD_STAT_MOUSE_OBF) { + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + } + + if (scancode != 0x9c) { + /* + * Wasn't an enter-release, why not? + */ + kdb_printf("kdb: expected enter got 0x%x status 0x%x\n", + scancode, scanstatus); + } + + kdb_printf("\n"); + return 13; + } + + /* + * echo the character. + */ + kdb_printf("%c", keychar&0xff); + + return keychar & 0xff; +} +#endif /* CONFIG_VT */ + +#ifdef KDB_BLINK_LED + +/* Leave numlock alone, setting it messes up laptop keyboards with the keypad + * mapped over normal keys. + */ +int kdba_blink_mask = 0x1 | 0x4; + +#define BOGOMIPS (boot_cpu_data.loops_per_jiffy/(500000/HZ)) +static int blink_led(void) +{ + static long delay; + if (--delay < 0) { + if (BOGOMIPS == 0) /* early kdb */ + delay = 150000000/1000; /* arbitrary bogomips */ + else + delay = 150000000/BOGOMIPS; /* Roughly 1 second when polling */ + kdb_toggleled(kdba_blink_mask); + } + return -1; +} +#endif + +typedef int (*get_char_func)(void); + +static get_char_func poll_funcs[] = { +#if defined(CONFIG_VT) + get_kbd_char, +#endif +#if defined(CONFIG_SERIAL_CONSOLE) + get_serial_char, +#endif +#ifdef KDB_BLINK_LED + blink_led, +#endif + NULL +}; + +char * +kdba_read(char *buffer, size_t bufsize) +{ + char *cp = buffer; + char *bufend = buffer+bufsize-2; /* Reserve space for newline and null byte */ + + for (;;) { + int key; + get_char_func *f; + for (f = &poll_funcs[0]; ; ++f) { + if (*f == NULL) { + /* Reset NMI watchdog once per poll loop */ + touch_nmi_watchdog(); + f = &poll_funcs[0]; + } + key = (*f)(); + if (key != -1) + break; + } + + /* Echo is done in the low level functions */ + switch (key) { + case 8: /* backspace */ + if (cp > buffer) { + kdb_printf("\b \b"); + --cp; + } + break; + case 13: /* enter */ + *cp++ = '\n'; + *cp++ = '\0'; + return buffer; + default: + if (cp < bufend) + *cp++ = key; + break; + } + } +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kdb/kdbasupport.c linux-2.4.17-rc2-wli2/arch/i386/kdb/kdbasupport.c --- linux-2.4.17-rc2-virgin/arch/i386/kdb/kdbasupport.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/arch/i386/kdb/kdbasupport.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,1584 @@ +/* + * Kernel Debugger Architecture Independent Support Functions + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +char *kdb_diemsg; + + +/* + * kdba_find_return + * + * Given a starting point on the stack and symtab data for the + * current function, scan up the stack looking for a return + * address for this function. + * Inputs: + * sp Starting stack pointer for scan + * ss Start of stack for current process + * symtab kallsyms symbol data for the function + * Outputs: + * None. + * Returns: + * Position on stack of return address, 0 if not found. + * Locking: + * None. + * Remarks: + * This is sensitive to the calling sequence generated by gcc. + */ + +static kdb_machreg_t +kdba_find_return(kdb_machreg_t sp, kdb_machreg_t ss, const kdb_symtab_t *symtab) +{ + kdb_machreg_t ret; + kdb_symtab_t caller_symtab; + + if (KDB_DEBUG(ARA)) { + kdb_printf(" kdba_find_return: start\n"); + } + + if ((sp & -8192) != ss) { + kdb_printf(" sp is in wrong stack 0x%lx 0x%lx 0x%lx\n", sp, ss, sp & -8192); + return(0); + } + + if ((sp & (8192 - 1)) < sizeof(struct task_struct)) { + kdb_printf(" sp is inside task_struct\n"); + return(0); + } + + for (;ret = 0, sp & (8192-1);sp += 4) { + if (KDB_DEBUG(ARA)) { + kdb_printf(" sp=0x%lx", sp); + } + ret = kdba_getword(sp, 4); + kdbnearsym(ret, &caller_symtab); + if (KDB_DEBUG(ARA)) { + kdb_printf(" ret="); + kdb_symbol_print(ret, &caller_symtab, KDB_SP_DEFAULT|KDB_SP_SYMSIZE); + } + if (!caller_symtab.sym_name) { + if (KDB_DEBUG(ARA)) { + kdb_printf("\n"); + } + continue; /* not a valid kernel address */ + } + if (kdba_getword(ret-5, 1) == 0xe8) { + /* call disp32 */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" call disp32"); + } + if (ret + kdba_getword(ret-4, 4) == symtab->sym_start) { + if (KDB_DEBUG(ARA)) { + kdb_printf(" matched\n"); + } + break; /* call to this function */ + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" failed"); + } + } else if (kdba_getword(ret-7, 1) == 0xff && + kdba_getword(ret-6, 1) == 0x14 && + kdba_getword(ret-5, 1) == 0x85) { + /* call *0xnnnn(,%eax,4), used by syscall. + * Cannot calculate address, assume it is valid + * if the current function name starts with + * 'sys_' or 'old_'. + */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" call *0xnnnn(,%%eax,4)"); + } + if (strncmp(symtab->sym_name, "sys_", 4) == 0 || + strncmp(symtab->sym_name, "old_", 4) == 0) { + if (KDB_DEBUG(ARA)) { + kdb_printf(" matched\n"); + } + break; /* probably call to this function */ + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" failed"); + } + } else if (kdba_getword(ret-2, 1) == 0xff && + (kdba_getword(ret-1, 1) & 0xf8) == 0xd0) { + /* call *%reg. Cannot validate, have to assume + * it is valid. + */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" call *%%reg, assume valid\n"); + } + break; /* hope it is a call to this function */ + } else if (kdba_getword(ret-3, 1) == 0xff && + (kdba_getword(ret-2, 1) & 0xf8) == 0x50) { + /* call *disp8(%reg). Cannot validate, have to assume + * it is valid. 
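+		 *
+		 * In summary, the call-site byte patterns checked by
+		 * this loop are:
+		 *	e8 d32		call disp32, target == ret + d32
+		 *	ff 14 85 a32	call *0xnnnn(,%eax,4)
+		 *	ff d0-d7	call *%reg
+		 *	ff 50-57 d8	call *disp8(%reg)
+		 *	e9 d32 / eb d8	tail-call jmp, matched like call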
+ */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" call *disp8(%%reg), assume valid\n"); + } + break; /* hope it is a call to this function */ + } else if (kdba_getword(ret-5, 1) == 0xe9) { + /* jmp disp32. I have been told that gcc may + * do function tail optimization and replace + * call with jmp. + */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" jmp disp32\n"); + } + if (ret + kdba_getword(ret-4, 4) == symtab->sym_start) { + if (KDB_DEBUG(ARA)) { + kdb_printf(" matched\n"); + } + break; /* jmp to this function */ + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" failed"); + } + } else if (kdba_getword(ret-2, 1) == 0xeb) { + /* jmp disp8 */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" jmp disp8\n"); + } + if (ret + kdba_getword(ret-1, 1) == symtab->sym_start) { + if (KDB_DEBUG(ARA)) { + kdb_printf(" matched\n"); + } + break; /* jmp to this function */ + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" failed"); + } + } else if (strcmp(caller_symtab.sym_name, "ret_from_intr") == 0 + && ret == caller_symtab.sym_start) { + /* ret_from_intr is pushed on stack for interrupts */ + if (KDB_DEBUG(ARA)) { + kdb_printf(" ret_from_intr matched\n"); + } + break; /* special case, hand crafted frame */ + } + if (KDB_DEBUG(ARA)) { + kdb_printf("\n"); + } + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" end ret=0x%lx sp=0x%lx\n", ret, sp); + } + if (ret) + return(sp); + return(0); +} + +/* + * kdba_prologue + * + * This function analyzes a gcc-generated function prototype + * with or without frame pointers to determine the amount of + * automatic storage and register save storage is used on the + * stack of the target function. It only counts instructions + * that have been executed up to but excluding the current eip. + * Inputs: + * code Start address of function code to analyze + * pc Current program counter within function + * sp Current stack pointer for function + * fp Current frame pointer for function, may not be valid + * ss Start of stack for current process. + * caller 1 if looking for data on the caller frame, 0 for callee. + * Outputs: + * ar Activation record, all fields may be set. fp and oldfp + * are 0 if they cannot be extracted. return is 0 if the + * code cannot find a valid return address. args and arg0 + * are 0 if the number of arguments cannot be safely + * calculated. + * Returns: + * 1 if prologue is valid, 0 otherwise. If pc is 0 treat it as a + * valid prologue to allow bt on wild branches. + * Locking: + * None. + * Remarks: + * + * A prologue for ia32 generally looks like: + * + * pushl %ebp [All functions, but only if + * movl %esp, %ebp compiled with frame pointers] + * subl $auto, %esp [some functions] + * pushl %reg [some functions] + * pushl %reg [some functions] + * + * FIXME: Mike Galbraith says that gcc 2.95 can generate a slightly + * different prologue. No support for gcc 2.95 yet. + */ + +int +kdba_prologue(const kdb_symtab_t *symtab, kdb_machreg_t pc, kdb_machreg_t sp, + kdb_machreg_t fp, kdb_machreg_t ss, int caller, kdb_ar_t *ar) +{ + kdb_machreg_t ret_p, code = symtab->sym_start; + int oldfp_present = 0, unwound = 0; + + if (KDB_DEBUG(ARA)) { + kdb_printf("kdba_prologue: code=0x%lx %s pc=0x%lx sp=0x%lx fp=0x%lx\n", + code, symtab->sym_name, pc, sp, fp); + } + + /* Special case for wild branches. 
Assumes top of stack is return address */ + if (pc == 0) { + memset(ar, 0, sizeof(*ar)); + ar->setup = 4; + ar->end = sp; + ar->start = ar->end + 4; + ar->ret = kdba_getword(sp, 4); + if (KDB_DEBUG(ARA)) { + kdb_printf(" pc==0: ret=0x%lx\n", ar->ret); + } + return(1); + } + + if (code == 0 || sp & 3 || ss != (sp & -8192)) + return(0); + + ar->end = sp; /* End of activation record +1 */ + + /* Special cases galore when the caller pc is within entry.S. + * The return address for these routines is outside the kernel, + * so the normal algorithm to find the frame does not work. + * Hand craft the frame to no setup, regs, locals etc, assume 6 + * parameters. + * This list was extracted from entry.S by looking for all call + * instructions that were eventually followed by RESTORE_ALL, + * take the label before each such instruction. + */ + if (caller && + (strcmp(symtab->sym_name, "lcall7") == 0 || + strcmp(symtab->sym_name, "lcall27") == 0 || + strcmp(symtab->sym_name, "kdb_call") == 0 || + strcmp(symtab->sym_name, "system_call") == 0 || + strcmp(symtab->sym_name, "tracesys") == 0 || + strcmp(symtab->sym_name, "signal_return") == 0 || + strcmp(symtab->sym_name, "v86_signal_return") == 0 || + strcmp(symtab->sym_name, "tracesys") == 0 || + strcmp(symtab->sym_name, "tracesys_exit") == 0 || + strcmp(symtab->sym_name, "handle_softirq") == 0 || + strcmp(symtab->sym_name, "reschedule") == 0 || + strcmp(symtab->sym_name, "error_code") == 0 || + strcmp(symtab->sym_name, "device_not_available") == 0 || + strcmp(symtab->sym_name, "nmi") == 0)) { + ar->start = ar->end + 6*4; /* 6 parameters */ + if ((ar->start & -8192) != ss) + ar->start = 0; + return(1); + } + + ar->setup = 4; /* Return address is always on stack */ + + /* Kludge. If we are sitting on 'ret' then the stack has been unwound, + * ignore all the startup code. + */ + if (kdba_getword(pc, 1) == 0xc3) { + /* ret */ + unwound = 1; + } + + if (!unwound + && code < pc + && kdba_getword(code, 1) == 0x55) { + /* pushl %ebp */ + ar->setup += 4; /* Frame pointer is on stack */ + oldfp_present = 1; + ++code; + if (KDB_DEBUG(ARA)) { + kdb_printf(" pushl %%ebp\n"); + } + if (code < pc && + kdba_getword(code, 1) == 0x89 && + kdba_getword(code+1, 1) == 0xe5) { + /* movl %esp,%ebp */ + if (fp >= sp && (fp & -8192) == ss) + ar->fp = fp; /* %ebp has been set */ + code += 2; + if (KDB_DEBUG(ARA)) { + kdb_printf(" movl %%esp,%%ebp, fp=0x%lx\n", ar->fp); + } + } + } + + if (!unwound && code < pc) { + if (kdba_getword(code, 1) == 0x83 && + kdba_getword(code+1, 1) == 0xec) { + /* subl $xx,%esp */ + code += 2; + ar->locals = kdba_getword(code, 1); + ++code; + if (KDB_DEBUG(ARA)) { + kdb_printf(" subl $xx,%%esp, locals=%d\n", ar->locals); + } + } else if (kdba_getword(code, 1) == 0x81 && + kdba_getword(code+1, 1) == 0xec) { + /* subl $xxxxxxxx,%esp */ + code += 2; + ar->locals = kdba_getword(code, 4); + code += 4; + if (KDB_DEBUG(ARA)) { + kdb_printf(" subl $xxxxxxxx,%%esp, locals=%d\n", ar->locals); + } + } + } + + while (!unwound && code < pc && (kdba_getword(code, 1)&0xf8) == 0x50) { + /* pushl %reg */ + ar->regs += 4; + ++code; + if (KDB_DEBUG(ARA)) { + kdb_printf(" pushl %%reg, regs=%d\n", ar->regs); + } + } + + /* Check the return address. It must point within the kernel + * and the code at that location must be a valid entry sequence. 
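+	 * For example, with no usable frame pointer and setup = 8
+	 * (return address plus saved %ebp), regs = 8 and locals = 12,
+	 * the return address is expected at end + 8 + 12 + 8 - 4 =
+	 * end + 24.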
+ */ + if (ar->fp) { + ret_p = ar->fp + ar->setup; + } + else { + ret_p = ar->end + ar->regs + ar->locals + ar->setup; + } + ret_p -= 4; + if (KDB_DEBUG(ARA)) { + kdb_printf(" ret_p(0)=0x%lx\n", ret_p); + } + if ((ret_p & -8192) == ss && + (ret_p = kdba_find_return(ret_p, ss, symtab))) { + ar->ret = kdba_getword(ret_p, 4); + } + if (KDB_DEBUG(ARA)) { + kdb_printf(" ret_p(1)=0x%lx ret=0x%lx\n", ret_p, ar->ret); + } + if (ar->ret) { + ar->fp = ret_p - ar->setup + 4; /* "accurate" fp */ + ar->start = ret_p + 4; + if (KDB_DEBUG(ARA)) { + kdb_printf(" fp=0x%lx start=0x%lx\n", ar->fp, ar->start); + } + } + if (oldfp_present) { + if (ar->fp) + ar->oldfp = kdba_getword(ar->fp, 4); + if (KDB_DEBUG(ARA)) { + kdb_printf(" oldfp=0x%lx", ar->oldfp); + } + if (ar->oldfp <= ar->fp || (ar->oldfp & -8192) != ss) { + ar->oldfp = 0; + if (KDB_DEBUG(ARA)) { + kdb_printf(" (out of range)"); + } + } + if (KDB_DEBUG(ARA)) { + kdb_printf("\n"); + } + } + return(1); +} + +kdb_machreg_t +kdba_getdr6(void) +{ + return kdba_getdr(6); +} + +kdb_machreg_t +kdba_getdr7(void) +{ + return kdba_getdr(7); +} + +void +kdba_putdr6(kdb_machreg_t contents) +{ + kdba_putdr(6, contents); +} + +static void +kdba_putdr7(kdb_machreg_t contents) +{ + kdba_putdr(7, contents); +} + +void +kdba_installdbreg(kdb_bp_t *bp) +{ + kdb_machreg_t dr7; + + dr7 = kdba_getdr7(); + + kdba_putdr(bp->bp_hard->bph_reg, bp->bp_addr); + + dr7 |= DR7_GE; + + switch (bp->bp_hard->bph_reg){ + case 0: + DR7_RW0SET(dr7,bp->bp_hard->bph_mode); + DR7_LEN0SET(dr7,bp->bp_hard->bph_length); + DR7_G0SET(dr7); + break; + case 1: + DR7_RW1SET(dr7,bp->bp_hard->bph_mode); + DR7_LEN1SET(dr7,bp->bp_hard->bph_length); + DR7_G1SET(dr7); + break; + case 2: + DR7_RW2SET(dr7,bp->bp_hard->bph_mode); + DR7_LEN2SET(dr7,bp->bp_hard->bph_length); + DR7_G2SET(dr7); + break; + case 3: + DR7_RW3SET(dr7,bp->bp_hard->bph_mode); + DR7_LEN3SET(dr7,bp->bp_hard->bph_length); + DR7_G3SET(dr7); + break; + default: + kdb_printf("kdb: Bad debug register!! %ld\n", + bp->bp_hard->bph_reg); + break; + } + + kdba_putdr7(dr7); + return; +} + +void +kdba_removedbreg(kdb_bp_t *bp) +{ + int regnum; + kdb_machreg_t dr7; + + if (!bp->bp_hard) + return; + + regnum = bp->bp_hard->bph_reg; + + dr7 = kdba_getdr7(); + + kdba_putdr(regnum, 0); + + switch (regnum) { + case 0: + DR7_G0CLR(dr7); + DR7_L0CLR(dr7); + break; + case 1: + DR7_G1CLR(dr7); + DR7_L1CLR(dr7); + break; + case 2: + DR7_G2CLR(dr7); + DR7_L2CLR(dr7); + break; + case 3: + DR7_G3CLR(dr7); + DR7_L3CLR(dr7); + break; + default: + kdb_printf("kdb: Bad debug register!! 
%d\n", regnum); + break; + } + + kdba_putdr7(dr7); +} + +kdb_machreg_t +kdba_getdr(int regnum) +{ + kdb_machreg_t contents = 0; + switch(regnum) { + case 0: + __asm__ ("movl %%db0,%0\n\t":"=r"(contents)); + break; + case 1: + __asm__ ("movl %%db1,%0\n\t":"=r"(contents)); + break; + case 2: + __asm__ ("movl %%db2,%0\n\t":"=r"(contents)); + break; + case 3: + __asm__ ("movl %%db3,%0\n\t":"=r"(contents)); + break; + case 4: + case 5: + break; + case 6: + __asm__ ("movl %%db6,%0\n\t":"=r"(contents)); + break; + case 7: + __asm__ ("movl %%db7,%0\n\t":"=r"(contents)); + break; + default: + break; + } + + return contents; +} + + +kdb_machreg_t +kdb_getcr(int regnum) +{ + kdb_machreg_t contents = 0; + switch(regnum) { + case 0: + __asm__ ("movl %%cr0,%0\n\t":"=r"(contents)); + break; + case 1: + break; + case 2: + __asm__ ("movl %%cr2,%0\n\t":"=r"(contents)); + break; + case 3: + __asm__ ("movl %%cr3,%0\n\t":"=r"(contents)); + break; + case 4: + __asm__ ("movl %%cr4,%0\n\t":"=r"(contents)); + break; + default: + break; + } + + return contents; +} + +void +kdba_putdr(int regnum, kdb_machreg_t contents) +{ + switch(regnum) { + case 0: + __asm__ ("movl %0,%%db0\n\t"::"r"(contents)); + break; + case 1: + __asm__ ("movl %0,%%db1\n\t"::"r"(contents)); + break; + case 2: + __asm__ ("movl %0,%%db2\n\t"::"r"(contents)); + break; + case 3: + __asm__ ("movl %0,%%db3\n\t"::"r"(contents)); + break; + case 4: + case 5: + break; + case 6: + __asm__ ("movl %0,%%db6\n\t"::"r"(contents)); + break; + case 7: + __asm__ ("movl %0,%%db7\n\t"::"r"(contents)); + break; + default: + break; + } +} + +/* + * kdba_getregcontents + * + * Return the contents of the register specified by the + * input string argument. Return an error if the string + * does not match a machine register. + * + * The following pseudo register names are supported: + * ®s - Prints address of exception frame + * kesp - Prints kernel stack pointer at time of fault + * cesp - Prints current kernel stack pointer, inside kdb + * ceflags - Prints current flags, inside kdb + * % - Uses the value of the registers at the + * last time the user process entered kernel + * mode, instead of the registers at the time + * kdb was entered. + * + * Parameters: + * regname Pointer to string naming register + * regs Pointer to structure containing registers. + * Outputs: + * *contents Pointer to unsigned long to recieve register contents + * Returns: + * 0 Success + * KDB_BADREG Invalid register name + * Locking: + * None. + * Remarks: + * If kdb was entered via an interrupt from the kernel itself then + * ss and esp are *not* on the stack. 
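+ *	For example, in that case "kesp" is derived as
+ *	&regs + sizeof(struct pt_regs) - 2*4, exactly as coded below.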
+ */ + +static struct kdbregs { + char *reg_name; + size_t reg_offset; +} kdbreglist[] = { + { "eax", offsetof(struct pt_regs, eax) }, + { "ebx", offsetof(struct pt_regs, ebx) }, + { "ecx", offsetof(struct pt_regs, ecx) }, + { "edx", offsetof(struct pt_regs, edx) }, + + { "esi", offsetof(struct pt_regs, esi) }, + { "edi", offsetof(struct pt_regs, edi) }, + { "esp", offsetof(struct pt_regs, esp) }, + { "eip", offsetof(struct pt_regs, eip) }, + + { "ebp", offsetof(struct pt_regs, ebp) }, + { "xss", offsetof(struct pt_regs, xss) }, + { "xcs", offsetof(struct pt_regs, xcs) }, + { "eflags", offsetof(struct pt_regs, eflags) }, + + { "xds", offsetof(struct pt_regs, xds) }, + { "xes", offsetof(struct pt_regs, xes) }, + { "origeax", offsetof(struct pt_regs, orig_eax) }, + +}; + +static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs); + +static struct kdbregs dbreglist[] = { + { "dr0", 0 }, + { "dr1", 1 }, + { "dr2", 2 }, + { "dr3", 3 }, + { "dr6", 6 }, + { "dr7", 7 }, +}; + +static const int ndbreglist = sizeof(dbreglist) / sizeof(struct kdbregs); + +int +kdba_getregcontents(const char *regname, + struct pt_regs *regs, + kdb_machreg_t *contents) +{ + int i; + + if (strcmp(regname, "®s") == 0) { + *contents = (unsigned long)regs; + return 0; + } + + if (strcmp(regname, "kesp") == 0) { + *contents = (unsigned long)regs + sizeof(struct pt_regs); + if ((regs->xcs & 0xffff) == __KERNEL_CS) { + /* esp and ss are not on stack */ + *contents -= 2*4; + } + return 0; + } + + if (strcmp(regname, "cesp") == 0) { + asm volatile("movl %%esp,%0":"=m" (*contents)); + return 0; + } + + if (strcmp(regname, "ceflags") == 0) { + int flags; + __save_flags(flags); + *contents = flags; + return 0; + } + + if (regname[0] == '%') { + /* User registers: %%e[a-c]x, etc */ + regname++; + regs = (struct pt_regs *) + (current->thread.esp0 - sizeof(struct pt_regs)); + } + + for (i=0; ixcs & 0xffff) == __KERNEL_CS) { + /* No cpl switch, esp and ss are not on stack */ + if (strcmp(kdbreglist[i].reg_name, "esp") == 0) { + *contents = (kdb_machreg_t)regs + + sizeof(struct pt_regs) - 2*4; + return(0); + } + if (strcmp(kdbreglist[i].reg_name, "xss") == 0) { + asm volatile( + "pushl %%ss\n" + "popl %0\n" + :"=m" (*contents)); + return(0); + } + } + *contents = *(unsigned long *)((unsigned long)regs + + kdbreglist[i].reg_offset); + return(0); + } + + for (i=0; i + * + * Parameters: + * regname Pointer to string naming register + * regs Pointer to structure containing registers. + * contents Unsigned long containing new register contents + * Outputs: + * Returns: + * 0 Success + * KDB_BADREG Invalid register name + * Locking: + * None. + * Remarks: + */ + +int +kdba_setregcontents(const char *regname, + struct pt_regs *regs, + unsigned long contents) +{ + int i; + + if (regname[0] == '%') { + regname++; + regs = (struct pt_regs *) + (current->thread.esp0 - sizeof(struct pt_regs)); + } + + for (i=0; ithread.esp0 - sizeof(struct pt_regs)); + } + + if (type == NULL) { + struct kdbregs *rlp; + kdb_machreg_t contents; + + for (i=0, rlp=kdbreglist; ieip; +} + +int +kdba_setpc(kdb_eframe_t ef, kdb_machreg_t newpc) +{ + ef->eip = newpc; + KDB_STATE_SET(IP_ADJUSTED); + return 0; +} + +/* + * kdba_main_loop + * + * Do any architecture specific set up before entering the main kdb loop. + * The primary function of this routine is to make all processes look the + * same to kdb, kdb must be able to list a process without worrying if the + * process is running or blocked, so make all process look as though they + * are blocked. 
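+ *	It does this by copying eip and esp from the exception frame
+ *	into current->thread before entering kdb_main_loop.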
+ * + * Inputs: + * reason The reason KDB was invoked + * error The hardware-defined error code + * error2 kdb's current reason code. Initially error but can change + * acording to kdb state. + * db_result Result from break or debug point. + * ef The exception frame at time of fault/breakpoint. If reason + * is KDB_REASON_SILENT then ef is NULL, otherwise it should + * always be valid. + * Returns: + * 0 KDB was invoked for an event which it wasn't responsible + * 1 KDB handled the event for which it was invoked. + * Outputs: + * Sets eip and esp in current->thread. + * Locking: + * None. + * Remarks: + * none. + */ + +int +kdba_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, + kdb_dbtrap_t db_result, kdb_eframe_t ef) +{ + if (ef) { + kdba_getregcontents("eip", ef, &(current->thread.eip)); + kdba_getregcontents("esp", ef, &(current->thread.esp)); + } + return(kdb_main_loop(reason, reason2, error, db_result, ef)); +} + +void +kdba_disableint(kdb_intstate_t *state) +{ + int *fp = (int *)state; + int flags; + + __save_flags(flags); + __cli(); + + *fp = flags; +} + +void +kdba_restoreint(kdb_intstate_t *state) +{ + int flags = *(int *)state; + __restore_flags(flags); +} + +void +kdba_setsinglestep(struct pt_regs *regs) +{ + if (regs->eflags & EF_IE) + KDB_STATE_SET(A_IF); + else + KDB_STATE_CLEAR(A_IF); + regs->eflags = (regs->eflags | EF_TF) & ~EF_IE; +} + +void +kdba_clearsinglestep(struct pt_regs *regs) +{ + if (KDB_STATE(A_IF)) + regs->eflags |= EF_IE; + else + regs->eflags &= ~EF_IE; +} + +int +kdba_getcurrentframe(struct pt_regs *regs) +{ + regs->xcs = 0; +#if defined(CONFIG_FRAME_POINTER) + asm volatile("movl %%ebp,%0":"=m" (*(int *)®s->ebp)); +#endif + asm volatile("movl %%esp,%0":"=m" (*(int *)®s->esp)); + + return 0; +} + +#ifdef KDB_HAVE_LONGJMP +int +kdba_setjmp(kdb_jmp_buf *jb) +{ +#if defined(CONFIG_FRAME_POINTER) + __asm__ ("movl 8(%esp), %eax\n\t" + "movl %ebx, 0(%eax)\n\t" + "movl %esi, 4(%eax)\n\t" + "movl %edi, 8(%eax)\n\t" + "movl (%esp), %ecx\n\t" + "movl %ecx, 12(%eax)\n\t" + "leal 8(%esp), %ecx\n\t" + "movl %ecx, 16(%eax)\n\t" + "movl 4(%esp), %ecx\n\t" + "movl %ecx, 20(%eax)\n\t"); +#else /* CONFIG_FRAME_POINTER */ + __asm__ ("movl 4(%esp), %eax\n\t" + "movl %ebx, 0(%eax)\n\t" + "movl %esi, 4(%eax)\n\t" + "movl %edi, 8(%eax)\n\t" + "movl %ebp, 12(%eax)\n\t" + "leal 4(%esp), %ecx\n\t" + "movl %ecx, 16(%eax)\n\t" + "movl 0(%esp), %ecx\n\t" + "movl %ecx, 20(%eax)\n\t"); +#endif /* CONFIG_FRAME_POINTER */ + KDB_STATE_SET(LONGJMP); + return 0; +} + +void +kdba_longjmp(kdb_jmp_buf *jb, int reason) +{ +#if defined(CONFIG_FRAME_POINTER) + __asm__("movl 8(%esp), %ecx\n\t" + "movl 12(%esp), %eax\n\t" + "movl 20(%ecx), %edx\n\t" + "movl 0(%ecx), %ebx\n\t" + "movl 4(%ecx), %esi\n\t" + "movl 8(%ecx), %edi\n\t" + "movl 12(%ecx), %ebp\n\t" + "movl 16(%ecx), %esp\n\t" + "jmp *%edx\n"); +#else /* CONFIG_FRAME_POINTER */ + __asm__("movl 4(%esp), %ecx\n\t" + "movl 8(%esp), %eax\n\t" + "movl 20(%ecx), %edx\n\t" + "movl 0(%ecx), %ebx\n\t" + "movl 4(%ecx), %esi\n\t" + "movl 8(%ecx), %edi\n\t" + "movl 12(%ecx), %ebp\n\t" + "movl 16(%ecx), %esp\n\t" + "jmp *%edx\n"); +#endif /* CONFIG_FRAME_POINTER */ +} +#endif /* KDB_HAVE_LONGJMP */ + + +/* + * kdba_enable_mce + * + * This function is called once on each CPU to enable machine + * check exception handling. + * + * Inputs: + * None. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. 
+ * Remarks: + * + */ + +void +kdba_enable_mce(void) +{ + /* No longer required, arch/i386/kernel/bluesmoke.c does the job now */ +} + +/* + * kdba_enable_lbr + * + * Enable last branch recording. + * + * Parameters: + * None. + * Returns: + * None + * Locking: + * None + * Remarks: + * None. + */ + +static unsigned char lbr_warned; + +void +kdba_enable_lbr(void) +{ + u32 lv, hv; + + if (!test_bit(X86_FEATURE_MCA, &boot_cpu_data.x86_capability)) { + if (lbr_warned) { + kdb_printf("kdb: machine does not support last branch recording\n"); + lbr_warned = 1; + } + return; + } + rdmsr(MSR_IA32_DEBUGCTLMSR, lv, hv); + lv |= 0x1; /* Set LBR enable */ + wrmsr(MSR_IA32_DEBUGCTLMSR, lv, hv); +} + +/* + * kdba_disable_lbr + * + * disable last branch recording. + * + * Parameters: + * None. + * Returns: + * None + * Locking: + * None + * Remarks: + * None. + */ + +void +kdba_disable_lbr(void) +{ + u32 lv, hv; + + if (!test_bit(X86_FEATURE_MCA, &boot_cpu_data.x86_capability)) { + if (lbr_warned) { + kdb_printf("kdb: machine does not support last branch recording\n"); + lbr_warned = 1; + } + return; + } + rdmsr(MSR_IA32_DEBUGCTLMSR, lv, hv); + lv &= ~0x1; /* Set LBR disable */ + wrmsr(MSR_IA32_DEBUGCTLMSR, lv, hv); +} + +/* + * kdba_print_lbr + * + * Print last branch and last exception addresses + * + * Parameters: + * None. + * Returns: + * None + * Locking: + * None + * Remarks: + * None. + */ + +void +kdba_print_lbr(void) +{ + u32 from, to, dummy; + + if (!test_bit(X86_FEATURE_MCA, &boot_cpu_data.x86_capability)) + return; + + rdmsr(MSR_IA32_LASTBRANCHFROMIP, from, dummy); + rdmsr(MSR_IA32_LASTBRANCHTOIP, to, dummy); + kdb_printf("Last Branch IP, from: "); + kdb_symbol_print(from, NULL, KDB_SP_DEFAULT); + kdb_printf(" to: "); + kdb_symbol_print(to, NULL, KDB_SP_DEFAULT); + kdb_printf("\n"); + rdmsr(MSR_IA32_LASTINTFROMIP, from, dummy); + rdmsr(MSR_IA32_LASTINTTOIP, to, dummy); + kdb_printf("Last Int IP, from: "); + kdb_symbol_print(from, NULL, KDB_SP_DEFAULT); + kdb_printf(" to: "); + kdb_symbol_print(to, NULL, KDB_SP_DEFAULT); + kdb_printf("\n"); +} + +/* + * kdba_getword + * + * Architecture specific function to access kernel virtual + * address space. + * + * Parameters: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * None. + */ + +unsigned long +kdba_getword(unsigned long addr, size_t width) +{ + /* + * This function checks the address for validity. Any address + * in the range PAGE_OFFSET to high_memory is legal, any address + * which maps to a vmalloc region is legal, and any address which + * is a user address, we use get_user() to verify validity. + */ + + if (addr < PAGE_OFFSET) { + /* + * Usermode address. 
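+	 * On the default i386 3G/1G split PAGE_OFFSET is 0xc0000000,
+	 * so e.g. 0x08048000 is treated as a user address and is
+	 * fetched via get_user() below.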
+ */ + unsigned long diag; + unsigned long ulval; + + switch (width) { + case 4: + { unsigned long *lp; + + lp = (unsigned long *) addr; + diag = get_user(ulval, lp); + break; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *) addr; + diag = get_user(ulval, sp); + break; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *) addr; + diag = get_user(ulval, cp); + break; + } + default: + kdb_printf("kdbgetword: Bad width\n"); + return 0L; + } + + if (diag) { + if (!KDB_STATE(SUPPRESS)) { + kdb_printf("kdb: Bad user address 0x%lx\n", addr); + KDB_STATE_SET(SUPPRESS); + } + return 0L; + } + KDB_STATE_CLEAR(SUPPRESS); + return ulval; + } + + if (addr > (unsigned long)high_memory) { + if (!kdb_vmlist_check(addr, addr+width)) { + /* + * Would appear to be an illegal kernel address; + * Print a message once, and don't print again until + * a legal address is used. + */ + if (!KDB_STATE(SUPPRESS)) { + kdb_printf("kdb: Bad kernel address 0x%lx\n", addr); + KDB_STATE_SET(SUPPRESS); + } + return 0L; + } + } + + /* + * A good address. Reset error flag. + */ + KDB_STATE_CLEAR(SUPPRESS); + + switch (width) { + case 4: + { unsigned long *lp; + + lp = (unsigned long *)(addr); + return *lp; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *)(addr); + return *sp; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *)(addr); + return *cp; + } + } + + kdb_printf("kdbgetword: Bad width\n"); + return 0L; +} + +/* + * kdba_putword + * + * Architecture specific function to access kernel virtual + * address space. + * + * Parameters: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * None. + */ + +unsigned long +kdba_putword(unsigned long addr, size_t size, unsigned long contents) +{ + /* + * This function checks the address for validity. Any address + * in the range PAGE_OFFSET to high_memory is legal, any address + * which maps to a vmalloc region is legal, and any address which + * is a user address, we use get_user() to verify validity. + */ + + if (addr < PAGE_OFFSET) { + /* + * Usermode address. + */ + unsigned long diag; + + switch (size) { + case 4: + { unsigned long *lp; + + lp = (unsigned long *) addr; + diag = put_user(contents, lp); + break; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *) addr; + diag = put_user(contents, sp); + break; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *) addr; + diag = put_user(contents, cp); + break; + } + default: + kdb_printf("kdba_putword: Bad width\n"); + return 0; + } + + if (diag) { + if (!KDB_STATE(SUPPRESS)) { + kdb_printf("kdb: Bad user address 0x%lx\n", addr); + KDB_STATE_SET(SUPPRESS); + } + return 0; + } + KDB_STATE_CLEAR(SUPPRESS); + return 0; + } + + if (addr > (unsigned long)high_memory) { + if (!kdb_vmlist_check(addr, addr+size)) { + /* + * Would appear to be an illegal kernel address; + * Print a message once, and don't print again until + * a legal address is used. + */ + if (!KDB_STATE(SUPPRESS)) { + kdb_printf("kdb: Bad kernel address 0x%lx\n", addr); + KDB_STATE_SET(SUPPRESS); + } + return 0L; + } + } + + /* + * A good address. Reset error flag. 
+ */ + KDB_STATE_CLEAR(SUPPRESS); + + switch (size) { + case 4: + { unsigned long *lp; + + lp = (unsigned long *)(addr); + *lp = contents; + return 0; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *)(addr); + *sp = (unsigned short) contents; + return 0; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *)(addr); + *cp = (unsigned char) contents; + return 0; + } + } + + kdb_printf("kdba_putword: Bad width\n"); + return 0; +} + +/* + * kdba_callback_die + * + * Callback function for kernel 'die' function. + * + * Parameters: + * regs Register contents at time of trap + * error_code Trap-specific error code value + * trapno Trap number + * vp Pointer to die message + * Returns: + * Returns 1 if fault handled by kdb. + * Locking: + * None. + * Remarks: + * + */ +int +kdba_callback_die(struct pt_regs *regs, int error_code, long trapno, void *vp) +{ + /* + * Save a pointer to the message provided to 'die()'. + */ + kdb_diemsg = (char *)vp; + + return kdb(KDB_REASON_OOPS, error_code, (kdb_eframe_t) regs); +} + +/* + * kdba_callback_bp + * + * Callback function for kernel breakpoint trap. + * + * Parameters: + * regs Register contents at time of trap + * error_code Trap-specific error code value + * trapno Trap number + * vp Not Used. + * Returns: + * Returns 1 if fault handled by kdb. + * Locking: + * None. + * Remarks: + * + */ + +int +kdba_callback_bp(struct pt_regs *regs, int error_code, long trapno, void *vp) +{ + int diag; + + if (KDB_DEBUG(BP)) + kdb_printf("cb_bp: e_c = %d tn = %ld regs = 0x%p\n", error_code, + trapno, regs); + + diag = kdb(KDB_REASON_BREAK, error_code, (kdb_eframe_t) regs); + + if (KDB_DEBUG(BP)) + kdb_printf("cb_bp: e_c = %d tn = %ld regs = 0x%p diag = %d\n", error_code, + trapno, regs, diag); + return diag; +} + +/* + * kdba_callback_debug + * + * Callback function for kernel debug register trap. + * + * Parameters: + * regs Register contents at time of trap + * error_code Trap-specific error code value + * trapno Trap number + * vp Not used. + * Returns: + * Returns 1 if fault handled by kdb. + * Locking: + * None. + * Remarks: + * + */ + +int +kdba_callback_debug(struct pt_regs *regs, int error_code, long trapno, void *vp) +{ + return kdb(KDB_REASON_DEBUG, error_code, (kdb_eframe_t) regs); +} + +/* + * kdba_init + * + * Architecture specific initialization. + * + * Parameters: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * None. + */ + +void __init +kdba_init(void) +{ + kdba_enable_lbr(); + + return; +} + +/* + * kdba_adjust_ip + * + * Architecture specific adjustment of instruction pointer before leaving + * kdb. + * + * Parameters: + * reason The reason KDB was invoked + * error The hardware-defined error code + * ef The exception frame at time of fault/breakpoint. If reason + * is KDB_REASON_SILENT then ef is NULL, otherwise it should + * always be valid. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * noop on ix86. 
+ */ + +void +kdba_adjust_ip(kdb_reason_t reason, int error, kdb_eframe_t ef) +{ + return; +} diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/bluesmoke.c linux-2.4.17-rc2-wli2/arch/i386/kernel/bluesmoke.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/bluesmoke.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/bluesmoke.c Tue Dec 18 22:21:49 2001 @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) { machine_check_vector(regs, error_code); + (void)kdb(KDB_REASON_NMI, error_code, regs); } /* diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/entry.S linux-2.4.17-rc2-wli2/arch/i386/kernel/entry.S --- linux-2.4.17-rc2-virgin/arch/i386/kernel/entry.S Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/entry.S Thu Dec 20 17:01:38 2001 @@ -71,7 +71,7 @@ * these are offsets into the task-struct. */ state = 0 -flags = 4 +preempt_count = 4 sigpending = 8 addr_limit = 12 exec_domain = 16 @@ -79,8 +79,28 @@ tsk_ptrace = 24 processor = 52 + /* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT + +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 + ENOSYS = 38 +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl processor(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx) +#define CPU_INDX +#endif #define SAVE_ALL \ cld; \ @@ -91,9 +111,9 @@ pushl %edi; \ pushl %esi; \ pushl %edx; \ + movl $(__KERNEL_DS),%edx; \ pushl %ecx; \ pushl %ebx; \ - movl $(__KERNEL_DS),%edx; \ movl %edx,%ds; \ movl %edx,%es; @@ -141,13 +161,13 @@ movl EFLAGS(%esp),%ecx # and this is cs.. movl %eax,EFLAGS(%esp) # movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # movl %esp,%ebx + movl %ecx,CS(%esp) # pushl %ebx andl $-8192,%ebx # GET_CURRENT movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain pushl $0x7 + movl 4(%edx),%edx # Get the lcall7 handler for the domain call *%edx addl $4, %esp popl %eax @@ -162,13 +182,13 @@ movl EFLAGS(%esp),%ecx # and this is cs.. movl %eax,EFLAGS(%esp) # movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # movl %esp,%ebx + movl %ecx,CS(%esp) # pushl %ebx andl $-8192,%ebx # GET_CURRENT movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain pushl $0x27 + movl 4(%edx),%edx # Get the lcall7 handler for the domain call *%edx addl $4, %esp popl %eax @@ -184,6 +204,18 @@ jne tracesys_exit jmp ret_from_sys_call +#if defined(CONFIG_KDB) +ENTRY(kdb_call) + pushl %eax # save orig EAX + SAVE_ALL + pushl %esp # struct pt_regs + pushl $0 # error_code + pushl $7 # KDB_REASON_ENTRY + call SYMBOL_NAME(kdb) + addl $12,%esp # remove args + RESTORE_ALL +#endif + /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to @@ -247,12 +279,30 @@ ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) +#ifdef CONFIG_PREEMPT + cli + decl preempt_count(%ebx) +#endif ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
jne ret_from_sys_call +#ifdef CONFIG_PREEMPT + cmpl $0,preempt_count(%ebx) + jnz restore_all + cmpl $0,need_resched(%ebx) + jz restore_all + movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl preempt_count(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#else jmp restore_all +#endif ALIGN reschedule: @@ -289,6 +339,9 @@ GET_CURRENT(%ebx) call *%edi addl $8,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(coprocessor_error) @@ -308,12 +361,18 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate +#ifdef CONFIG_PREEMPT + cli +#endif call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(debug) @@ -379,6 +438,22 @@ ENTRY(alignment_check) pushl $ SYMBOL_NAME(do_alignment_check) jmp error_code + +#if defined(CONFIG_KDB) +ENTRY(page_fault_mca) + pushl %ecx + pushl %edx + pushl %eax + movl $473,%ecx + rdmsr + andl $0xfffffffe,%eax /* Disable last branch recording */ + wrmsr + popl %eax + popl %edx + popl %ecx + pushl $ SYMBOL_NAME(do_page_fault) + jmp error_code +#endif ENTRY(page_fault) pushl $ SYMBOL_NAME(do_page_fault) diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/i387.c linux-2.4.17-rc2-wli2/arch/i386/kernel/i387.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/i387.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/i387.c Tue Dec 18 22:28:41 2001 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -65,6 +66,8 @@ { struct task_struct *tsk = current; + preempt_disable(); + if (tsk->flags & PF_USEDFPU) { __save_init_fpu(tsk); return; diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/i8259.c linux-2.4.17-rc2-wli2/arch/i386/kernel/i8259.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/i8259.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/i8259.c Tue Dec 18 22:21:49 2001 @@ -456,7 +456,11 @@ */ for (i = 0; i < NR_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != SYSCALL_VECTOR) + if ((vector != SYSCALL_VECTOR) +#if defined(CONFIG_KDB) + && (vector != KDBENTER_VECTOR) +#endif + ) set_intr_gate(vector, interrupt[i]); } diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/irq.c linux-2.4.17-rc2-wli2/arch/i386/kernel/irq.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/irq.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/irq.c Tue Dec 18 22:21:49 2001 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -316,6 +317,11 @@ static inline void get_irqlock(int cpu) { +#ifdef CONFIG_KDB + static int kdb_rate; + if (KDB_IS_RUNNING() && kdb_rate++ < 10) + kdb_printf("Warning: get_irqlock on cpu %d while kdb is running, may hang\n", smp_processor_id()); +#endif /* CONFIG_KDB */ if (test_and_set_bit(0,&global_irq_lock)) { /* do we already hold the lock? 
*/ if ((unsigned char) cpu == global_irq_holder) diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/process.c linux-2.4.17-rc2-wli2/arch/i386/kernel/process.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/process.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/process.c Tue Dec 18 22:21:49 2001 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -397,6 +398,14 @@ * Stop all CPUs and turn off local APICs and the IO-APIC, so * other OSs see a clean IRQ state. */ +#if defined(CONFIG_KDB) + /* + * If this restart is occuring while kdb is running (e.g. reboot + * command), the other CPU's are already stopped. Don't try to + * stop them yet again. + */ + if (!KDB_IS_RUNNING()) +#endif smp_send_stop(); disable_IO_APIC(); #endif diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/semaphore.c linux-2.4.17-rc2-wli2/arch/i386/kernel/semaphore.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/semaphore.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/semaphore.c Tue Dec 18 22:21:49 2001 @@ -182,6 +182,10 @@ ".align 4\n" ".globl __down_failed\n" "__down_failed:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif "pushl %eax\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" @@ -189,6 +193,10 @@ "popl %ecx\n\t" "popl %edx\n\t" "popl %eax\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif "ret" ); @@ -197,11 +205,19 @@ ".align 4\n" ".globl __down_failed_interruptible\n" "__down_failed_interruptible:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif "pushl %edx\n\t" "pushl %ecx\n\t" "call __down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif "ret" ); @@ -210,11 +226,19 @@ ".align 4\n" ".globl __down_failed_trylock\n" "__down_failed_trylock:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif "pushl %edx\n\t" "pushl %ecx\n\t" "call __down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif "ret" ); diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/smp.c linux-2.4.17-rc2-wli2/arch/i386/kernel/smp.c --- linux-2.4.17-rc2-virgin/arch/i386/kernel/smp.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/kernel/smp.c Tue Dec 18 22:21:49 2001 @@ -23,6 +23,9 @@ #include #include +#include +#include + /* * Some notes on x86 processor bugs affecting SMP operation: * @@ -144,6 +147,15 @@ */ cfg = __prepare_ICR(shortcut, vector); +#if defined(CONFIG_KDB) + if (vector == KDB_VECTOR) { + /* + * Setup KDB IPI to be delivered as an NMI + */ + cfg = (cfg&~APIC_VECTOR_MASK)|APIC_DM_NMI; + } +#endif /* CONFIG_KDB */ + /* * Send the IPI. The write to APIC_ICR fires this off. */ @@ -484,6 +496,14 @@ do_flush_tlb_all_local(); } + +#if defined(CONFIG_KDB) +void +smp_kdb_stop(void) +{ + send_IPI_allbutself(KDB_VECTOR); +} +#endif /* CONFIG_KDB */ /* * this function sends a 'reschedule' IPI to another CPU. 
diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/smpboot.c linux-2.4.17-rc2-wli2/arch/i386/kernel/smpboot.c
--- linux-2.4.17-rc2-virgin/arch/i386/kernel/smpboot.c	Tue Dec 18 23:18:02 2001
+++ linux-2.4.17-rc2-wli2/arch/i386/kernel/smpboot.c	Tue Dec 18 22:21:49 2001
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <linux/kdb.h>
 #include
 #include
@@ -434,6 +435,11 @@
 	 * Save our processor parameters
 	 */
 	smp_store_cpu_info(cpuid);
+
+#ifdef CONFIG_KDB
+	/* Activate any preset global breakpoints on this cpu */
+	kdb(KDB_REASON_SILENT, 0, 0);
+#endif	/* CONFIG_KDB */
 
 	/*
 	 * Allow the master to continue.
diff -urN linux-2.4.17-rc2-virgin/arch/i386/kernel/traps.c linux-2.4.17-rc2-wli2/arch/i386/kernel/traps.c
--- linux-2.4.17-rc2-virgin/arch/i386/kernel/traps.c	Tue Dec 18 23:18:02 2001
+++ linux-2.4.17-rc2-wli2/arch/i386/kernel/traps.c	Tue Dec 18 22:28:41 2001
@@ -30,6 +30,8 @@
 #include
 #endif
 
+#include <linux/kdb.h>
+
 #include
 #include
 #include
@@ -51,6 +53,9 @@
 #include
 
 asmlinkage int system_call(void);
+#if defined(CONFIG_KDB)
+asmlinkage int kdb_call(void);
+#endif
 asmlinkage void lcall7(void);
 asmlinkage void lcall27(void);
@@ -79,6 +84,9 @@
 asmlinkage void stack_segment(void);
 asmlinkage void general_protection(void);
 asmlinkage void page_fault(void);
+#if defined(CONFIG_KDB)
+asmlinkage void page_fault_mca(void);
+#endif
 asmlinkage void coprocessor_error(void);
 asmlinkage void simd_coprocessor_error(void);
 asmlinkage void alignment_check(void);
@@ -237,6 +245,159 @@
 	printk("\n");
 }
 
+#if defined(CONFIG_KDB)
+spinlock_t dblist_lock = SPIN_LOCK_UNLOCKED;
+
+#define MAXDBLIST	8
+
+typedef int (*dbfunc_t)(struct pt_regs * regs, int error_code,
+			long trap, void *value);
+
+typedef struct __db_list_s {
+	struct __db_list_s	*db_next;
+	dbfunc_t		db_func;
+} dblist_t;
+
+typedef struct __db_listhead_s {
+	dblist_t	dblh_list[MAXDBLIST];
+	dblist_t	*dblh_head;
+	char		*dblh_name;
+} dblisthead_t;
+
+	/*
+	 * Hook-up list to 'die' function
+	 */
+static dblisthead_t dblist_die =
+	{ {{ NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL }},
+	  NULL,
+	  "die"
+	};
+
+	/*
+	 * Hook-up list to int3
+	 */
+static dblisthead_t dblist_int3 =
+	{ {{ NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL }},
+	  NULL,
+	  "int3"
+	};
+
+	/*
+	 * Hook-up list to debug trap
+	 */
+static dblisthead_t dblist_debug =
+	{ {{ NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL },
+	   { NULL, NULL }},
+	  NULL,
+	  "debug"
+	};
+
+static void register_dbfunc(dblisthead_t *lhp, dbfunc_t dfp)
+{
+	int i;
+
+	spin_lock(&dblist_lock);
+
+	if (KDB_DEBUG(CALLBACK))
+		kdb_printf("listhead 0x%p func 0x%p\n", lhp, dfp);
+
+	for (i = 0; i < MAXDBLIST; i++) {
+		if (lhp->dblh_list[i].db_func == NULL) {
+			break;
+		}
+	}
+	if (i == MAXDBLIST) {
+		if (KDB_DEBUG(CALLBACK))
+			kdb_printf("register_dbfunc: 0x%p not registered for %s\n",
+				   dfp, lhp->dblh_name);
+		spin_unlock(&dblist_lock);
+		return;
+	}
+
+	lhp->dblh_list[i].db_func = dfp;
+	lhp->dblh_list[i].db_next = lhp->dblh_head;
+	lhp->dblh_head = &lhp->dblh_list[i];
+
+	spin_unlock(&dblist_lock);
+}
+
+void register_die(dbfunc_t dfp)
+{
+	register_dbfunc(&dblist_die, dfp);
+}
+
+void register_int3(dbfunc_t dfp)
+{
+	register_dbfunc(&dblist_int3, dfp);
+}
+
+void register_debug(dbfunc_t dfp)
+{
+	register_dbfunc(&dblist_debug, dfp);
+}
+
+static inline int
+callout_dbfunc(dblisthead_t *lhp, struct pt_regs *regs, int
error_code, + long trap_number, void *parameter) +{ + dblist_t *dlp = lhp->dblh_head; + int diag = 0; + + if (KDB_DEBUG(CALLBACK)) + kdb_printf("callout dbfunc: cpu %d lhp 0x%p\n", smp_processor_id(), lhp); + /* If we oops inside kdb, we already have the dblist_lock */ + if (!KDB_IS_RUNNING()) + spin_lock(&dblist_lock); + while (dlp) { + int rv; + + /* + * The first callout function to handle this callout + * condition will return '1'. No other callout handlers + * will be invoked. Errors inside kdb will longjmp + * instead of returning. + */ + if (KDB_DEBUG(CALLBACK)) + kdb_printf("callout dbfunc: cpu %d func 0x%p\n", smp_processor_id(), dlp->db_func); + rv = dlp->db_func(regs, error_code, trap_number, parameter); + if (KDB_DEBUG(CALLBACK)) + kdb_printf("callout cpu %d diag %d\n", smp_processor_id(), rv); + if (rv) { + diag ++; + break; + } + + dlp = dlp->db_next; + } + if (!KDB_IS_RUNNING()) + spin_unlock(&dblist_lock); + if (KDB_DEBUG(CALLBACK)) + kdb_printf("callout dbfunc: cpu %d end\n", smp_processor_id()); + + return diag; +} +#endif + spinlock_t die_lock = SPIN_LOCK_UNLOCKED; void die(const char * str, struct pt_regs * regs, long err) @@ -244,6 +405,9 @@ console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); +#if defined(CONFIG_KDB) + (void) callout_dbfunc(&dblist_die, regs, err, -1, (void *)str); +#endif printk("%s: %04lx\n", str, err & 0xffff); show_registers(regs); bust_spinlocks(0); @@ -336,7 +500,9 @@ } DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +#if !defined(CONFIG_KDB) DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) +#endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) @@ -413,17 +579,35 @@ return; } #endif + (void)kdb(KDB_REASON_NMI, reason, regs); printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); } +#if defined(CONFIG_SMP) && defined(CONFIG_KDB) +static void +do_ack_apic_irq(void) +{ + ack_APIC_irq(); +} +#endif + asmlinkage void do_nmi(struct pt_regs * regs, long error_code) { unsigned char reason = inb(0x61); ++nmi_count(smp_processor_id()); +#if defined(CONFIG_SMP) && defined(CONFIG_KDB) + /* + * Call the kernel debugger to see if this NMI is due + * to a KDB requested IPI. If so, kdb will handle it. + */ + if (kdb_ipi((kdb_eframe_t)regs, do_ack_apic_irq)) { + return; + } +#endif if (!(reason & 0xc0)) { #if CONFIG_X86_LOCAL_APIC /* @@ -482,6 +666,15 @@ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); +#if defined(CONFIG_KDB) + /* + * The callout functions will return 'true' if they've handled + * the callout condition.
+ */ + if (callout_dbfunc(&dblist_debug, regs, error_code, SIGTRAP, NULL)) + return; +#endif + /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) @@ -541,6 +734,16 @@ return; } +#if defined(CONFIG_KDB) +asmlinkage void do_int3(struct pt_regs * regs, long error_code) +{ + if (callout_dbfunc(&dblist_int3, regs, error_code, SIGTRAP, NULL)) + return; + + do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); +} +#endif + /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous @@ -697,6 +900,11 @@ */ asmlinkage void math_state_restore(struct pt_regs regs) { + /* + * CONFIG_PREEMPT + * Must be called with preemption disabled + */ + __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ if (current->used_math) { @@ -934,7 +1142,17 @@ set_trap_gate(11,&segment_not_present); set_trap_gate(12,&stack_segment); set_trap_gate(13,&general_protection); +#if defined(CONFIG_KDB) + if (test_bit(X86_FEATURE_MCE, &boot_cpu_data.x86_capability) && + test_bit(X86_FEATURE_MCA, &boot_cpu_data.x86_capability)) { + set_intr_gate(14,&page_fault_mca); + } + else { + set_intr_gate(14,&page_fault); + } +#else set_intr_gate(14,&page_fault); +#endif set_trap_gate(15,&spurious_interrupt_bug); set_trap_gate(16,&coprocessor_error); set_trap_gate(17,&alignment_check); @@ -942,6 +1160,17 @@ set_trap_gate(19,&simd_coprocessor_error); set_system_gate(SYSCALL_VECTOR,&system_call); +#if defined(CONFIG_KDB) + { + set_trap_gate(18, &machine_check); + } + kdb_enablehwfault(); + /* + * A trap gate, used by the kernel to enter the + * debugger, preserving all registers. + */ + set_trap_gate(KDBENTER_VECTOR, &kdb_call); +#endif /* CONFIG_KDB */ /* * default LDT is a single-entry callgate to lcall7 for iBCS @@ -959,5 +1188,11 @@ superio_init(); lithium_init(); cobalt_init(); +#endif + +#if defined(CONFIG_KDB) + register_die(kdba_callback_die); + register_int3(kdba_callback_bp); + register_debug(kdba_callback_debug); #endif } diff -urN linux-2.4.17-rc2-virgin/arch/i386/lib/dec_and_lock.c linux-2.4.17-rc2-wli2/arch/i386/lib/dec_and_lock.c --- linux-2.4.17-rc2-virgin/arch/i386/lib/dec_and_lock.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/lib/dec_and_lock.c Tue Dec 18 22:28:41 2001 @@ -8,6 +8,7 @@ */ #include +#include #include int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) diff -urN linux-2.4.17-rc2-virgin/arch/i386/vmlinux.lds linux-2.4.17-rc2-wli2/arch/i386/vmlinux.lds --- linux-2.4.17-rc2-virgin/arch/i386/vmlinux.lds Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/i386/vmlinux.lds Tue Dec 18 22:21:49 2001 @@ -29,6 +29,10 @@ __ksymtab : { *(__ksymtab) } __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + .data : { /* Data */ *(.data) CONSTRUCTORS diff -urN linux-2.4.17-rc2-virgin/arch/ia64/config.in linux-2.4.17-rc2-wli2/arch/ia64/config.in --- linux-2.4.17-rc2-virgin/arch/ia64/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/ia64/config.in Tue Dec 18 22:28:41 2001 @@ -94,6 +94,10 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. bool 'SMP support' CONFIG_SMP +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi tristate 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT bool 'Performance monitor support' CONFIG_PERFMON tristate '/proc/pal support' CONFIG_IA64_PALINFO diff -urN linux-2.4.17-rc2-virgin/arch/m68k/config.in linux-2.4.17-rc2-wli2/arch/m68k/config.in --- linux-2.4.17-rc2-virgin/arch/m68k/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/m68k/config.in Tue Dec 18 22:28:41 2001 @@ -84,6 +84,10 @@ bool 'Use write-through caching for 68060 supervisor accesses' CONFIG_060_WRITETHROUGH fi fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment diff -urN linux-2.4.17-rc2-virgin/arch/m68k/vmlinux-sun3.lds linux-2.4.17-rc2-wli2/arch/m68k/vmlinux-sun3.lds --- linux-2.4.17-rc2-virgin/arch/m68k/vmlinux-sun3.lds Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/m68k/vmlinux-sun3.lds Tue Dec 18 22:21:49 2001 @@ -30,6 +30,9 @@ __start___ksymtab = .; /* Kernel symbol table */ *(__ksymtab) __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + *(__kallsyms) + __stop___kallsyms = .; } /* End of data goes *here* so that freeing init code works properly. */ _edata = .; diff -urN linux-2.4.17-rc2-virgin/arch/m68k/vmlinux.lds linux-2.4.17-rc2-wli2/arch/m68k/vmlinux.lds --- linux-2.4.17-rc2-virgin/arch/m68k/vmlinux.lds Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/m68k/vmlinux.lds Tue Dec 18 22:21:49 2001 @@ -24,6 +24,10 @@ __ksymtab : { *(__ksymtab) } __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + _etext = .; /* End of text section */ .data : { /* Data */ diff -urN linux-2.4.17-rc2-virgin/arch/mips/config.in linux-2.4.17-rc2-wli2/arch/mips/config.in --- linux-2.4.17-rc2-virgin/arch/mips/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/mips/config.in Tue Dec 18 22:28:41 2001 @@ -275,6 +275,10 @@ fi fi fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment diff -urN linux-2.4.17-rc2-virgin/arch/mips64/config.in linux-2.4.17-rc2-wli2/arch/mips64/config.in --- linux-2.4.17-rc2-virgin/arch/mips64/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/mips64/config.in Tue Dec 18 22:28:41 2001 @@ -25,6 +25,10 @@ bool ' Multi-Processing support' CONFIG_SMP #bool ' IP27 XXL' CONFIG_SGI_SN0_XXL fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y diff -urN linux-2.4.17-rc2-virgin/arch/parisc/config.in linux-2.4.17-rc2-wli2/arch/parisc/config.in --- linux-2.4.17-rc2-virgin/arch/parisc/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/parisc/config.in Tue Dec 18 22:28:41 2001 @@ -45,6 +45,10 @@ # # if [ "$CONFIG_PCI_EPIC" = "y" ]; then... # +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi endmenu diff -urN linux-2.4.17-rc2-virgin/arch/ppc/config.in linux-2.4.17-rc2-wli2/arch/ppc/config.in --- linux-2.4.17-rc2-virgin/arch/ppc/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/ppc/config.in Tue Dec 18 22:28:41 2001 @@ -108,6 +108,10 @@ if [ "$CONFIG_SMP" = "y" ]; then bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then bool 'AltiVec Support' CONFIG_ALTIVEC diff -urN linux-2.4.17-rc2-virgin/arch/ppc/kernel/setup.c linux-2.4.17-rc2-wli2/arch/ppc/kernel/setup.c --- linux-2.4.17-rc2-virgin/arch/ppc/kernel/setup.c Wed Nov 21 09:59:11 2001 +++ linux-2.4.17-rc2-wli2/arch/ppc/kernel/setup.c Thu Dec 20 19:49:13 2001 @@ -689,8 +689,12 @@ id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); id->word92 = __le16_to_cpu(id->word92); id->hw_config = __le16_to_cpu(id->hw_config); - for (i = 0; i < 32; i++) - id->words94_125[i] = __le16_to_cpu(id->words94_125[i]); + id->acoustic = __le16_to_cpu(id->acoustic); + for (i = 0; i < 5; i++) + id->words95_99[i] = __le16_to_cpu(id->words95_99[i]); + id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2); + for (i = 0; i < 22; i++) + id->words104_125[i] = __le16_to_cpu(id->words104_125[i]); id->last_lun = __le16_to_cpu(id->last_lun); id->word127 = __le16_to_cpu(id->word127); id->dlf = __le16_to_cpu(id->dlf); @@ -700,6 +704,12 @@ id->word156 = __le16_to_cpu(id->word156); for (i = 0; i < 3; i++) id->words157_159[i] = __le16_to_cpu(id->words157_159[i]); - for (i = 0; i < 96; i++) - id->words160_255[i] = __le16_to_cpu(id->words160_255[i]); + id->cfa_power = __le16_to_cpu(id->cfa_power); + for (i = 0; i < 14; i++) + id->words161_175[i] = __le16_to_cpu(id->words161_175[i]); + for (i = 0; i < 31; i++) + id->words176_205[i] = __le16_to_cpu(id->words176_205[i]); + for (i = 0; i < 48; i++) + id->words206_254[i] = __le16_to_cpu(id->words206_254[i]); + id->integrity_word = __le16_to_cpu(id->integrity_word); } diff -urN linux-2.4.17-rc2-virgin/arch/ppc/vmlinux.lds linux-2.4.17-rc2-wli2/arch/ppc/vmlinux.lds --- linux-2.4.17-rc2-virgin/arch/ppc/vmlinux.lds Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/ppc/vmlinux.lds Tue Dec 18 22:21:49 2001 @@ -70,6 +70,10 @@ __ksymtab : { *(__ksymtab) } __stop___ksymtab = .; + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + __start___ftr_fixup = .; __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; diff -urN linux-2.4.17-rc2-virgin/arch/s390/config.in linux-2.4.17-rc2-wli2/arch/s390/config.in --- linux-2.4.17-rc2-virgin/arch/s390/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/s390/config.in Tue Dec 18 22:28:41 2001 @@ -32,6 +32,10 @@ comment 'Processor type and features' bool 'Symmetric multi-processing support' CONFIG_SMP bool 'IEEE FPU emulation' CONFIG_MATHEMU +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment diff -urN linux-2.4.17-rc2-virgin/arch/s390x/config.in linux-2.4.17-rc2-wli2/arch/s390x/config.in --- linux-2.4.17-rc2-virgin/arch/s390x/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/s390x/config.in Tue Dec 18 22:28:41 2001 @@ -26,6 +26,10 @@ if [ "$CONFIG_S390_SUPPORT" = "y" ]; then tristate 'Kernel support for 31 bit ELF binaries' CONFIG_BINFMT_ELF32 fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment diff -urN linux-2.4.17-rc2-virgin/arch/sh/config.in linux-2.4.17-rc2-wli2/arch/sh/config.in --- linux-2.4.17-rc2-virgin/arch/sh/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/sh/config.in Tue Dec 18 22:28:41 2001 @@ -22,6 +22,10 @@ bool ' Set version information on all module symbols' CONFIG_MODVERSIONS bool ' Kernel module loader' CONFIG_KMOD fi +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi endmenu mainmenu_option next_comment @@ -124,6 +128,8 @@ hex 'Physical memory start address' CONFIG_MEMORY_START 08000000 hex 'Physical memory size' CONFIG_MEMORY_SIZE 00400000 fi +# Preemptible kernel feature +bool 'Preemptible Kernel' CONFIG_PREEMPT endmenu if [ "$CONFIG_SH_HP690" = "y" ]; then diff -urN linux-2.4.17-rc2-virgin/arch/sh/kernel/entry.S linux-2.4.17-rc2-wli2/arch/sh/kernel/entry.S --- linux-2.4.17-rc2-virgin/arch/sh/kernel/entry.S Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/sh/kernel/entry.S Tue Dec 18 22:28:41 2001 @@ -60,10 +60,18 @@ /* * These are offsets into the task-struct. */ -flags = 4 +preempt_count = 4 sigpending = 8 need_resched = 20 tsk_ptrace = 24 +flags = 84 + +/* + * And these offsets are into irq_stat. + * (Find irq_cpustat_t in asm-sh/hardirq.h) + */ +local_irq_count = 8 +local_bh_count = 12 PT_TRACESYS = 0x00000002 PF_USEDFPU = 0x00100000 @@ -143,7 +151,7 @@ mov.l __INV_IMASK, r11; \ stc sr, r10; \ and r11, r10; \ - stc k_g_imask, r11; \ + stc k_g_imask, r11; \ or r11, r10; \ ldc r10, sr @@ -304,8 +312,8 @@ mov.l @(tsk_ptrace,r0), r0 ! Is current PTRACE_SYSCALL'd? mov #PT_TRACESYS, r1 tst r1, r0 - bt ret_from_syscall - bra syscall_ret_trace + bf syscall_ret_trace + bra ret_from_syscall nop .align 2 @@ -505,8 +513,6 @@ .long syscall_ret_trace __syscall_ret: .long syscall_ret -__INV_IMASK: - .long 0xffffff0f ! ~(IMASK) .align 2 @@ -518,7 +524,84 @@ .align 2 1: .long SYMBOL_NAME(schedule) +#ifdef CONFIG_PREEMPT + ! + ! Returning from interrupt during kernel mode: check if + ! preempt_schedule should be called. If need_resched flag + ! is set, preempt_count is zero, and we're not currently + ! in an interrupt handler (local irq or bottom half) then + ! call preempt_schedule. + ! + ! Increment preempt_count to prevent a nested interrupt + ! from reentering preempt_schedule, then decrement after + ! and drop through to regular interrupt return which will + ! jump back and check again in case such an interrupt did + ! come in (and didn't preempt due to preempt_count). + ! + ! NOTE: because we just checked that preempt_count was + ! zero before getting to the call, can't we use immediate + ! values (1 and 0) rather than inc/dec? Also, rather than + ! drop through to ret_from_irq, we already know this thread + ! is kernel mode, can't we go direct to ret_from_kirq? In + ! fact, with proper interrupt nesting and so forth could + ! 
the loop simply be on the need_resched w/o checking the + ! other stuff again? Optimize later... + ! + .align 2 +ret_from_kirq: + ! Nonzero preempt_count prevents scheduling + stc k_current, r1 + mov.l @(preempt_count,r1), r0 + cmp/eq #0, r0 + bf restore_all + ! Zero need_resched prevents scheduling + mov.l @(need_resched,r1), r0 + cmp/eq #0, r0 + bt restore_all + ! If in_interrupt(), don't schedule + mov.l __irq_stat, r1 + mov.l @(local_irq_count,r1), r0 + mov.l @(local_bh_count,r1), r1 + or r1, r0 + cmp/eq #0, r0 + bf restore_all + ! Allow scheduling using preempt_schedule + ! Adjust preempt_count and SR as needed. + stc k_current, r1 + mov.l @(preempt_count,r1), r0 ! Could replace this ... + add #1, r0 ! ... and this w/mov #1? + mov.l r0, @(preempt_count,r1) + STI() + mov.l __preempt_schedule, r0 + jsr @r0 + nop + /* CLI */ + stc sr, r0 + or #0xf0, r0 + ldc r0, sr + ! + stc k_current, r1 + mov.l @(preempt_count,r1), r0 ! Could replace this ... + add #-1, r0 ! ... and this w/mov #0? + mov.l r0, @(preempt_count,r1) + ! Maybe should bra ret_from_kirq, or loop over need_resched? + ! For now, fall through to ret_from_irq again... +#endif /* CONFIG_PREEMPT */ + ret_from_irq: + mov #OFF_SR, r0 + mov.l @(r0,r15), r0 ! get status register + shll r0 + shll r0 ! kernel space? +#ifndef CONFIG_PREEMPT + bt restore_all ! Yes, it's from kernel, go back soon +#else /* CONFIG_PREEMPT */ + bt ret_from_kirq ! From kernel: maybe preempt_schedule +#endif /* CONFIG_PREEMPT */ + ! + bra ret_from_syscall + nop + ret_from_exception: mov #OFF_SR, r0 mov.l @(r0,r15), r0 ! get status register @@ -564,6 +647,13 @@ .long SYMBOL_NAME(do_signal) __irq_stat: .long SYMBOL_NAME(irq_stat) +#ifdef CONFIG_PREEMPT +__preempt_schedule: + .long SYMBOL_NAME(preempt_schedule) +#endif /* CONFIG_PREEMPT */ +__INV_IMASK: + .long 0xffffff0f ! ~(IMASK) + .align 2 restore_all: @@ -679,7 +769,7 @@ __fpu_prepare_fd: .long SYMBOL_NAME(fpu_prepare_fd) __init_task_flags: - .long SYMBOL_NAME(init_task_union)+4 + .long SYMBOL_NAME(init_task_union)+flags __PF_USEDFPU: .long PF_USEDFPU #endif diff -urN linux-2.4.17-rc2-virgin/arch/sh/kernel/irq.c linux-2.4.17-rc2-wli2/arch/sh/kernel/irq.c --- linux-2.4.17-rc2-virgin/arch/sh/kernel/irq.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/sh/kernel/irq.c Tue Dec 18 22:28:41 2001 @@ -229,6 +229,14 @@ struct irqaction * action; unsigned int status; + /* + * At this point we're now about to actually call handlers, + * and interrupts might get reenabled during them... bump + * preempt_count to prevent any preemption while the handler + * called here is pending... + */ + preempt_disable(); + /* Get IRQ number */ asm volatile("stc r2_bank, %0\n\t" "shlr2 %0\n\t" @@ -298,8 +306,17 @@ desc->handler->end(irq); spin_unlock(&desc->lock); + if (softirq_pending(cpu)) do_softirq(); + + /* + * We're done with the handlers, interrupts should be + * currently disabled; decrement preempt_count now so + * as we return preemption may be allowed... + */ + preempt_enable_no_resched(); + return 1; } diff -urN linux-2.4.17-rc2-virgin/arch/sparc/config.in linux-2.4.17-rc2-wli2/arch/sparc/config.in --- linux-2.4.17-rc2-virgin/arch/sparc/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/sparc/config.in Tue Dec 18 22:28:41 2001 @@ -28,6 +28,10 @@ define_bool CONFIG_VT_CONSOLE y bool 'Symmetric multi-processing support (does not work on sun4/sun4c)' CONFIG_SMP +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' 
CONFIG_MAX_PRI 127 +fi # Identify this as a Sparc32 build define_bool CONFIG_SPARC32 y diff -urN linux-2.4.17-rc2-virgin/arch/sparc64/config.in linux-2.4.17-rc2-wli2/arch/sparc64/config.in --- linux-2.4.17-rc2-virgin/arch/sparc64/config.in Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/arch/sparc64/config.in Tue Dec 18 22:28:41 2001 @@ -27,6 +27,10 @@ define_bool CONFIG_VT_CONSOLE y bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Real Time Scheduler' CONFIG_RTSCHED +if [ "$CONFIG_RTSCHED" = "y" ]; then + int 'Maximum Priority?' CONFIG_MAX_PRI 127 +fi # Identify this as a Sparc64 build define_bool CONFIG_SPARC64 y diff -urN linux-2.4.17-rc2-virgin/drivers/block/elevator.c linux-2.4.17-rc2-wli2/drivers/block/elevator.c --- linux-2.4.17-rc2-virgin/drivers/block/elevator.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/block/elevator.c Tue Dec 18 22:28:42 2001 @@ -74,11 +74,10 @@ return 0; } - int elevator_linus_merge(request_queue_t *q, struct request **req, struct list_head * head, struct buffer_head *bh, int rw, - int max_sectors) + int max_sectors, int max_bomb_segments) { struct list_head *entry = &q->queue_head; unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; @@ -116,6 +115,56 @@ } } + /* + * If we failed to merge a read anywhere in the request + * queue, we really don't want to place it at the end + * of the list, behind lots of writes. So place it near + * the front. + * + * We don't want to place it in front of _all_ writes: that + * would create lots of seeking, and isn't tunable. + * We try to avoid promoting this read in front of existing + * reads. + * + * max_bomb_segments becomes the maximum number of write + * requests which we allow to remain in place in front of + * a newly introduced read. We weight things a little bit, + * so large writes are more expensive than small ones, but it's + * requests which count, not sectors. + */ + if (max_bomb_segments && rw == READ && ret == ELEVATOR_NO_MERGE) { + int cur_latency = 0; + struct request * const cur_request = *req; + + entry = head->next; + while (entry != &q->queue_head) { + struct request *__rq; + + if (entry == &q->queue_head) + BUG(); + if (entry == q->queue_head.next && + q->head_active && !q->plugged) + BUG(); + __rq = blkdev_entry_to_request(entry); + + if (__rq == cur_request) { + /* + * This is where the old algorithm placed it. + * There's no point pushing it further back, + * so leave it here, in sorted order.
+ */ + break; + } + if (__rq->cmd == WRITE) { + cur_latency += 1 + __rq->nr_sectors / 64; + if (cur_latency >= max_bomb_segments) { + *req = __rq; + break; + } + } + entry = entry->next; + } + } return ret; } @@ -144,7 +193,7 @@ int elevator_noop_merge(request_queue_t *q, struct request **req, struct list_head * head, struct buffer_head *bh, int rw, - int max_sectors) + int max_sectors, int max_bomb_segments) { struct list_head *entry; unsigned int count = bh->b_size >> 9; @@ -188,7 +237,7 @@ output.queue_ID = elevator->queue_ID; output.read_latency = elevator->read_latency; output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + output.max_bomb_segments = elevator->max_bomb_segments; if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) return -EFAULT; @@ -207,9 +256,12 @@ return -EINVAL; if (input.write_latency < 0) return -EINVAL; + if (input.max_bomb_segments < 0) + return -EINVAL; elevator->read_latency = input.read_latency; elevator->write_latency = input.write_latency; + elevator->max_bomb_segments = input.max_bomb_segments; return 0; } diff -urN linux-2.4.17-rc2-virgin/drivers/block/ll_rw_blk.c linux-2.4.17-rc2-wli2/drivers/block/ll_rw_blk.c --- linux-2.4.17-rc2-virgin/drivers/block/ll_rw_blk.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/block/ll_rw_blk.c Tue Dec 18 22:28:42 2001 @@ -690,7 +690,8 @@ } else if (q->head_active && !q->plugged) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, + rw, max_sectors, elevator->max_bomb_segments); switch (el_ret) { case ELEVATOR_BACK_MERGE: diff -urN linux-2.4.17-rc2-virgin/drivers/char/keyboard.c linux-2.4.17-rc2-wli2/drivers/char/keyboard.c --- linux-2.4.17-rc2-virgin/drivers/char/keyboard.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/char/keyboard.c Tue Dec 18 22:21:49 2001 @@ -42,6 +42,9 @@ #include #include #include +#if defined(CONFIG_KDB) +#include +#endif #define SIZE(x) (sizeof(x)/sizeof((x)[0])) @@ -245,6 +248,13 @@ up_flag = kbd_unexpected_up(keycode); } else rep = test_and_set_bit(keycode, key_down); + +#if defined(CONFIG_KDB) + if (!up_flag && (keycode == E1_PAUSE) && kdb_on) { + kdb(KDB_REASON_KEYBOARD, 0, kbd_pt_regs); + return; + } +#endif /* CONFIG_KDB */ #ifdef CONFIG_MAGIC_SYSRQ /* Handle the SysRq Hack */ if (keycode == SYSRQ_KEY) { diff -urN linux-2.4.17-rc2-virgin/drivers/char/mem.c linux-2.4.17-rc2-wli2/drivers/char/mem.c --- linux-2.4.17-rc2-virgin/drivers/char/mem.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/char/mem.c Tue Dec 18 22:28:42 2001 @@ -272,8 +272,6 @@ return virtr + read; } -extern long vwrite(char *buf, char *addr, unsigned long count); - /* * This function writes to the *virtual* memory as seen by the kernel. 
*/ @@ -281,46 +279,12 @@ size_t count, loff_t *ppos) { unsigned long p = *ppos; - ssize_t wrote = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ - - if (p < (unsigned long) high_memory) { - wrote = count; - if (count > (unsigned long) high_memory - p) - wrote = (unsigned long) high_memory - p; - - wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos); - - p += wrote; - buf += wrote; - count -= wrote; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - int len = count; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - if (len && copy_from_user(kbuf, buf, len)) { - free_page((unsigned long)kbuf); - return -EFAULT; - } - len = vwrite(kbuf, (char *)p, len); - count -= len; - buf += len; - virtr += len; - p += len; - } - free_page((unsigned long)kbuf); - } - *ppos = p; - return virtr + wrote; + if (p >= (unsigned long) high_memory) + return 0; + if (count > (unsigned long) high_memory - p) + count = (unsigned long) high_memory - p; + return do_write_mem(file, (void*)p, p, buf, count, ppos); } #if !defined(__mc68000__) @@ -400,7 +364,7 @@ if (count > size) count = size; - zap_page_range(mm, addr, count); + zap_page_range(mm, addr, count, ZPR_NORMAL); zeromap_page_range(addr, count, PAGE_COPY); size -= count; diff -urN linux-2.4.17-rc2-virgin/drivers/char/serial.c linux-2.4.17-rc2-wli2/drivers/char/serial.c --- linux-2.4.17-rc2-virgin/drivers/char/serial.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/char/serial.c Tue Dec 18 22:21:49 2001 @@ -218,6 +218,29 @@ #include #endif +#if defined(CONFIG_KDB) +#include +/* + * kdb_serial_line records the serial line number of the first serial console. + * NOTE: The kernel ignores characters on the serial line unless a user space + * program has opened the line first. To enter kdb before user space has opened + * the serial line, you can use the 'kdb=early' flag to lilo and set the + * appropriate breakpoints. + * + * kdb_serial_str[] is the sequence that the user must enter on the serial + * console to invoke kdb. It can be a single character such as "\001" + * (control-A) or multiple characters such as "\eKdB". NOTE: All except the + * last character are passed through to the application reading from the serial + * console. + * + * I tried to make the sequence a CONFIG_ option but most of CML1 cannot cope + * with '\' in strings, CML2 should be able to do it. KAO. + */ + +static int kdb_serial_line = -1; +static char kdb_serial_str[] = "\001"; +static char *kdb_serial_ptr = kdb_serial_str; +#endif /* CONFIG_KDB */ /* * All of the compatibilty code so we can compile serial.c against * older kernels is hidden in serial_compat.h @@ -580,6 +603,18 @@ return; // if TTY_DONT_FLIP is set } ch = serial_inp(info, UART_RX); +#if defined(CONFIG_KDB) + if ((info->line == kdb_serial_line) && kdb_on) { + if (ch == *kdb_serial_ptr) { + if (!(*++kdb_serial_ptr)) { + kdb(KDB_REASON_KEYBOARD, 0, (kdb_eframe_t)regs); + kdb_serial_ptr = kdb_serial_str; + break; + } + } else + kdb_serial_ptr = kdb_serial_str; + } +#endif /* CONFIG_KDB */ *tty->flip.char_buf_ptr = ch; icount->rx++; @@ -5982,6 +6017,17 @@ */ if (serial_in(info, UART_LSR) == 0xff) return -1; + +#if defined(CONFIG_KDB) + /* + * Remember the line number of the first serial + * console. We'll make this the kdb serial console too. 
+ */ + if (kdb_serial_line == -1) { + kdb_serial_line = co->index; + kdb_port = state->port; + } +#endif /* CONFIG_KDB */ return 0; } diff -urN linux-2.4.17-rc2-virgin/drivers/char/tty_io.c linux-2.4.17-rc2-wli2/drivers/char/tty_io.c --- linux-2.4.17-rc2-virgin/drivers/char/tty_io.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/char/tty_io.c Tue Dec 18 22:28:42 2001 @@ -722,6 +722,7 @@ ret = -ERESTARTSYS; if (signal_pending(current)) break; + debug_lock_break(551); if (current->need_resched) schedule(); } diff -urN linux-2.4.17-rc2-virgin/drivers/ide/Config.in linux-2.4.17-rc2-wli2/drivers/ide/Config.in --- linux-2.4.17-rc2-virgin/drivers/ide/Config.in Mon Oct 8 11:40:13 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/Config.in Thu Dec 20 19:49:13 2001 @@ -14,6 +14,7 @@ dep_tristate ' Include IDE/ATA-2 DISK support' CONFIG_BLK_DEV_IDEDISK $CONFIG_BLK_DEV_IDE dep_mbool ' Use multi-mode by default' CONFIG_IDEDISK_MULTI_MODE $CONFIG_BLK_DEV_IDEDISK + dep_mbool ' Auto-Geometry Resizing support' CONFIG_IDEDISK_STROKE $CONFIG_BLK_DEV_IDEDISK define_bool CONFIG_BLK_DEV_IDEDISK_VENDOR n dep_mbool ' Fujitsu Vendor Specific' CONFIG_BLK_DEV_IDEDISK_FUJITSU $CONFIG_BLK_DEV_IDEDISK_VENDOR @@ -32,6 +33,9 @@ dep_tristate ' Include IDE/ATAPI FLOPPY support' CONFIG_BLK_DEV_IDEFLOPPY $CONFIG_BLK_DEV_IDE dep_tristate ' SCSI emulation support' CONFIG_BLK_DEV_IDESCSI $CONFIG_BLK_DEV_IDE $CONFIG_SCSI + bool ' IDE Taskfile Access' CONFIG_IDE_TASK_IOCTL + bool ' IDE Taskfile IO' CONFIG_IDE_TASKFILE_IO + comment 'IDE chipset support/bugfixes' if [ "$CONFIG_BLK_DEV_IDE" != "n" ]; then dep_bool ' CMD640 chipset bugfix/support' CONFIG_BLK_DEV_CMD640 $CONFIG_X86 @@ -43,14 +47,17 @@ if [ "$CONFIG_BLK_DEV_IDEPCI" = "y" ]; then bool ' Sharing PCI IDE interrupts support' CONFIG_IDEPCI_SHARE_IRQ bool ' Generic PCI bus-master DMA support' CONFIG_BLK_DEV_IDEDMA_PCI -# bool ' Asynchronous DMA support (EXPERIMENTAL)' CONFIG_BLK_DEV_ADMA $CONFIG_BLK_DEV_IDEDMA_PCI - define_bool CONFIG_BLK_DEV_ADMA $CONFIG_BLK_DEV_IDEDMA_PCI bool ' Boot off-board chipsets first support' CONFIG_BLK_DEV_OFFBOARD + dep_bool ' Force enable legacy 2.0.X HOSTS to use DMA' CONFIG_BLK_DEV_IDEDMA_FORCED $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' Use PCI DMA by default when available' CONFIG_IDEDMA_PCI_AUTO $CONFIG_BLK_DEV_IDEDMA_PCI + dep_bool ' Enable DMA only for disks ' CONFIG_IDEDMA_ONLYDISK $CONFIG_IDEDMA_PCI_AUTO define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' ATA Work(s) In Progress (EXPERIMENTAL)' CONFIG_IDEDMA_PCI_WIP $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_EXPERIMENTAL -# dep_bool ' Attempt to HACK around Chipsets that TIMEOUT (WIP)' CONFIG_BLK_DEV_IDEDMA_TIMEOUT $CONFIG_IDEDMA_PCI_WIP + dep_bool ' Attempt to HACK around Chipsets that TIMEOUT (WIP)' CONFIG_BLK_DEV_IDEDMA_TIMEOUT $CONFIG_IDEDMA_PCI_WIP dep_bool ' Good-Bad DMA Model-Firmware (WIP)' CONFIG_IDEDMA_NEW_DRIVE_LISTINGS $CONFIG_IDEDMA_PCI_WIP +# dep_bool ' Asynchronous DMA support (WIP) (EXPERIMENTAL)' CONFIG_BLK_DEV_ADMA $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_IDEDMA_PCI_WIP + define_bool CONFIG_BLK_DEV_ADMA $CONFIG_BLK_DEV_IDEDMA_PCI +# dep_bool ' Tag Command Queue DMA support (WIP) (EXPERIMENTAL)' CONFIG_BLK_DEV_IDEDMA_TCQ $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_IDEDMA_PCI_WIP dep_bool ' AEC62XX chipset support' CONFIG_BLK_DEV_AEC62XX $CONFIG_BLK_DEV_IDEDMA_PCI dep_mbool ' AEC62XX Tuning support' CONFIG_AEC62XX_TUNING $CONFIG_BLK_DEV_AEC62XX @@ -59,6 +66,7 @@ dep_bool ' AMD Viper support' CONFIG_BLK_DEV_AMD74XX $CONFIG_BLK_DEV_IDEDMA_PCI dep_mbool ' AMD Viper ATA-66 
Override (WIP)' CONFIG_AMD74XX_OVERRIDE $CONFIG_BLK_DEV_AMD74XX $CONFIG_IDEDMA_PCI_WIP dep_bool ' CMD64X chipset support' CONFIG_BLK_DEV_CMD64X $CONFIG_BLK_DEV_IDEDMA_PCI + dep_bool ' CMD680 chipset tuning support' CONFIG_BLK_DEV_CMD680 $CONFIG_BLK_DEV_CMD64X dep_bool ' CY82C693 chipset support' CONFIG_BLK_DEV_CY82C693 $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' Cyrix CS5530 MediaGX chipset support' CONFIG_BLK_DEV_CS5530 $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' HPT34X chipset support' CONFIG_BLK_DEV_HPT34X $CONFIG_BLK_DEV_IDEDMA_PCI @@ -74,7 +82,8 @@ fi dep_bool ' NS87415 chipset support (EXPERIMENTAL)' CONFIG_BLK_DEV_NS87415 $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' OPTi 82C621 chipset enhanced support (EXPERIMENTAL)' CONFIG_BLK_DEV_OPTI621 $CONFIG_EXPERIMENTAL - dep_bool ' PROMISE PDC202{46|62|65|67|68} support' CONFIG_BLK_DEV_PDC202XX $CONFIG_BLK_DEV_IDEDMA_PCI + dep_mbool ' Pacific Digital A-DMA support (EXPERIMENTAL)' CONFIG_BLK_DEV_PDC_ADMA $CONFIG_BLK_DEV_ADMA $CONFIG_IDEDMA_PCI_WIP + dep_bool ' PROMISE PDC202{46|62|65|67|68|69|70} support' CONFIG_BLK_DEV_PDC202XX $CONFIG_BLK_DEV_IDEDMA_PCI dep_bool ' Special UDMA Feature' CONFIG_PDC202XX_BURST $CONFIG_BLK_DEV_PDC202XX dep_bool ' Special FastTrak Feature' CONFIG_PDC202XX_FORCE $CONFIG_BLK_DEV_PDC202XX dep_bool ' ServerWorks OSB4/CSB5 chipsets support' CONFIG_BLK_DEV_SVWKS $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_X86 @@ -84,8 +93,6 @@ dep_bool ' VIA82CXXX chipset support' CONFIG_BLK_DEV_VIA82CXXX $CONFIG_BLK_DEV_IDEDMA_PCI fi -# dep_mbool ' Pacific Digital A-DMA support (EXPERIMENTAL)' CONFIG_BLK_DEV_PDC_ADMA $CONFIG_BLK_DEV_ADMA $CONFIG_IDEDMA_PCI_WIP - if [ "$CONFIG_PPC" = "y" -o "$CONFIG_ARM" = "y" ]; then bool ' Winbond SL82c105 support' CONFIG_BLK_DEV_SL82C105 fi @@ -149,6 +156,8 @@ bool ' UMC-8672 support' CONFIG_BLK_DEV_UMC8672 fi fi + + bool ' Use the NOOP Elevator (WARNING)' CONFIG_BLK_DEV_ELEVATOR_NOOP else bool 'Old hard disk (MFM/RLL/IDE) driver' CONFIG_BLK_DEV_HD_ONLY define_bool CONFIG_BLK_DEV_HD $CONFIG_BLK_DEV_HD_ONLY @@ -173,6 +182,7 @@ else define_bool CONFIG_DMA_NONPCI n fi + if [ "$CONFIG_IDE_CHIPSETS" = "y" -o \ "$CONFIG_BLK_DEV_AEC62XX" = "y" -o \ "$CONFIG_BLK_DEV_ALI15X3" = "y" -o \ diff -urN linux-2.4.17-rc2-virgin/drivers/ide/Makefile linux-2.4.17-rc2-wli2/drivers/ide/Makefile --- linux-2.4.17-rc2-virgin/drivers/ide/Makefile Tue Oct 9 09:18:37 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/Makefile Thu Dec 20 19:49:13 2001 @@ -10,7 +10,7 @@ O_TARGET := idedriver.o -export-objs := ide.o ide-features.o ataraid.o +export-objs := ide.o ide-features.o ide-probe.o ide-taskfile.o ataraid.o list-multi := ide-mod.o ide-probe-mod.o obj-y := @@ -24,6 +24,7 @@ obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd.o obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy.o + obj-$(CONFIG_BLK_DEV_IT8172) += it8172.o ide-obj-$(CONFIG_BLK_DEV_AEC62XX) += aec62xx.o @@ -69,13 +70,13 @@ # The virtualised raid layers MUST come after the ide itself or bad stuff # will happen. 
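An aside before the per-chipset driver changes: the read-placement accounting added to drivers/block/elevator.c earlier in this patch is worth restating on its own, since the 'Use the NOOP Elevator' option just added to Config.in bypasses it entirely. The sketch below is a simplified userspace rendition under assumed names; the real code walks struct request lists under the queue lock.

#include <stdio.h>

/* Simplified stand-in for struct request: direction plus size. */
struct req {
	int is_write;
	unsigned long nr_sectors;
};

/*
 * Rendition of the elevator_linus_merge() placement loop: scan from
 * the queue head and pick the slot where a non-merged read should be
 * inserted.  Each write costs 1 + nr_sectors/64; once the accumulated
 * cost reaches max_bomb_segments, the read is promoted in front of
 * the remaining writes.  old_pos is where the unmodified algorithm
 * (pure sorted order) would have put it.
 */
static int place_read(const struct req *q, int n, int old_pos,
		      int max_bomb_segments)
{
	int cur_latency = 0;
	int i;

	if (!max_bomb_segments)
		return old_pos;			/* feature disabled */
	for (i = 0; i < n && i != old_pos; i++) {
		if (q[i].is_write) {
			cur_latency += 1 + q[i].nr_sectors / 64;
			if (cur_latency >= max_bomb_segments)
				return i;	/* insert before this write */
		}
	}
	return old_pos;				/* keep sorted position */
}

int main(void)
{
	/* Four 128-sector writes queued ahead of a new read. */
	struct req q[4] = { {1, 128}, {1, 128}, {1, 128}, {1, 128} };
	printf("read inserted at index %d\n", place_read(q, 4, 4, 6));
	return 0;
}

The tunable is exposed through the elevator ioctl shown in that same hunk, so the write-bomb limit can be inspected and adjusted per queue at runtime.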
-obj-$(CONFIG_BLK_DEV_ATARAID) += ataraid.o +obj-$(CONFIG_BLK_DEV_ATARAID) += ataraid.o obj-$(CONFIG_BLK_DEV_ATARAID_PDC) += pdcraid.o obj-$(CONFIG_BLK_DEV_ATARAID_HPT) += hptraid.o ide-obj-$(CONFIG_PROC_FS) += ide-proc.o -ide-mod-objs := ide.o ide-features.o $(ide-obj-y) +ide-mod-objs := ide.o ide-features.o ide-taskfile.o $(ide-obj-y) ide-probe-mod-objs := ide-probe.o ide-geometry.o include $(TOPDIR)/Rules.make diff -urN linux-2.4.17-rc2-virgin/drivers/ide/alim15x3.c linux-2.4.17-rc2-wli2/drivers/ide/alim15x3.c --- linux-2.4.17-rc2-virgin/drivers/ide/alim15x3.c Sun Jul 15 16:22:23 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/alim15x3.c Thu Dec 20 19:49:13 2001 @@ -453,7 +453,7 @@ } dma_func = ide_dma_off_quietly; if ((id->field_valid & 4) && (m5229_revision >= 0xC2)) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive, can_ultra_dma); if ((id->field_valid & 2) && diff -urN linux-2.4.17-rc2-virgin/drivers/ide/amd74xx.c linux-2.4.17-rc2-wli2/drivers/ide/amd74xx.c --- linux-2.4.17-rc2-virgin/drivers/ide/amd74xx.c Mon Aug 13 14:56:19 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/amd74xx.c Thu Dec 20 19:49:13 2001 @@ -75,7 +75,8 @@ { unsigned int class_rev; - if (dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) + if ((dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) || + (dev->device == PCI_DEVICE_ID_AMD_VIPER_7441)) return 0; pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); @@ -122,8 +123,8 @@ pci_read_config_byte(dev, 0x4c, &pio_timing); #ifdef DEBUG - printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - drive->name, ultra_timing, dma_pio_timing, pio_timing); + printk("%s:%d: Speed 0x%02x UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, drive->dn, speed, ultra_timing, dma_pio_timing, pio_timing); #endif ultra_timing &= ~0xC7; @@ -131,22 +132,19 @@ pio_timing &= ~(0x03 << drive->dn); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif switch(speed) { #ifdef CONFIG_BLK_DEV_IDEDMA + case XFER_UDMA_7: + case XFER_UDMA_6: + speed = XFER_UDMA_5; case XFER_UDMA_5: -#undef __CAN_MODE_5 -#ifdef __CAN_MODE_5 ultra_timing |= 0x46; dma_pio_timing |= 0x20; break; -#else - printk("%s: setting to mode 4, driver problems in mode 5.\n", drive->name); - speed = XFER_UDMA_4; -#endif /* __CAN_MODE_5 */ case XFER_UDMA_4: ultra_timing |= 0x45; dma_pio_timing |= 0x20; @@ -164,7 +162,7 @@ dma_pio_timing |= 0x20; break; case XFER_UDMA_0: - ultra_timing |= 0x42; + ultra_timing |= 0x42; dma_pio_timing |= 0x20; break; case XFER_MW_DMA_2: @@ -222,8 +220,8 @@ pci_write_config_byte(dev, 0x4c, pio_timing); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif #ifdef CONFIG_BLK_DEV_IDEDMA @@ -303,11 +301,15 @@ struct pci_dev *dev = hwif->pci_dev; struct hd_driveid *id = drive->id; byte udma_66 = eighty_ninty_three(drive); - byte udma_100 = (dev->device==PCI_DEVICE_ID_AMD_VIPER_7411) ? 1 : 0; + byte udma_100 = ((dev->device==PCI_DEVICE_ID_AMD_VIPER_7411)|| + (dev->device==PCI_DEVICE_ID_AMD_VIPER_7441)) ? 
1 : 0; byte speed = 0x00; int rval; - if ((id->dma_ultra & 0x0020) && (udma_66)&& (udma_100)) { + if (udma_100) + udma_66 = eighty_ninty_three(drive); + + if ((id->dma_ultra & 0x0020) && (udma_66) && (udma_100)) { speed = XFER_UDMA_5; } else if ((id->dma_ultra & 0x0010) && (udma_66)) { speed = XFER_UDMA_4; @@ -331,7 +333,7 @@ (void) amd74xx_tune_chipset(drive, speed); - rval = (int)( ((id->dma_ultra >> 11) & 3) ? ide_dma_on : + rval = (int)( ((id->dma_ultra >> 11) & 7) ? ide_dma_on : ((id->dma_ultra >> 8) & 7) ? ide_dma_on : ((id->dma_mword >> 8) & 7) ? ide_dma_on : ide_dma_off_quietly); @@ -352,7 +354,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive); if ((id->field_valid & 2) && @@ -442,17 +444,43 @@ unsigned int __init ata66_amd74xx (ide_hwif_t *hwif) { + struct pci_dev *dev = hwif->pci_dev; + byte cable_80_pin[2] = { 0, 0 }; + byte ata66 = 0; + byte tmpbyte; + + /* + * Ultra66 cable detection (from Host View) + * 7411, 7441, 0x42, bit0: primary, bit2: secondary 80 pin + */ + pci_read_config_byte(dev, 0x42, &tmpbyte); + + /* + * 0x42, bit0 is 1 => primary channel + * has 80-pin (from host view) + */ + if (tmpbyte & 0x01) cable_80_pin[0] = 1; + + /* + * 0x42, bit2 is 1 => secondary channel + * has 80-pin (from host view) + */ + if (tmpbyte & 0x04) cable_80_pin[1] = 1; + + switch(dev->device) { + case PCI_DEVICE_ID_AMD_VIPER_7441: + case PCI_DEVICE_ID_AMD_VIPER_7411: + ata66 = (hwif->channel) ? + cable_80_pin[1] : + cable_80_pin[0]; + default: + break; + } #ifdef CONFIG_AMD74XX_OVERRIDE - byte ata66 = 1; + return(1); #else - byte ata66 = 0; + return (unsigned int) ata66; #endif /* CONFIG_AMD74XX_OVERRIDE */ - -#if 0 - pci_read_config_byte(hwif->pci_dev, 0x48, &ata66); - return ((ata66 & 0x02) ? 0 : 1); -#endif - return ata66; } void __init ide_init_amd74xx (ide_hwif_t *hwif) diff -urN linux-2.4.17-rc2-virgin/drivers/ide/cmd64x.c linux-2.4.17-rc2-wli2/drivers/ide/cmd64x.c --- linux-2.4.17-rc2-virgin/drivers/ide/cmd64x.c Thu Jul 27 16:40:57 2000 +++ linux-2.4.17-rc2-wli2/drivers/ide/cmd64x.c Thu Dec 20 19:49:13 2001 @@ -86,6 +86,7 @@ #include static int cmd64x_get_info(char *, char **, off_t, int); +static int cmd680_get_info(char *, char **, off_t, int); extern int (*cmd64x_display_info)(char *, char **, off_t, int); /* ide-proc.c */ extern char *ide_media_verbose(ide_drive_t *); static struct pci_dev *bmide_dev; @@ -180,24 +181,21 @@ return p-buffer; /* => must be less than 4k! */ } -#if 0 -static char * cmd64x_chipset_data (char *buf, struct pci_dev *dev) -{ - char *p = buf; - p += sprintf(p, "thingy stuff\n"); - return (char *)p; -} -static int __init cmd64x_get_info (char *buffer, char **addr, off_t offset, int count) +static int cmd680_get_info (char *buffer, char **addr, off_t offset, int count) { char *p = buffer; - p = cmd64x_chipset_data(buffer, bmide_dev); - return p-buffer; /* hoping it is less than 4K... */ + p += sprintf(p, "\n CMD680 Chipset.\n"); + p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); + p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n"); + p += sprintf(p, "PIO Mode: %s %s %s %s\n", + "?", "?", "?", "?"); + return p-buffer; /* => must be less than 4k! 
*/ } -#endif #endif /* defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_PROC_FS) */ byte cmd64x_proc = 0; +byte cmd680_proc = 0; /* * Registers and masks for easy access by drive index: @@ -345,10 +343,58 @@ setup_count, active_count, recovery_count); } -static void config_chipset_for_pio (ide_drive_t *drive, byte set_speed) +static byte cmd680_taskfile_timing (ide_hwif_t *hwif) { - byte speed= 0x00; - byte set_pio= ide_get_best_pio_mode(drive, 4, 5, NULL); + struct pci_dev *dev = hwif->pci_dev; + byte addr_mask = (hwif->channel) ? 0xB2 : 0xA2; + unsigned short timing; + + pci_read_config_word(dev, addr_mask, &timing); + + switch (timing) { + case 0x10c1: return 4; + case 0x10c3: return 3; + case 0x1281: return 2; + case 0x2283: return 1; + case 0x328a: + default: return 0; + } +} + +static void cmd680_tuneproc (ide_drive_t *drive, byte mode_wanted) +{ + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + byte drive_pci; + unsigned short speedt; + + switch (drive->dn) { + case 0: drive_pci = 0xA4; break; + case 1: drive_pci = 0xA6; break; + case 2: drive_pci = 0xB4; break; + case 3: drive_pci = 0xB6; break; + default: return; + } + + pci_read_config_word(dev, drive_pci, &speedt); + + /* cheat for now and use the docs */ +// switch(cmd680_taskfile_timing(hwif)) { + switch(mode_wanted) { + case 4: speedt = 0x10c1; break; + case 3: speedt = 0x10C3; break; + case 2: speedt = 0x1104; break; + case 1: speedt = 0x2283; break; + case 0: + default: speedt = 0x328A; break; + } + pci_write_config_word(dev, drive_pci, speedt); +} + +static void config_cmd64x_chipset_for_pio (ide_drive_t *drive, byte set_speed) +{ + byte speed = 0x00; + byte set_pio = ide_get_best_pio_mode(drive, 4, 5, NULL); cmd64x_tuneproc(drive, set_pio); speed = XFER_PIO_0 + set_pio; @@ -356,6 +402,41 @@ (void) ide_config_drive_speed(drive, speed); } +static void config_cmd680_chipset_for_pio (ide_drive_t *drive, byte set_speed) +{ + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + u8 unit = (drive->select.b.unit & 0x01); + u8 addr_mask = (hwif->channel) ? 0x84 : 0x80; + u8 speed = 0x00; + u8 mode_pci = 0x00; + u8 channel_timings = cmd680_taskfile_timing(hwif); + u8 set_pio = ide_get_best_pio_mode(drive, 4, 5, NULL); + + pci_read_config_byte(dev, addr_mask, &mode_pci); + mode_pci &= ~((unit) ? 0x30 : 0x03); + + /* WARNING PIO timing mess is going to happen b/w devices, argh */ + if ((channel_timings != set_pio) && (set_pio > channel_timings)) + set_pio = channel_timings; + + cmd680_tuneproc(drive, set_pio); + speed = XFER_PIO_0 + set_pio; + if (set_speed) { + (void) ide_config_drive_speed(drive, speed); + drive->current_speed = speed; + } +} + +static void config_chipset_for_pio (ide_drive_t *drive, byte set_speed) +{ + if (HWIF(drive)->pci_dev->device == PCI_DEVICE_ID_CMD_680) { + config_cmd680_chipset_for_pio(drive, set_speed); + } else { + config_cmd64x_chipset_for_pio(drive, set_speed); + } +} + static int cmd64x_tune_chipset (ide_drive_t *drive, byte speed) { #ifdef CONFIG_BLK_DEV_IDEDMA @@ -363,7 +444,7 @@ struct pci_dev *dev = hwif->pci_dev; int err = 0; - byte unit = (drive->select.b.unit & 0x01); + u8 unit = (drive->select.b.unit & 0x01); u8 pciU = (hwif->channel) ? UDIDETCR1 : UDIDETCR0; u8 pciD = (hwif->channel) ? BMIDESR1 : BMIDESR0; u8 regU = 0; @@ -424,8 +505,123 @@ return err; } +static int cmd680_tune_chipset (ide_drive_t *drive, byte speed) +{ + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + u8 addr_mask = (hwif->channel) ? 
0x84 : 0x80; + u8 unit = (drive->select.b.unit & 0x01); + u8 dma_pci = 0; + u8 udma_pci = 0; + u8 mode_pci = 0; + u8 scsc = 0; + u16 ultra = 0; + u16 multi = 0; + int err = 0; + + pci_read_config_byte(dev, addr_mask, &mode_pci); + pci_read_config_byte(dev, 0x8A, &scsc); + + switch (drive->dn) { + case 0: dma_pci = 0xA8; udma_pci = 0xAC; break; + case 1: dma_pci = 0xAA; udma_pci = 0xAE; break; + case 2: dma_pci = 0xB8; udma_pci = 0xBC; break; + case 3: dma_pci = 0xBA; udma_pci = 0xBE; break; + default: return 1; + } + + pci_read_config_byte(dev, addr_mask, &mode_pci); + mode_pci &= ~((unit) ? 0x30 : 0x03); + pci_read_config_word(dev, dma_pci, &multi); + pci_read_config_word(dev, udma_pci, &ultra); + + if ((speed == XFER_UDMA_6) && (scsc & 0x30) == 0x00) { + pci_write_config_byte(dev, 0x8A, scsc|0x01); + pci_read_config_byte(dev, 0x8A, &scsc); + } + + switch(speed) { +#ifdef CONFIG_BLK_DEV_IDEDMA + case XFER_UDMA_6: + if ((scsc & 0x30) == 0x00) + goto speed_break; + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= 0x01; + break; +speed_break : + speed = XFER_UDMA_5; + case XFER_UDMA_5: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x01 : 0x02); + break; + case XFER_UDMA_4: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x02 : 0x03); + break; + case XFER_UDMA_3: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x04 : 0x05); + break; + case XFER_UDMA_2: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x05 : 0x07); + break; + case XFER_UDMA_1: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x07 : 0x0B); + break; + case XFER_UDMA_0: + multi = 0x10C1; + ultra &= ~0x3F; + ultra |= (((scsc & 0x30) == 0x00) ? 0x0C : 0x0F); + break; + case XFER_MW_DMA_2: + multi = 0x10C1; + break; + case XFER_MW_DMA_1: + multi = 0x10C2; + break; + case XFER_MW_DMA_0: + multi = 0x2208; + break; +#endif /* CONFIG_BLK_DEV_IDEDMA */ + case XFER_PIO_4: cmd680_tuneproc(drive, 4); break; + case XFER_PIO_3: cmd680_tuneproc(drive, 3); break; + case XFER_PIO_2: cmd680_tuneproc(drive, 2); break; + case XFER_PIO_1: cmd680_tuneproc(drive, 1); break; + case XFER_PIO_0: cmd680_tuneproc(drive, 0); break; + default: + return 1; + } + + + if (speed >= XFER_MW_DMA_0) + config_cmd680_chipset_for_pio(drive, 0); + + if (speed >= XFER_UDMA_0) + mode_pci |= ((unit) ? 0x30 : 0x03); + else if (speed >= XFER_MW_DMA_0) + mode_pci |= ((unit) ? 0x20 : 0x02); + else + mode_pci |= ((unit) ? 
0x10 : 0x01); + + pci_write_config_byte(dev, addr_mask, mode_pci); + pci_write_config_word(dev, dma_pci, multi); + pci_write_config_word(dev, udma_pci, ultra); + + err = ide_config_drive_speed(drive, speed); + drive->current_speed = speed; + return err; +} + #ifdef CONFIG_BLK_DEV_IDEDMA -static int config_chipset_for_dma (ide_drive_t *drive, unsigned int rev, byte ultra_66) +static int config_cmd64x_chipset_for_dma (ide_drive_t *drive, unsigned int rev, byte ultra_66) { struct hd_driveid *id = drive->id; ide_hwif_t *hwif = HWIF(drive); @@ -510,6 +706,55 @@ return rval; } +static int config_cmd680_chipset_for_dma (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + byte udma_66 = eighty_ninty_three(drive); + byte speed = 0x00; + byte set_pio = 0x00; + int rval; + + if ((id->dma_ultra & 0x0040) && (udma_66)) speed = XFER_UDMA_6; + else if ((id->dma_ultra & 0x0020) && (udma_66)) speed = XFER_UDMA_5; + else if ((id->dma_ultra & 0x0010) && (udma_66)) speed = XFER_UDMA_4; + else if ((id->dma_ultra & 0x0008) && (udma_66)) speed = XFER_UDMA_3; + else if (id->dma_ultra & 0x0004) speed = XFER_UDMA_2; + else if (id->dma_ultra & 0x0002) speed = XFER_UDMA_1; + else if (id->dma_ultra & 0x0001) speed = XFER_UDMA_0; + else if (id->dma_mword & 0x0004) speed = XFER_MW_DMA_2; + else if (id->dma_mword & 0x0002) speed = XFER_MW_DMA_1; + else if (id->dma_mword & 0x0001) speed = XFER_MW_DMA_0; + else { + set_pio = 1; + } + + if (!drive->init_speed) + drive->init_speed = speed; + + config_chipset_for_pio(drive, set_pio); + + if (set_pio) + return ((int) ide_dma_off_quietly); + + if (cmd680_tune_chipset(drive, speed)) + return ((int) ide_dma_off); + + rval = (int)( ((id->dma_ultra >> 14) & 3) ? ide_dma_on : + ((id->dma_ultra >> 11) & 7) ? ide_dma_on : + ((id->dma_ultra >> 8) & 7) ? ide_dma_on : + ((id->dma_mword >> 8) & 7) ? ide_dma_on : + ((id->dma_1word >> 8) & 7) ? ide_dma_on : + ide_dma_off_quietly); + return rval; +} + +static int config_chipset_for_dma (ide_drive_t *drive, unsigned int rev, byte ultra_66) +{ + if (HWIF(drive)->pci_dev->device == PCI_DEVICE_ID_CMD_680) + return (config_cmd680_chipset_for_dma(drive)); + return (config_cmd64x_chipset_for_dma(drive, rev, ultra_66)); +} + static int cmd64x_config_drive_for_dma (ide_drive_t *drive) { struct hd_driveid *id = drive->id; @@ -519,12 +764,15 @@ byte can_ultra_33 = 0; byte can_ultra_66 = 0; byte can_ultra_100 = 0; + byte can_ultra_133 = 0; ide_dma_action_t dma_func = ide_dma_on; pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); class_rev &= 0xff; switch(dev->device) { + case PCI_DEVICE_ID_CMD_680: + can_ultra_133 = 1; case PCI_DEVICE_ID_CMD_649: can_ultra_100 = 1; case PCI_DEVICE_ID_CMD_648: @@ -550,7 +798,7 @@ } dma_func = ide_dma_off_quietly; if ((id->field_valid & 4) && (can_ultra_33)) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x007F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive, class_rev, can_ultra_66); if ((id->field_valid & 2) && @@ -586,6 +834,18 @@ return HWIF(drive)->dmaproc(dma_func, drive); } +static int cmd680_dmaproc (ide_dma_action_t func, ide_drive_t *drive) +{ + switch (func) { + case ide_dma_check: + return cmd64x_config_drive_for_dma(drive); + default: + break; + } + /* Other cases are done by generic IDE-DMA code. 
*/ + return ide_dmaproc(func, drive); +} + static int cmd64x_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { byte dma_stat = 0; @@ -663,7 +923,78 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA */ -unsigned int __init pci_init_cmd64x (struct pci_dev *dev, const char *name) +static int cmd680_busproc (ide_drive_t * drive, int state) +{ +#if 0 + ide_hwif_t *hwif = HWIF(drive); + u8 addr_mask = (hwif->channel) ? 0xB0 : 0xA0; + u32 stat_config = 0; + + pci_read_config_dword(hwif->pci_dev, addr_mask, &stat_config); + + if (!hwif) + return -EINVAL; + + switch (state) { + case BUSSTATE_ON: + hwif->drives[0].failures = 0; + hwif->drives[1].failures = 0; + break; + case BUSSTATE_OFF: + hwif->drives[0].failures = hwif->drives[0].max_failures + 1; + hwif->drives[1].failures = hwif->drives[1].max_failures + 1; + break; + case BUSSTATE_TRISTATE: + hwif->drives[0].failures = hwif->drives[0].max_failures + 1; + hwif->drives[1].failures = hwif->drives[1].max_failures + 1; + break; + default: + return 0; + } + hwif->bus_state = state; +#endif + return 0; +} + +void cmd680_reset (ide_drive_t *drive) +{ +#if 0 + ide_hwif_t *hwif = HWIF(drive); + u8 addr_mask = (hwif->channel) ? 0xB0 : 0xA0; + byte reset = 0; + + pci_read_config_byte(hwif->pci_dev, addr_mask, &reset); + pci_write_config_byte(hwif->pci_dev, addr_mask, reset|0x03); +#endif +} + +unsigned int cmd680_pci_init (struct pci_dev *dev, const char *name) +{ + u8 tmpbyte = 0; + pci_write_config_byte(dev, 0x80, 0x00); + pci_write_config_byte(dev, 0x84, 0x00); + pci_read_config_byte(dev, 0x8A, &tmpbyte); + pci_write_config_byte(dev, 0x8A, tmpbyte|0x01); + pci_write_config_word(dev, 0xA2, 0x328A); + pci_write_config_dword(dev, 0xA4, 0x328A); + pci_write_config_dword(dev, 0xA8, 0x4392); + pci_write_config_dword(dev, 0xAC, 0x4009); + pci_write_config_word(dev, 0xB2, 0x328A); + pci_write_config_dword(dev, 0xB4, 0x328A); + pci_write_config_dword(dev, 0xB8, 0x4392); + pci_write_config_dword(dev, 0xBC, 0x4009); + +#if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_PROC_FS) + if (!cmd64x_proc) { + cmd64x_proc = 1; + bmide_dev = dev; + cmd64x_display_info = &cmd680_get_info; + } +#endif /* DISPLAY_CMD64X_TIMINGS && CONFIG_PROC_FS */ + return 0; +} + +unsigned int cmd64x_pci_init (struct pci_dev *dev, const char *name) { unsigned char mrdmode; unsigned int class_rev; @@ -752,7 +1083,23 @@ return 0; } -unsigned int __init ata66_cmd64x (ide_hwif_t *hwif) +unsigned int __init pci_init_cmd64x (struct pci_dev *dev, const char *name) +{ + if (dev->device == PCI_DEVICE_ID_CMD_680) + return cmd680_pci_init (dev, name); + return cmd64x_pci_init (dev, name); +} + +unsigned int cmd680_ata66 (ide_hwif_t *hwif) +{ + byte ata66 = 0; + byte addr_mask = (hwif->channel) ? 0xB0 : 0xA0; + + pci_read_config_byte(hwif->pci_dev, addr_mask, &ata66); + return (ata66 & 0x01) ? 1 : 0; +} + +unsigned int cmd64x_ata66 (ide_hwif_t *hwif) { byte ata66 = 0; byte mask = (hwif->channel) ? 0x02 : 0x01; @@ -761,6 +1108,14 @@ return (ata66 & mask) ? 
1 : 0; } +unsigned int __init ata66_cmd64x (ide_hwif_t *hwif) +{ + struct pci_dev *dev = hwif->pci_dev; + if (dev->device == PCI_DEVICE_ID_CMD_680) + return cmd680_ata66(hwif); + return cmd64x_ata66(hwif); +} + void __init ide_init_cmd64x (ide_hwif_t *hwif) { struct pci_dev *dev = hwif->pci_dev; @@ -769,8 +1124,6 @@ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); class_rev &= 0xff; - hwif->tuneproc = &cmd64x_tuneproc; - hwif->speedproc = &cmd64x_tune_chipset; hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; @@ -779,10 +1132,19 @@ #ifdef CONFIG_BLK_DEV_IDEDMA switch(dev->device) { + case PCI_DEVICE_ID_CMD_680: + hwif->busproc = &cmd680_busproc; + hwif->dmaproc = &cmd680_dmaproc; + hwif->resetproc = &cmd680_reset; + hwif->speedproc = &cmd680_tune_chipset; + hwif->tuneproc = &cmd680_tuneproc; + break; case PCI_DEVICE_ID_CMD_649: case PCI_DEVICE_ID_CMD_648: case PCI_DEVICE_ID_CMD_643: - hwif->dmaproc = &cmd64x_dmaproc; + hwif->dmaproc = &cmd64x_dmaproc; + hwif->tuneproc = &cmd64x_tuneproc; + hwif->speedproc = &cmd64x_tune_chipset; break; case PCI_DEVICE_ID_CMD_646: hwif->chipset = ide_cmd646; @@ -791,6 +1153,8 @@ } else { hwif->dmaproc = &cmd64x_dmaproc; } + hwif->tuneproc = &cmd64x_tuneproc; + hwif->speedproc = &cmd64x_tune_chipset; break; default: break; diff -urN linux-2.4.17-rc2-virgin/drivers/ide/hpt366.c linux-2.4.17-rc2-wli2/drivers/ide/hpt366.c --- linux-2.4.17-rc2-virgin/drivers/ide/hpt366.c Tue Aug 14 20:01:07 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/hpt366.c Thu Dec 20 19:49:13 2001 @@ -1,7 +1,8 @@ /* - * linux/drivers/ide/hpt366.c Version 0.18 June. 9, 2000 + * linux/drivers/ide/hpt366.c Version 0.22 20 Sep 2001 * * Copyright (C) 1999-2000 Andre Hedrick + * Portions Copyright (C) 2001 Sun Microsystems, Inc. * May be copied or modified under the terms of the GNU General Public License * * Thanks to HighPoint Technologies for their assistance, and hardware. @@ -11,6 +12,34 @@ * * Note that final HPT370 support was done by force extraction of GPL. * + * - add function for getting/setting power status of drive + * - the HPT370's state machine can get confused. reset it before each dma + * xfer to prevent that from happening. + * - reset state engine whenever we get an error. + * - check for busmaster state at end of dma. + * - use new highpoint timings. + * - detect bus speed using highpoint register. + * - use pll if we don't have a clock table. added a 66MHz table that's + * just 2x the 33MHz table. + * - removed turnaround. NOTE: we never want to switch between pll and + * pci clocks as the chip can glitch in those cases. the highpoint + * approved workaround slows everything down too much to be useful. in + * addition, we would have to serialize access to each chip. + * Adrian Sun + * + * add drive timings for 66MHz PCI bus, + * fix ATA Cable signal detection, fix incorrect /proc info + * add /proc display for per-drive PIO/DMA/UDMA mode and + * per-channel ATA-33/66 Cable detect. + * Duncan Laurie + * + * fixup /proc output for multiple controllers + * Tim Hockin + * + * On hpt366: + * Reset the hpt366 on error, reset on dma + * Fix disabling Fast Interrupt hpt366. 
+ * Mike Waychison */ #include @@ -28,6 +57,7 @@ #include #include +#include #include #include @@ -35,6 +65,11 @@ #define DISPLAY_HPT366_TIMINGS +/* various tuning parameters */ +#define HPT_RESET_STATE_ENGINE +/*#define HPT_DELAY_INTERRUPT*/ +/*#define HPT_SERIALIZE_IO*/ + #if defined(DISPLAY_HPT366_TIMINGS) && defined(CONFIG_PROC_FS) #include #include @@ -106,146 +141,302 @@ struct chipset_bus_clock_list_entry { byte xfer_speed; - unsigned int chipset_settings_write; - unsigned int chipset_settings_read; + unsigned int chipset_settings; }; +/* key for bus clock timings + * bit + * 0:3 data_high_time. inactive time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 4:8 data_low_time. active time of DIOW_/DIOR_ for PIO and MW + * DMA. cycles = value + 1 + * 9:12 cmd_high_time. inactive time of DIOW_/DIOR_ during task file + * register access. + * 13:17 cmd_low_time. active time of DIOW_/DIOR_ during task file + * register access. + * 18:21 udma_cycle_time. clock freq and clock cycles for UDMA xfer. + * during task file register access. + * 22:24 pre_high_time. time to initialize 1st cycle for PIO and MW DMA + * xfer. + * 25:27 cmd_pre_high_time. time to initialize 1st PIO cycle for task + * register access. + * 28 UDMA enable + * 29 DMA enable + * 30 PIO_MST enable. if set, the chip is in bus master mode during + * PIO. + * 31 FIFO enable. + */ struct chipset_bus_clock_list_entry forty_base [] = { - { XFER_UDMA_4, 0x900fd943, 0x900fd943 }, - { XFER_UDMA_3, 0x900ad943, 0x900ad943 }, - { XFER_UDMA_2, 0x900bd943, 0x900bd943 }, - { XFER_UDMA_1, 0x9008d943, 0x9008d943 }, - { XFER_UDMA_0, 0x9008d943, 0x9008d943 }, - - { XFER_MW_DMA_2, 0xa008d943, 0xa008d943 }, - { XFER_MW_DMA_1, 0xa010d955, 0xa010d955 }, - { XFER_MW_DMA_0, 0xa010d9fc, 0xa010d9fc }, - - { XFER_PIO_4, 0xc008d963, 0xc008d963 }, - { XFER_PIO_3, 0xc010d974, 0xc010d974 }, - { XFER_PIO_2, 0xc010d997, 0xc010d997 }, - { XFER_PIO_1, 0xc010d9c7, 0xc010d9c7 }, - { XFER_PIO_0, 0xc018d9d9, 0xc018d9d9 }, - { 0, 0x0120d9d9, 0x0120d9d9 } + { XFER_UDMA_4, 0x900fd943 }, + { XFER_UDMA_3, 0x900ad943 }, + { XFER_UDMA_2, 0x900bd943 }, + { XFER_UDMA_1, 0x9008d943 }, + { XFER_UDMA_0, 0x9008d943 }, + + { XFER_MW_DMA_2, 0xa008d943 }, + { XFER_MW_DMA_1, 0xa010d955 }, + { XFER_MW_DMA_0, 0xa010d9fc }, + + { XFER_PIO_4, 0xc008d963 }, + { XFER_PIO_3, 0xc010d974 }, + { XFER_PIO_2, 0xc010d997 }, + { XFER_PIO_1, 0xc010d9c7 }, + { XFER_PIO_0, 0xc018d9d9 }, + { 0, 0x0120d9d9 } }; struct chipset_bus_clock_list_entry thirty_three_base [] = { - { XFER_UDMA_4, 0x90c9a731, 0x90c9a731 }, - { XFER_UDMA_3, 0x90cfa731, 0x90cfa731 }, - { XFER_UDMA_2, 0x90caa731, 0x90caa731 }, - { XFER_UDMA_1, 0x90cba731, 0x90cba731 }, - { XFER_UDMA_0, 0x90c8a731, 0x90c8a731 }, - - { XFER_MW_DMA_2, 0xa0c8a731, 0xa0c8a731 }, - { XFER_MW_DMA_1, 0xa0c8a732, 0xa0c8a732 }, /* 0xa0c8a733 */ - { XFER_MW_DMA_0, 0xa0c8a797, 0xa0c8a797 }, - - { XFER_PIO_4, 0xc0c8a731, 0xc0c8a731 }, - { XFER_PIO_3, 0xc0c8a742, 0xc0c8a742 }, - { XFER_PIO_2, 0xc0d0a753, 0xc0d0a753 }, - { XFER_PIO_1, 0xc0d0a7a3, 0xc0d0a7a3 }, /* 0xc0d0a793 */ - { XFER_PIO_0, 0xc0d0a7aa, 0xc0d0a7aa }, /* 0xc0d0a7a7 */ - { 0, 0x0120a7a7, 0x0120a7a7 } + { XFER_UDMA_4, 0x90c9a731 }, + { XFER_UDMA_3, 0x90cfa731 }, + { XFER_UDMA_2, 0x90caa731 }, + { XFER_UDMA_1, 0x90cba731 }, + { XFER_UDMA_0, 0x90c8a731 }, + + { XFER_MW_DMA_2, 0xa0c8a731 }, + { XFER_MW_DMA_1, 0xa0c8a732 }, /* 0xa0c8a733 */ + { XFER_MW_DMA_0, 0xa0c8a797 }, + + { XFER_PIO_4, 0xc0c8a731 }, + { XFER_PIO_3, 0xc0c8a742 }, + { XFER_PIO_2, 0xc0d0a753 }, + { XFER_PIO_1, 
0xc0d0a7a3 }, /* 0xc0d0a793 */ + { XFER_PIO_0, 0xc0d0a7aa }, /* 0xc0d0a7a7 */ + { 0, 0x0120a7a7 } }; struct chipset_bus_clock_list_entry twenty_five_base [] = { - { XFER_UDMA_4, 0x90c98521, 0x90c98521 }, - { XFER_UDMA_3, 0x90cf8521, 0x90cf8521 }, - { XFER_UDMA_2, 0x90cf8521, 0x90cf8521 }, - { XFER_UDMA_1, 0x90cb8521, 0x90cb8521 }, - { XFER_UDMA_0, 0x90cb8521, 0x90cb8521 }, - - { XFER_MW_DMA_2, 0xa0ca8521, 0xa0ca8521 }, - { XFER_MW_DMA_1, 0xa0ca8532, 0xa0ca8532 }, - { XFER_MW_DMA_0, 0xa0ca8575, 0xa0ca8575 }, - - { XFER_PIO_4, 0xc0ca8521, 0xc0ca8521 }, - { XFER_PIO_3, 0xc0ca8532, 0xc0ca8532 }, - { XFER_PIO_2, 0xc0ca8542, 0xc0ca8542 }, - { XFER_PIO_1, 0xc0d08572, 0xc0d08572 }, - { XFER_PIO_0, 0xc0d08585, 0xc0d08585 }, - { 0, 0x01208585, 0x01208585 } + { XFER_UDMA_4, 0x90c98521 }, + { XFER_UDMA_3, 0x90cf8521 }, + { XFER_UDMA_2, 0x90cf8521 }, + { XFER_UDMA_1, 0x90cb8521 }, + { XFER_UDMA_0, 0x90cb8521 }, + + { XFER_MW_DMA_2, 0xa0ca8521 }, + { XFER_MW_DMA_1, 0xa0ca8532 }, + { XFER_MW_DMA_0, 0xa0ca8575 }, + + { XFER_PIO_4, 0xc0ca8521 }, + { XFER_PIO_3, 0xc0ca8532 }, + { XFER_PIO_2, 0xc0ca8542 }, + { XFER_PIO_1, 0xc0d08572 }, + { XFER_PIO_0, 0xc0d08585 }, + { 0, 0x01208585 } }; +#if 1 +/* these are the current (4 sep 2001) timings from highpoint */ struct chipset_bus_clock_list_entry thirty_three_base_hpt370[] = { - { XFER_UDMA_5, 0x1A85F442, 0x16454e31 }, - { XFER_UDMA_4, 0x16454e31, 0x16454e31 }, - { XFER_UDMA_3, 0x166d4e31, 0x166d4e31 }, - { XFER_UDMA_2, 0x16494e31, 0x16494e31 }, - { XFER_UDMA_1, 0x164d4e31, 0x164d4e31 }, - { XFER_UDMA_0, 0x16514e31, 0x16514e31 }, - - { XFER_MW_DMA_2, 0x26514e21, 0x26514e21 }, - { XFER_MW_DMA_1, 0x26514e33, 0x26514e33 }, - { XFER_MW_DMA_0, 0x26514e97, 0x26514e97 }, - - { XFER_PIO_4, 0x06514e21, 0x06514e21 }, - { XFER_PIO_3, 0x06514e22, 0x06514e22 }, - { XFER_PIO_2, 0x06514e33, 0x06514e33 }, - { XFER_PIO_1, 0x06914e43, 0x06914e43 }, - { XFER_PIO_0, 0x06914e57, 0x06914e57 }, - { 0, 0x06514e57, 0x06514e57 } + { XFER_UDMA_5, 0x12446231 }, + { XFER_UDMA_4, 0x12446231 }, + { XFER_UDMA_3, 0x126c6231 }, + { XFER_UDMA_2, 0x12486231 }, + { XFER_UDMA_1, 0x124c6233 }, + { XFER_UDMA_0, 0x12506297 }, + + { XFER_MW_DMA_2, 0x22406c31 }, + { XFER_MW_DMA_1, 0x22406c33 }, + { XFER_MW_DMA_0, 0x22406c97 }, + + { XFER_PIO_4, 0x06414e31 }, + { XFER_PIO_3, 0x06414e42 }, + { XFER_PIO_2, 0x06414e53 }, + { XFER_PIO_1, 0x06814e93 }, + { XFER_PIO_0, 0x06814ea7 }, + { 0, 0x06814ea7 } +}; + +/* 2x 33MHz timings */ +struct chipset_bus_clock_list_entry sixty_six_base_hpt370[] = { + { XFER_UDMA_5, 0x1488e673 }, + { XFER_UDMA_4, 0x1488e673 }, + { XFER_UDMA_3, 0x1498e673 }, + { XFER_UDMA_2, 0x1490e673 }, + { XFER_UDMA_1, 0x1498e677 }, + { XFER_UDMA_0, 0x14a0e73f }, + + { XFER_MW_DMA_2, 0x2480fa73 }, + { XFER_MW_DMA_1, 0x2480fa77 }, + { XFER_MW_DMA_0, 0x2480fb3f }, + + { XFER_PIO_4, 0x0c82be73 }, + { XFER_PIO_3, 0x0c82be95 }, + { XFER_PIO_2, 0x0c82beb7 }, + { XFER_PIO_1, 0x0d02bf37 }, + { XFER_PIO_0, 0x0d02bf5f }, + { 0, 0x0d02bf5f } +}; +#else +/* from highpoint documentation. 
these are old values */ +struct chipset_bus_clock_list_entry thirty_three_base_hpt370[] = { + { XFER_UDMA_5, 0x16454e31 }, + { XFER_UDMA_4, 0x16454e31 }, + { XFER_UDMA_3, 0x166d4e31 }, + { XFER_UDMA_2, 0x16494e31 }, + { XFER_UDMA_1, 0x164d4e31 }, + { XFER_UDMA_0, 0x16514e31 }, + + { XFER_MW_DMA_2, 0x26514e21 }, + { XFER_MW_DMA_1, 0x26514e33 }, + { XFER_MW_DMA_0, 0x26514e97 }, + + { XFER_PIO_4, 0x06514e21 }, + { XFER_PIO_3, 0x06514e22 }, + { XFER_PIO_2, 0x06514e33 }, + { XFER_PIO_1, 0x06914e43 }, + { XFER_PIO_0, 0x06914e57 }, + { 0, 0x06514e57 } +}; + +struct chipset_bus_clock_list_entry sixty_six_base_hpt370[] = { + { XFER_UDMA_5, 0x14846231 }, + { XFER_UDMA_4, 0x14886231 }, + { XFER_UDMA_3, 0x148c6231 }, + { XFER_UDMA_2, 0x148c6231 }, + { XFER_UDMA_1, 0x14906231 }, + { XFER_UDMA_0, 0x14986231 }, + + { XFER_MW_DMA_2, 0x26514e21 }, + { XFER_MW_DMA_1, 0x26514e33 }, + { XFER_MW_DMA_0, 0x26514e97 }, + + { XFER_PIO_4, 0x06514e21 }, + { XFER_PIO_3, 0x06514e22 }, + { XFER_PIO_2, 0x06514e33 }, + { XFER_PIO_1, 0x06914e43 }, + { XFER_PIO_0, 0x06914e57 }, + { 0, 0x06514e57 } +}; +#endif + +struct chipset_bus_clock_list_entry fifty_base_hpt370[] = { + { XFER_UDMA_5, 0x12848242 }, + { XFER_UDMA_4, 0x12ac8242 }, + { XFER_UDMA_3, 0x128c8242 }, + { XFER_UDMA_2, 0x120c8242 }, + { XFER_UDMA_1, 0x12148254 }, + { XFER_UDMA_0, 0x121882ea }, + + { XFER_MW_DMA_2, 0x22808242 }, + { XFER_MW_DMA_1, 0x22808254 }, + { XFER_MW_DMA_0, 0x228082ea }, + + { XFER_PIO_4, 0x0a81f442 }, + { XFER_PIO_3, 0x0a81f443 }, + { XFER_PIO_2, 0x0a81f454 }, + { XFER_PIO_1, 0x0ac1f465 }, + { XFER_PIO_0, 0x0ac1f48a }, + { 0, 0x0ac1f48a } }; #define HPT366_DEBUG_DRIVE_INFO 0 #define HPT370_ALLOW_ATA100_5 1 #define HPT366_ALLOW_ATA66_4 1 #define HPT366_ALLOW_ATA66_3 1 +#define HPT366_MAX_DEVS 8 + +#define F_LOW_PCI_33 0x23 +#define F_LOW_PCI_40 0x29 +#define F_LOW_PCI_50 0x2d +#define F_LOW_PCI_66 0x42 + +static struct pci_dev *hpt_devs[HPT366_MAX_DEVS]; +static int n_hpt_devs; + +static unsigned int pci_rev_check_hpt3xx(struct pci_dev *dev); +static unsigned int pci_rev2_check_hpt3xx(struct pci_dev *dev); +byte hpt366_proc = 0; +byte hpt363_shared_irq; +byte hpt363_shared_pin; +extern char *ide_xfer_verbose (byte xfer_rate); #if defined(DISPLAY_HPT366_TIMINGS) && defined(CONFIG_PROC_FS) static int hpt366_get_info(char *, char **, off_t, int); extern int (*hpt366_display_info)(char *, char **, off_t, int); /* ide-proc.c */ extern char *ide_media_verbose(ide_drive_t *); -static struct pci_dev *bmide_dev; -static struct pci_dev *bmide2_dev; static int hpt366_get_info (char *buffer, char **addr, off_t offset, int count) { - char *p = buffer; - u32 bibma = bmide_dev->resource[4].start; - u32 bibma2 = bmide2_dev->resource[4].start; - char *chipset_names[] = {"HPT366", "HPT366", "HPT368", "HPT370", "HPT370A"}; - u8 c0 = 0, c1 = 0; - u32 class_rev; - - pci_read_config_dword(bmide_dev, PCI_CLASS_REVISION, &class_rev); - class_rev &= 0xff; - - /* - * at that point bibma+0x2 et bibma+0xa are byte registers - * to investigate: - */ - c0 = inb_p((unsigned short)bibma + 0x02); - if (bmide2_dev) - c1 = inb_p((unsigned short)bibma2 + 0x02); - - p += sprintf(p, "\n %s Chipset.\n", chipset_names[class_rev]); - p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); - p += sprintf(p, " %sabled %sabled\n", - (c0&0x80) ? "dis" : " en", - (c1&0x80) ? 
"dis" : " en"); - p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n"); - p += sprintf(p, "DMA enabled: %s %s %s %s\n", - (c0&0x20) ? "yes" : "no ", (c0&0x40) ? "yes" : "no ", - (c1&0x20) ? "yes" : "no ", (c1&0x40) ? "yes" : "no " ); - - p += sprintf(p, "UDMA\n"); - p += sprintf(p, "DMA\n"); - p += sprintf(p, "PIO\n"); + char *p = buffer; + char *chipset_nums[] = {"366", "366", "368", "370", "370A"}; + int i; + + p += sprintf(p, "\n " + "HighPoint HPT366/368/370\n"); + for (i = 0; i < n_hpt_devs; i++) { + struct pci_dev *dev = hpt_devs[i]; + unsigned short iobase = dev->resource[4].start; + u32 class_rev; + u8 c0, c1; + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xff; + + p += sprintf(p, "\nController: %d\n", i); + p += sprintf(p, "Chipset: HPT%s\n", chipset_nums[class_rev]); + p += sprintf(p, "--------------- Primary Channel " + "--------------- Secondary Channel " + "--------------\n"); + + /* get the bus master status registers */ + c0 = inb_p(iobase + 0x2); + c1 = inb_p(iobase + 0xa); + p += sprintf(p, "Enabled: %s" + " %s\n", + (c0 & 0x80) ? "no" : "yes", + (c1 & 0x80) ? "no" : "yes"); + + if (pci_rev_check_hpt3xx(dev)) { + u8 cbl; + cbl = inb_p(iobase + 0x7b); + outb_p(cbl | 1, iobase + 0x7b); + outb_p(cbl & ~1, iobase + 0x7b); + cbl = inb_p(iobase + 0x7a); + p += sprintf(p, "Cable: ATA-%d" + " ATA-%d\n", + (cbl & 0x02) ? 33 : 66, + (cbl & 0x01) ? 33 : 66); + p += sprintf(p, "\n"); + } + p += sprintf(p, "--------------- drive0 --------- drive1 " + "------- drive0 ---------- drive1 -------\n"); + p += sprintf(p, "DMA capable: %s %s" + " %s %s\n", + (c0 & 0x20) ? "yes" : "no ", + (c0 & 0x40) ? "yes" : "no ", + (c1 & 0x20) ? "yes" : "no ", + (c1 & 0x40) ? "yes" : "no "); + + { + u8 c2, c3; + /* older revs don't have these registers mapped + * into io space */ + pci_read_config_byte(dev, 0x43, &c0); + pci_read_config_byte(dev, 0x47, &c1); + pci_read_config_byte(dev, 0x4b, &c2); + pci_read_config_byte(dev, 0x4f, &c3); + + p += sprintf(p, "Mode: %s %s" + " %s %s\n", + (c0 & 0x10) ? "UDMA" : (c0 & 0x20) ? "DMA " : + (c0 & 0x80) ? "PIO " : "off ", + (c1 & 0x10) ? "UDMA" : (c1 & 0x20) ? "DMA " : + (c1 & 0x80) ? "PIO " : "off ", + (c2 & 0x10) ? "UDMA" : (c2 & 0x20) ? "DMA " : + (c2 & 0x80) ? "PIO " : "off ", + (c3 & 0x10) ? "UDMA" : (c3 & 0x20) ? "DMA " : + (c3 & 0x80) ? "PIO " : "off "); + } + } + p += sprintf(p, "\n"); + return p-buffer;/* => must be less than 4k! */ } #endif /* defined(DISPLAY_HPT366_TIMINGS) && defined(CONFIG_PROC_FS) */ -byte hpt366_proc = 0; - -extern char *ide_xfer_verbose (byte xfer_rate); -byte hpt363_shared_irq; -byte hpt363_shared_pin; - static unsigned int pci_rev_check_hpt3xx (struct pci_dev *dev) { unsigned int class_rev; @@ -282,16 +473,16 @@ return 0; } -static unsigned int pci_bus_clock_list (byte speed, int direction, struct chipset_bus_clock_list_entry * chipset_table) +static unsigned int pci_bus_clock_list (byte speed, struct chipset_bus_clock_list_entry * chipset_table) { for ( ; chipset_table->xfer_speed ; chipset_table++) if (chipset_table->xfer_speed == speed) { - return (direction) ? chipset_table->chipset_settings_write : chipset_table->chipset_settings_read; + return chipset_table->chipset_settings; } - return (direction) ? 
chipset_table->chipset_settings_write : chipset_table->chipset_settings_read; + return chipset_table->chipset_settings; } -static void hpt366_tune_chipset (ide_drive_t *drive, byte speed, int direction) +static void hpt366_tune_chipset (ide_drive_t *drive, byte speed) { byte regtime = (drive->select.b.unit & 0x01) ? 0x44 : 0x40; byte regfast = (HWIF(drive)->channel) ? 0x55 : 0x51; @@ -304,7 +495,7 @@ byte drive_fast = 0; /* - * Disable the "fast interrupt" prediction. + * Disable the "fast interrupt" prediction. */ pci_read_config_byte(HWIF(drive)->pci_dev, regfast, &drive_fast); if (drive_fast & 0x02) @@ -314,16 +505,22 @@ /* detect bus speed by looking at control reg timing: */ switch((reg1 >> 8) & 7) { case 5: - reg2 = pci_bus_clock_list(speed, direction, forty_base); + reg2 = pci_bus_clock_list(speed, forty_base); break; case 9: - reg2 = pci_bus_clock_list(speed, direction, twenty_five_base); + reg2 = pci_bus_clock_list(speed, twenty_five_base); break; default: case 7: - reg2 = pci_bus_clock_list(speed, direction, thirty_three_base); + reg2 = pci_bus_clock_list(speed, thirty_three_base); break; } +#if 0 + /* this is a nice idea ... */ + list_conf = pci_bus_clock_list(speed, + (struct chipset_bus_clock_list_entry *) + dev->sysdata); +#endif /* * Disable on-chip PIO FIFO/buffer (to avoid problems handling I/O errors later) */ @@ -337,40 +534,47 @@ pci_write_config_dword(HWIF(drive)->pci_dev, regtime, reg2); } -static void hpt370_tune_chipset (ide_drive_t *drive, byte speed, int direction) +static void hpt370_tune_chipset (ide_drive_t *drive, byte speed) { byte regfast = (HWIF(drive)->channel) ? 0x55 : 0x51; - byte reg5bh = (speed != XFER_UDMA_5) ? 0x22 : (direction) ? 0x20 : 0x22; - unsigned int list_conf = pci_bus_clock_list(speed, direction, thirty_three_base_hpt370); + unsigned int list_conf = 0; unsigned int drive_conf = 0; unsigned int conf_mask = (speed >= XFER_MW_DMA_0) ? 0xc0000000 : 0x30070000; - byte drive_pci = 0; - byte drive_fast = 0; + byte drive_pci = 0x40 + (drive->dn * 4); + byte new_fast, drive_fast = 0; + struct pci_dev *dev = HWIF(drive)->pci_dev; - switch (drive->dn) { - case 0: drive_pci = 0x40; break; - case 1: drive_pci = 0x44; break; - case 2: drive_pci = 0x48; break; - case 3: drive_pci = 0x4c; break; - default: return; - } /* * Disable the "fast interrupt" prediction. + * don't holdoff on interrupts. 
(== 0x01 despite what the docs say) */ - pci_read_config_byte(HWIF(drive)->pci_dev, regfast, &drive_fast); - if (drive_fast & 0x80) - pci_write_config_byte(HWIF(drive)->pci_dev, regfast, drive_fast & ~0x80); + pci_read_config_byte(dev, regfast, &drive_fast); + new_fast = drive_fast; + if (new_fast & 0x02) + new_fast &= ~0x02; + +#ifdef HPT_DELAY_INTERRUPT + if (new_fast & 0x01) + new_fast &= ~0x01; +#else + if ((new_fast & 0x01) == 0) + new_fast |= 0x01; +#endif + if (new_fast != drive_fast) + pci_write_config_byte(HWIF(drive)->pci_dev, regfast, new_fast); - pci_read_config_dword(HWIF(drive)->pci_dev, drive_pci, &drive_conf); - pci_write_config_byte(HWIF(drive)->pci_dev, 0x5b, reg5bh); + list_conf = pci_bus_clock_list(speed, + (struct chipset_bus_clock_list_entry *) + dev->sysdata); + pci_read_config_dword(dev, drive_pci, &drive_conf); list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask); - /* - * Disable on-chip PIO FIFO/buffer (to avoid problems handling I/O errors later) - */ - list_conf &= ~0x80000000; + + if (speed < XFER_MW_DMA_0) { + list_conf &= ~0x80000000; /* Disable on-chip PIO FIFO/buffer */ + } - pci_write_config_dword(HWIF(drive)->pci_dev, drive_pci, list_conf); + pci_write_config_dword(dev, drive_pci, list_conf); } static int hpt3xx_tune_chipset (ide_drive_t *drive, byte speed) @@ -382,9 +586,9 @@ drive->init_speed = speed; if (pci_rev_check_hpt3xx(HWIF(drive)->pci_dev)) { - hpt370_tune_chipset(drive, speed, 0); + hpt370_tune_chipset(drive, speed); } else { - hpt366_tune_chipset(drive, speed, 0); + hpt366_tune_chipset(drive, speed); } drive->current_speed = speed; return ((int) ide_config_drive_speed(drive, speed)); @@ -541,13 +745,6 @@ } } -void hpt370_rw_proc (ide_drive_t *drive, ide_dma_action_t func) -{ - if ((func != ide_dma_write) || (func != ide_dma_read)) - return; - hpt370_tune_chipset(drive, drive->current_speed, (func == ide_dma_write)); -} - static int config_drive_xfer_rate (ide_drive_t *drive) { struct hd_driveid *id = drive->id; @@ -624,6 +821,13 @@ reg50h, reg52h, reg5ah); if (reg5ah & 0x10) pci_write_config_byte(HWIF(drive)->pci_dev, 0x5a, reg5ah & ~0x10); + /* fall through to a reset */ +#if 0 + case ide_dma_begin: + case ide_dma_end: + /* reset the chips state over and over.. */ + pci_write_config_byte(HWIF(drive)->pci_dev, 0x51, 0x13); +#endif break; case ide_dma_timeout: default: @@ -634,9 +838,52 @@ int hpt370_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte regstate = hwif->channel ? 0x54 : 0x50; + byte reginfo = hwif->channel ? 
0x56 : 0x52;
+	byte dma_stat;
+
	switch (func) {
		case ide_dma_check:
			return config_drive_xfer_rate(drive);
+		case ide_dma_test_irq:	/* returns 1 if dma irq issued, 0 otherwise */
+			dma_stat = inb(dma_base+2);
+			return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
+
+		case ide_dma_end:
+			dma_stat = inb(dma_base + 2);
+			if (dma_stat & 0x01) {
+				udelay(20);	/* wait a little */
+				dma_stat = inb(dma_base + 2);
+			}
+			if ((dma_stat & 0x01) == 0)
+				break;
+
+			func = ide_dma_timeout;
+			/* fallthrough */
+
+		case ide_dma_timeout:
+		case ide_dma_lostirq:
+			pci_read_config_byte(hwif->pci_dev, reginfo,
+					     &dma_stat);
+			printk("%s: %d bytes in FIFO\n", drive->name,
+			       dma_stat);
+			pci_write_config_byte(hwif->pci_dev, regstate, 0x37);
+			udelay(10);
+			dma_stat = inb(dma_base);
+			outb(dma_stat & ~0x1, dma_base);	/* stop dma */
+			dma_stat = inb(dma_base + 2);
+			outb(dma_stat | 0x6, dma_base+2);	/* clear errors */
+			/* fallthrough */
+
+#ifdef HPT_RESET_STATE_ENGINE
+		case ide_dma_begin:
+#endif
+			pci_write_config_byte(hwif->pci_dev, regstate, 0x37);
+			udelay(10);
+			break;
+
		default:
			break;
	}
@@ -644,6 +891,210 @@
 }
 #endif /* CONFIG_BLK_DEV_IDEDMA */
 
+/*
+ * Since SUN Cobalt is attempting this operation, I should disclose that
+ * it dates back a long way: Thu Jul 27 16:40:57 2000 was the date of the
+ * HOTSWAP ATA Infrastructure patch.
+ */
+void hpt3xx_reset (ide_drive_t *drive)
+{
+#if 0
+	unsigned long high_16	= pci_resource_start(HWIF(drive)->pci_dev, 4);
+	byte reset		= (HWIF(drive)->channel) ? 0x80 : 0x40;
+	byte reg59h		= 0;
+
+	pci_read_config_byte(HWIF(drive)->pci_dev, 0x59, &reg59h);
+	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h|reset);
+	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h);
+#endif
+}
+
+static int hpt3xx_tristate (ide_drive_t * drive, int state)
+{
+	ide_hwif_t *hwif	= HWIF(drive);
+	struct pci_dev *dev;
+	byte reset, state_reg;
+	byte reg59h		= 0;
+	byte regXXh		= 0;
+
+	if (!hwif)
+		return -EINVAL;
+
+	dev		= hwif->pci_dev;
+	reset		= (hwif->channel) ? 0x80 : 0x40;
+	state_reg	= (hwif->channel) ? 0x57 : 0x53;
+
+//	hwif->bus_state = state;
+
+	pci_read_config_byte(dev, 0x59, &reg59h);
+	pci_read_config_byte(dev, state_reg, &regXXh);
+
+	if (state) {
+		(void) ide_do_reset(drive);
+		pci_write_config_byte(dev, state_reg, regXXh|0x80);
+		pci_write_config_byte(dev, 0x59, reg59h|reset);
+	} else {
+		pci_write_config_byte(dev, 0x59, reg59h & ~(reset));
+		pci_write_config_byte(dev, state_reg, regXXh & ~(0x80));
+		(void) ide_do_reset(drive);
+	}
+	return 0;
+}
+
+/*
+ * set/get power state for a drive.
+ * turning the power off does the following things:
+ * 1) soft-reset the drive
+ * 2) tri-states the ide bus
+ *
+ * when we turn things back on, we need to re-initialize things.
+ */
+#define TRISTATE_BIT	0x8000
+static int hpt370_busproc(ide_drive_t * drive, int state)
+{
+	ide_hwif_t *hwif	= HWIF(drive);
+	byte tristate, resetmask, bus_reg;
+	u16 tri_reg;
+
+	if (!hwif)
+		return -EINVAL;
+
+	hwif->bus_state = state;
+
+	if (hwif->channel) {
+		/* secondary channel */
+		tristate = 0x56;
+		resetmask = 0x80;
+	} else {
+		/* primary channel */
+		tristate = 0x52;
+		resetmask = 0x40;
+	}
+
+	/* grab status */
+	pci_read_config_word(hwif->pci_dev, tristate, &tri_reg);
+	pci_read_config_byte(hwif->pci_dev, 0x59, &bus_reg);
+
+	/* set the state. we don't set it if we don't need to do so.
+ * make sure that the drive knows that it has failed if it's off */ + switch (state) { + case BUSSTATE_ON: + hwif->drives[0].failures = 0; + hwif->drives[1].failures = 0; + if ((bus_reg & resetmask) == 0) + return 0; + tri_reg &= ~TRISTATE_BIT; + bus_reg &= ~resetmask; + break; + case BUSSTATE_OFF: + hwif->drives[0].failures = hwif->drives[0].max_failures + 1; + hwif->drives[1].failures = hwif->drives[1].max_failures + 1; + if ((tri_reg & TRISTATE_BIT) == 0 && (bus_reg & resetmask)) + return 0; + tri_reg &= ~TRISTATE_BIT; + bus_reg |= resetmask; + break; + case BUSSTATE_TRISTATE: + hwif->drives[0].failures = hwif->drives[0].max_failures + 1; + hwif->drives[1].failures = hwif->drives[1].max_failures + 1; + if ((tri_reg & TRISTATE_BIT) && (bus_reg & resetmask)) + return 0; + tri_reg |= TRISTATE_BIT; + bus_reg |= resetmask; + break; + } + pci_write_config_byte(hwif->pci_dev, 0x59, bus_reg); + pci_write_config_word(hwif->pci_dev, tristate, tri_reg); + + return 0; +} + +static void __init init_hpt370(struct pci_dev *dev) +{ + int adjust, i; + u16 freq; + u32 pll; + byte reg5bh; + + /* + * default to pci clock. make sure MA15/16 are set to output + * to prevent drives having problems with 40-pin cables. + */ + pci_write_config_byte(dev, 0x5b, 0x23); + + /* + * set up the PLL. we need to adjust it so that it's stable. + * freq = Tpll * 192 / Tpci + */ + pci_read_config_word(dev, 0x78, &freq); + freq &= 0x1FF; + if (freq < 0x9c) { + pll = F_LOW_PCI_33; + dev->sysdata = (void *) thirty_three_base_hpt370; + printk("HPT370: using 33MHz PCI clock\n"); + } else if (freq < 0xb0) { + pll = F_LOW_PCI_40; + } else if (freq < 0xc8) { + pll = F_LOW_PCI_50; + dev->sysdata = (void *) fifty_base_hpt370; + printk("HPT370: using 50MHz PCI clock\n"); + } else { + pll = F_LOW_PCI_66; + dev->sysdata = (void *) sixty_six_base_hpt370; + printk("HPT370: using 66MHz PCI clock\n"); + } + + /* + * only try the pll if we don't have a table for the clock + * speed that we're running at. NOTE: the internal PLL will + * result in slow reads when using a 33MHz PCI clock. we also + * don't like to use the PLL because it will cause glitches + * on PRST/SRST when the HPT state engine gets reset. + */ + if (dev->sysdata) + goto init_hpt370_done; + + /* + * adjust PLL based upon PCI clock, enable it, and wait for + * stabilization. + */ + adjust = 0; + freq = (pll < F_LOW_PCI_50) ? 
2 : 4;
+	while (adjust++ < 6) {
+		pci_write_config_dword(dev, 0x5c, (freq + pll) << 16 |
+				       pll | 0x100);
+
+		/* wait for clock stabilization */
+		for (i = 0; i < 0x50000; i++) {
+			pci_read_config_byte(dev, 0x5b, &reg5bh);
+			if (reg5bh & 0x80) {
+				/* spin looking for the clock to destabilize */
+				for (i = 0; i < 0x1000; ++i) {
+					pci_read_config_byte(dev, 0x5b,
+							     &reg5bh);
+					if ((reg5bh & 0x80) == 0)
+						goto pll_recal;
+				}
+				pci_read_config_dword(dev, 0x5c, &pll);
+				pci_write_config_dword(dev, 0x5c,
+						       pll & ~0x100);
+				pci_write_config_byte(dev, 0x5b, 0x21);
+				dev->sysdata = (void *) fifty_base_hpt370;
+				printk("HPT370: using 50MHz internal PLL\n");
+				goto init_hpt370_done;
+			}
+		}
+pll_recal:
+		if (adjust & 1)
+			pll -= (adjust >> 1);
+		else
+			pll += (adjust >> 1);
+	}
+
+init_hpt370_done:
+	/* reset state engine */
+	pci_write_config_byte(dev, 0x50, 0x37);
+	pci_write_config_byte(dev, 0x54, 0x37);
+	udelay(100);
+}
+
 unsigned int __init pci_init_hpt366 (struct pci_dev *dev, const char *name)
 {
 	byte test = 0;
@@ -652,14 +1103,8 @@
 	pci_write_config_byte(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 
 	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test);
-
-#if 0
-	if (test != 0x08)
-		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 0x08);
-#else
 	if (test != (L1_CACHE_BYTES / 4))
 		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4));
-#endif
 
 	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &test);
 	if (test != 0x78)
@@ -673,17 +1118,18 @@
 	if (test != 0x08)
 		pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
 
+	if (pci_rev_check_hpt3xx(dev)) {
+		init_hpt370(dev);
+		hpt_devs[n_hpt_devs++] = dev;
+	} else {
+		hpt_devs[n_hpt_devs++] = dev;
+	}
+
 #if defined(DISPLAY_HPT366_TIMINGS) && defined(CONFIG_PROC_FS)
 	if (!hpt366_proc) {
 		hpt366_proc = 1;
-		bmide_dev = dev;
-		if (pci_rev_check_hpt3xx(dev))
-			bmide2_dev = dev;
 		hpt366_display_info = &hpt366_get_info;
 	}
-	if ((hpt366_proc) && ((dev->devfn - bmide_dev->devfn) == 1)) {
-		bmide2_dev = dev;
-	}
 #endif /* DISPLAY_HPT366_TIMINGS && CONFIG_PROC_FS */
 
 	return dev->irq;
@@ -691,38 +1137,58 @@
 
 unsigned int __init ata66_hpt366 (ide_hwif_t *hwif)
 {
-	byte ata66 = 0;
+	byte ata66	= 0;
+	byte regmask	= (hwif->channel) ? 0x01 : 0x02;
 
 	pci_read_config_byte(hwif->pci_dev, 0x5a, &ata66);
 #ifdef DEBUG
 	printk("HPT366: reg5ah=0x%02x ATA-%s Cable Port%d\n",
-		ata66, (ata66 & 0x02) ? "33" : "66",
+		ata66, (ata66 & regmask) ? "33" : "66",
 		PCI_FUNC(hwif->pci_dev->devfn));
 #endif /* DEBUG */
-	return ((ata66 & 0x02) ? 0 : 1);
+	return ((ata66 & regmask) ? 0 : 1);
 }
 
 void __init ide_init_hpt366 (ide_hwif_t *hwif)
 {
+	int hpt_rev;
+
 	hwif->tuneproc	= &hpt3xx_tune_drive;
 	hwif->speedproc	= &hpt3xx_tune_chipset;
 	hwif->quirkproc	= &hpt3xx_quirkproc;
 	hwif->intrproc	= &hpt3xx_intrproc;
 	hwif->maskproc	= &hpt3xx_maskproc;
 
+#ifdef HPT_SERIALIZE_IO
+	/* serialize access to this device */
+	if (hwif->mate)
+		hwif->serialized = hwif->mate->serialized = 1;
+#endif
+
+	hpt_rev = pci_rev_check_hpt3xx(hwif->pci_dev);
+	if (hpt_rev) {
+		/* set up ioctl for power status. note: power affects both
+		 * drives on each channel */
+		hwif->busproc	= &hpt370_busproc;
+	}
+
 	if (pci_rev2_check_hpt3xx(hwif->pci_dev)) {
 		/* do nothing now but will split device types */
+		hwif->resetproc	= &hpt3xx_reset;
+/*
+ * don't do until we can parse out the cobalt box argh ...
+ * hwif->busproc = &hpt3xx_tristate;
+ */
 	}
 
 #ifdef CONFIG_BLK_DEV_IDEDMA
 	if (hwif->dma_base) {
-		if (pci_rev_check_hpt3xx(hwif->pci_dev)) {
+		if (hpt_rev) {
 			byte reg5ah = 0;
 			pci_read_config_byte(hwif->pci_dev, 0x5a, &reg5ah);
 			if (reg5ah & 0x10)	/* interrupt force enable */
 				pci_write_config_byte(hwif->pci_dev, 0x5a, reg5ah & ~0x10);
 			hwif->dmaproc = &hpt370_dmaproc;
-			hwif->rwproc = &hpt370_rw_proc;
 		} else {
 			hwif->dmaproc = &hpt366_dmaproc;
 		}
diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-cd.c linux-2.4.17-rc2-wli2/drivers/ide/ide-cd.c
--- linux-2.4.17-rc2-virgin/drivers/ide/ide-cd.c	Wed Oct 24 23:53:51 2001
+++ linux-2.4.17-rc2-wli2/drivers/ide/ide-cd.c	Thu Dec 20 19:49:13 2001
@@ -2956,11 +2956,7 @@
 	return 0;
 }
 
-static
-int ide_cdrom_reinit (ide_drive_t *drive)
-{
-	return 0;
-}
+int ide_cdrom_reinit (ide_drive_t *drive);
 
 static ide_driver_t ide_cdrom_driver = {
 	name:			"ide-cdrom",
@@ -2970,6 +2966,8 @@
 	supports_dma:		1,
 	supports_dsc_overlap:	1,
 	cleanup:		ide_cdrom_cleanup,
+	standby:		NULL,
+	flushcache:		NULL,
 	do_request:		ide_do_rw_cdrom,
 	end_request:		NULL,
 	ioctl:			ide_cdrom_ioctl,
@@ -2981,7 +2979,9 @@
 	capacity:		ide_cdrom_capacity,
 	special:		NULL,
 	proc:			NULL,
-	driver_reinit:		ide_cdrom_reinit,
+	reinit:			ide_cdrom_reinit,
+	ata_prebuilder:		NULL,
+	atapi_prebuilder:	NULL,
 };
 
 int ide_cdrom_init(void);
@@ -2997,6 +2997,39 @@
 MODULE_PARM(ignore, "s");
 MODULE_DESCRIPTION("ATAPI CD-ROM Driver");
 
+
+int ide_cdrom_reinit (ide_drive_t *drive)
+{
+	struct cdrom_info *info;
+	int failed = 0;
+
+	MOD_INC_USE_COUNT;
+	info = (struct cdrom_info *) kmalloc (sizeof (struct cdrom_info), GFP_KERNEL);
+	if (info == NULL) {
+		printk ("%s: Can't allocate a cdrom structure\n", drive->name);
+		return 1;
+	}
+	if (ide_register_subdriver (drive, &ide_cdrom_driver, IDE_SUBDRIVER_VERSION)) {
+		printk ("%s: Failed to register the driver with ide.c\n", drive->name);
+		kfree (info);
+		return 1;
+	}
+	memset (info, 0, sizeof (struct cdrom_info));
+	drive->driver_data = info;
+	DRIVER(drive)->busy++;
+	if (ide_cdrom_setup (drive)) {
+		DRIVER(drive)->busy--;
+		if (ide_cdrom_cleanup (drive))
+			printk ("%s: ide_cdrom_cleanup failed in ide_cdrom_init\n", drive->name);
+		return 1;
+	}
+	DRIVER(drive)->busy--;
+	failed--;
+
+	ide_register_module(&ide_cdrom_module);
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
 
 static void __exit ide_cdrom_exit(void)
 {
diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-cd.h linux-2.4.17-rc2-wli2/drivers/ide/ide-cd.h
--- linux-2.4.17-rc2-virgin/drivers/ide/ide-cd.h	Thu Nov 22 11:46:58 2001
+++ linux-2.4.17-rc2-wli2/drivers/ide/ide-cd.h	Thu Dec 20 19:49:13 2001
@@ -38,7 +38,9 @@
 /************************************************************************/
 
 #define SECTOR_BITS 		9
+#ifndef SECTOR_SIZE
 #define SECTOR_SIZE		(1 << SECTOR_BITS)
+#endif
 #define SECTORS_PER_FRAME	(CD_FRAMESIZE >> SECTOR_BITS)
 #define SECTOR_BUFFER_SIZE	(CD_FRAMESIZE * 32)
 #define SECTORS_BUFFER		(SECTOR_BUFFER_SIZE >> SECTOR_BITS)
diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-cs.c linux-2.4.17-rc2-wli2/drivers/ide/ide-cs.c
--- linux-2.4.17-rc2-virgin/drivers/ide/ide-cs.c	Sun Sep 30 12:26:05 2001
+++ linux-2.4.17-rc2-wli2/drivers/ide/ide-cs.c	Thu Dec 20 19:49:13 2001
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -226,6 +227,15 @@
 #define CFG_CHECK(fn, args...)
\ if (CardServices(fn, args) != 0) goto next_entry +int idecs_register (int arg1, int arg2, int irq) +{ + hw_regs_t hw; + ide_init_hwif_ports(&hw, (ide_ioreg_t) arg1, (ide_ioreg_t) arg2, NULL); + hw.irq = irq; + hw.chipset = ide_pci; /* this enables IRQ sharing w/ PCI irqs */ + return ide_register_hw(&hw, NULL); +} + void ide_config(dev_link_t *link) { client_handle_t handle = link->handle; @@ -329,10 +339,14 @@ /* retry registration in case device is still spinning up */ for (i = 0; i < 10; i++) { - hd = ide_register(io_base, ctl_base, link->irq.AssignedIRQ); + if (ctl_base) + outb(0x02, ctl_base); /* Set nIEN = disable device interrupts */ + hd = idecs_register(io_base, ctl_base, link->irq.AssignedIRQ); if (hd >= 0) break; if (link->io.NumPorts1 == 0x20) { - hd = ide_register(io_base+0x10, ctl_base+0x10, + if (ctl_base) + outb(0x02, ctl_base+0x10); + hd = idecs_register(io_base+0x10, ctl_base+0x10, link->irq.AssignedIRQ); if (hd >= 0) { io_base += 0x10; ctl_base += 0x10; diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-disk.c linux-2.4.17-rc2-wli2/drivers/ide/ide-disk.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-disk.c Tue Dec 18 21:21:42 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-disk.c Thu Dec 20 19:49:13 2001 @@ -27,9 +27,11 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 added 48-bit lba + * Version 1.12 adding taskfile io access method */ -#define IDEDISK_VERSION "1.10" +#define IDEDISK_VERSION "1.12" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -59,6 +61,14 @@ #define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */ #endif +#ifdef CONFIG_IDE_TASKFILE_IO +# undef __TASKFILE__IO /* define __TASKFILE__IO */ +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#ifndef __TASKFILE__IO + static void idedisk_bswap_data (void *buffer, int wcount) { u16 *p = buffer; @@ -86,6 +96,8 @@ ide_output_data(drive, buffer, wcount); } +#endif /* __TASKFILE__IO */ + /* * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" * value for this drive (from its reported identification information). @@ -99,6 +111,11 @@ { unsigned long lba_sects, chs_sects, head, tail; + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + printk("48-bit Drive: %llu \n", id->lba_capacity_2); + return 1; + } + /* * The ATA spec tells large drives to return * C/H/S = 16383/16/63 independent of their size. @@ -131,6 +148,8 @@ return 0; /* lba_capacity value may be bad */ } +#ifndef __TASKFILE__IO + /* * read_intr() is the handler for disk read/multread interrupts */ @@ -313,53 +332,218 @@ } return ide_error(drive, "multwrite_intr", stat); } +#endif /* __TASKFILE__IO */ + +#ifdef __TASKFILE__IO + +static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block); +static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block); +static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block); /* - * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. + * do_rw_disk() issues READ and WRITE commands to a disk, + * using LBA if supported, or CHS otherwise, to address sectors. + * It also takes care of issuing special DRIVE_CMDs. 
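Both the lba_capacity_is_ok() change above and the do_rw_disk() dispatch that follows gate the 48-bit path on the same identify-data test: bit 10 (0x0400) of word 83 (command sets supported) must also be set in word 86 (command sets enabled). A minimal standalone sketch of that test, taking the two identify words as plain parameters rather than a struct hd_driveid:

#include <stdint.h>

/* Mirrors the (command_set_2 & 0x0400) && (cfs_enable_2 & 0x0400)
 * test used throughout this patch; an illustration, not driver code. */
static int drive_is_lba48(uint16_t command_set_2, uint16_t cfs_enable_2)
{
        return (command_set_2 & 0x0400) && (cfs_enable_2 & 0x0400);
}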
*/ -static ide_startstop_t set_multmode_intr (ide_drive_t *drive) +static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { - byte stat; + if (rq->cmd == READ) + goto good_command; + if (rq->cmd == WRITE) + goto good_command; - if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) { - drive->mult_count = drive->mult_req; - } else { - drive->mult_req = drive->mult_count = 0; - drive->special.b.recalibrate = 1; - (void) ide_dump_status(drive, "set_multmode", stat); - } + printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); return ide_stopped; + +good_command: + +#ifdef CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long); + return promise_rw_disk(drive, rq, block); + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ + + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) /* 48-bit LBA */ + return lba_48_rw_disk(drive, rq, (unsigned long long) block); + if (drive->select.b.lba) /* 28-bit LBA */ + return lba_28_rw_disk(drive, rq, (unsigned long) block); + + /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */ + return chs_rw_disk(drive, rq, (unsigned long) block); } -/* - * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. - */ -static ide_startstop_t set_geometry_intr (ide_drive_t *drive) +static task_ioreg_t get_command (ide_drive_t *drive, int cmd) { - byte stat; + int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0; - if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) - return ide_stopped; +#if 1 + lba48bit = drive->addressing; +#endif - if (stat & (ERR_STAT|DRQ_STAT)) - return ide_error(drive, "set_geometry_intr", stat); + if ((cmd == READ) && (drive->using_dma)) + return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA; + else if ((cmd == READ) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD; + else if (cmd == READ) + return (lba48bit) ? WIN_READ_EXT : WIN_READ; + else if ((cmd == WRITE) && (drive->using_dma)) + return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA; + else if ((cmd == WRITE) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE; + else if (cmd == WRITE) + return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE; + else + return WIN_NOP; +} + +static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + unsigned int track = (block / drive->sect); + unsigned int sect = (block % drive->sect) + 1; + unsigned int head = (track % drive->head); + unsigned int cyl = (track / drive->head); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = sect; + taskfile.low_cylinder = cyl; + taskfile.high_cylinder = (cyl>>8); + taskfile.device_head = head; + taskfile.device_head |= drive->select.all; + taskfile.command = command; + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? 
"read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = block; + taskfile.low_cylinder = (block>>=8); + taskfile.high_cylinder = (block>>=8); + taskfile.device_head = ((block>>8)&0x0f); + taskfile.device_head |= drive->select.all; + taskfile.command = command; + + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif - ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL); - return ide_started; + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); } /* - * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. 
+ * 268435455 == 137439 MB or 28bit limit + * 320173056 == 163929 MB or 48bit addressing + * 1073741822 == 549756 MB or 48bit addressing fake drive */ -static ide_startstop_t recal_intr (ide_drive_t *drive) + +static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block) { - byte stat = GET_STAT(); + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = rq->nr_sectors; + hobfile.sector_count = (rq->nr_sectors>>8); + + if (rq->nr_sectors == 65536) { + taskfile.sector_count = 0x00; + hobfile.sector_count = 0x00; + } + + taskfile.sector_number = block; /* low lba */ + taskfile.low_cylinder = (block>>=8); /* mid lba */ + taskfile.high_cylinder = (block>>=8); /* hi lba */ + hobfile.sector_number = (block>>=8); /* low lba */ + hobfile.low_cylinder = (block>>=8); /* mid lba */ + hobfile.high_cylinder = (block>>=8); /* hi lba */ + taskfile.device_head = drive->select.all; + hobfile.device_head = taskfile.device_head; + hobfile.control = (drive->ctl|0x80); + taskfile.command = command; - if (!OK_STAT(stat,READY_STAT,BAD_STAT)) - return ide_error(drive, "recal_intr", stat); - return ide_stopped; +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); } +#else /* !__TASKFILE__IO */ /* * do_rw_disk() issues READ and WRITE commands to a disk, * using LBA if supported, or CHS otherwise, to address sectors. 
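With 48-bit addressing every taskfile register is effectively two bytes deep: a "previous" (HOB) byte written first and a "current" byte written second, which is how lba_48_rw_disk() above splits the block number and why the compatibility path in the next hunk writes each register twice, previous byte first. Packing the six LBA bytes, as a standalone sketch with illustrative names:

#include <stdint.h>

/* Current bytes carry LBA 23:0, the HOB bytes LBA 47:24, matching the
 * taskfile/hobfile assignments in lba_48_rw_disk(). */
struct lba48_regs {
        uint8_t sector, lcyl, hcyl;             /* current:  LBA  7:0, 15:8,  23:16 */
        uint8_t hob_sector, hob_lcyl, hob_hcyl; /* previous: LBA 31:24, 39:32, 47:40 */
};

static struct lba48_regs pack_lba48(uint64_t block)
{
        struct lba48_regs r;

        r.sector     =  block        & 0xff;
        r.lcyl       = (block >> 8)  & 0xff;
        r.hcyl       = (block >> 16) & 0xff;
        r.hob_sector = (block >> 24) & 0xff;
        r.hob_lcyl   = (block >> 32) & 0xff;
        r.hob_hcyl   = (block >> 40) & 0xff;
        return r;
}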
@@ -369,22 +553,70 @@ { if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); - OUT_BYTE(0x00, IDE_FEATURE_REG); - OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG); + #ifdef CONFIG_BLK_DEV_PDC4030 if (drive->select.b.lba || IS_PDC4030_DRIVE) { #else /* !CONFIG_BLK_DEV_PDC4030 */ if (drive->select.b.lba) { #endif /* CONFIG_BLK_DEV_PDC4030 */ + + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + task_ioreg_t tasklets[10]; + + tasklets[0] = 0; + tasklets[1] = 0; + tasklets[2] = rq->nr_sectors; + tasklets[3] = (rq->nr_sectors>>8); + if (rq->nr_sectors == 65536) { + tasklets[2] = 0x00; + tasklets[3] = 0x00; + } + tasklets[4] = (task_ioreg_t) block; + tasklets[5] = (task_ioreg_t) (block>>8); + tasklets[6] = (task_ioreg_t) (block>>16); + tasklets[7] = (task_ioreg_t) (block>>24); + tasklets[8] = (task_ioreg_t) 0; + tasklets[9] = (task_ioreg_t) 0; +// tasklets[8] = (task_ioreg_t) (block>>32); +// tasklets[9] = (task_ioreg_t) (block>>40); #ifdef DEBUG - printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", - drive->name, (rq->cmd==READ)?"read":"writ", - block, rq->nr_sectors, (unsigned long) rq->buffer); -#endif - OUT_BYTE(block,IDE_SECTOR_REG); - OUT_BYTE(block>>=8,IDE_LCYL_REG); - OUT_BYTE(block>>=8,IDE_HCYL_REG); - OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG); + printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n", + drive->name, + (rq->cmd==READ)?"read":"writ", + block, + rq->nr_sectors, + (unsigned long) rq->buffer, + block); + printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n", + drive->name, tasklets[3], tasklets[2], + tasklets[9], tasklets[8], tasklets[7], + tasklets[6], tasklets[5], tasklets[4]); +#endif + OUT_BYTE(tasklets[1], IDE_FEATURE_REG); + OUT_BYTE(tasklets[3], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[7], IDE_SECTOR_REG); + OUT_BYTE(tasklets[8], IDE_LCYL_REG); + OUT_BYTE(tasklets[9], IDE_HCYL_REG); + + OUT_BYTE(tasklets[0], IDE_FEATURE_REG); + OUT_BYTE(tasklets[2], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[4], IDE_SECTOR_REG); + OUT_BYTE(tasklets[5], IDE_LCYL_REG); + OUT_BYTE(tasklets[6], IDE_HCYL_REG); + OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG); + } else { +#ifdef DEBUG + printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", + block, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); + OUT_BYTE(block,IDE_SECTOR_REG); + OUT_BYTE(block>>=8,IDE_LCYL_REG); + OUT_BYTE(block>>=8,IDE_HCYL_REG); + OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG); + } } else { unsigned int sect,head,cyl,track; track = block / drive->sect; @@ -392,6 +624,9 @@ OUT_BYTE(sect,IDE_SECTOR_REG); head = track % drive->head; cyl = track / drive->head; + + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); OUT_BYTE(cyl,IDE_LCYL_REG); OUT_BYTE(cyl>>8,IDE_HCYL_REG); OUT_BYTE(head|drive->select.all,IDE_SELECT_REG); @@ -413,7 +648,11 @@ return ide_started; #endif /* CONFIG_BLK_DEV_IDEDMA */ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); - OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? 
WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); + } return ide_started; } if (rq->cmd == WRITE) { @@ -422,7 +661,11 @@ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) return ide_started; #endif /* CONFIG_BLK_DEV_IDEDMA */ - OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); + } if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE"); @@ -432,17 +675,17 @@ __cli(); /* local CPU only */ if (drive->mult_count) { ide_hwgroup_t *hwgroup = HWGROUP(drive); - /* - * Ugh.. this part looks ugly because we MUST set up - * the interrupt handler before outputting the first block - * of data to be written. If we hit an error (corrupted buffer list) - * in ide_multwrite(), then we need to remove the handler/timer - * before returning. Fortunately, this NEVER happens (right?). - * - * Except when you get an error it seems... - */ + /* + * Ugh.. this part looks ugly because we MUST set up + * the interrupt handler before outputting the first block + * of data to be written. If we hit an error (corrupted buffer list) + * in ide_multwrite(), then we need to remove the handler/timer + * before returning. Fortunately, this NEVER happens (right?). + * + * Except when you get an error it seems... + */ hwgroup->wrq = *rq; /* scratchpad */ - ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); + ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { unsigned long flags; spin_lock_irqsave(&io_request_lock, flags); @@ -462,29 +705,47 @@ return ide_stopped; } +#endif /* __TASKFILE__IO */ + static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive) { MOD_INC_USE_COUNT; if (drive->removable && drive->usage == 1) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_DOORLOCK; check_disk_change(inode->i_rdev); /* * Ignore the return code from door_lock, * since the open() has already succeeded, * and the door_lock is irrelevant at this point. 
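idedisk_open() above fills a zeroed task header with nothing but WIN_DOORLOCK and, on the line that follows, hands it to ide_wait_taskfile() where ide_wait_cmd(drive, WIN_DOORLOCK, 0, 0, 0, NULL) used to be called; that is the conversion pattern this patch applies to every legacy command. Sketched standalone, with task_hdr and submit_taskfile() as stand-ins for struct hd_drive_task_hdr and ide_wait_taskfile():

#include <stdint.h>
#include <string.h>

struct task_hdr {                       /* stand-in for hd_drive_task_hdr */
        uint8_t data, feature, sector_count, sector_number;
        uint8_t low_cylinder, high_cylinder, device_head, command;
};

extern int submit_taskfile(const struct task_hdr *tf, void *buf);

/* A no-data command: every field except the opcode stays zero and no
 * data buffer is passed. */
static int send_simple_command(uint8_t opcode)
{
        struct task_hdr tf;

        memset(&tf, 0, sizeof(tf));
        tf.command = opcode;
        return submit_taskfile(&tf, NULL);
}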
*/ - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL)) drive->doorlocking = 0; } return 0; } +static int do_idedisk_flushcache(ide_drive_t *drive); + static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive) { if (drive->removable && !drive->usage) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_DOORUNLOCK; invalidate_bdev(inode->i_bdev, 0); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL)) drive->doorlocking = 0; } + if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) + if (do_idedisk_flushcache(drive)) + printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", + drive->name); MOD_DEC_USE_COUNT; } @@ -501,27 +762,234 @@ } /* + * Queries for true maximum capacity of the drive. + * Returns maximum LBA address (> 0) of the drive, 0 if failed. + */ +static unsigned long idedisk_read_native_max_address(ide_drive_t *drive) +{ + ide_task_t args; + unsigned long addr = 0; + + if (!(drive->id->command_set_1 & 0x0400) && + !(drive->id->cfs_enable_2 & 0x0100)) + return addr; + + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX; + args.handler = task_no_data_intr; + + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24) + | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16) + | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8) + | ((args.tfRegister[IDE_SECTOR_OFFSET] )); + } + addr++; /* since the return value is (maxlba - 1), we add 1 */ + return addr; +} + +static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive) +{ + ide_task_t args; + unsigned long long addr = 0; + + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT; + args.handler = task_no_data_intr; + + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) | + ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) | + (args.hobRegister[IDE_SECTOR_OFFSET_HOB]); + u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) | + ((args.tfRegister[IDE_LCYL_OFFSET])<<8) | + (args.tfRegister[IDE_SECTOR_OFFSET]); + addr = ((__u64)high << 24) | low; + } + addr++; /* since the return value is (maxlba - 1), we add 1 */ + return addr; +} + +#ifdef CONFIG_IDEDISK_STROKE +/* + * Sets maximum virtual LBA address of the drive. + * Returns new maximum virtual LBA address (> 0) or 0 on failure. 
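Decoding the reply is the other half of the two helpers above: the 28-bit maximum comes back in sector/lcyl/hcyl plus the low nibble of the select register, the 48-bit maximum in both register sets, joined as ((u64)high << 24) | low because "low" already carries three bytes. The same arithmetic as pure functions over the raw result bytes (names are illustrative):

#include <stdint.h>

/* READ NATIVE MAX ADDRESS returns the highest usable LBA; adding 1
 * turns it into a sector count, as the driver's addr++ does. */
static uint32_t native_max28(uint8_t select, uint8_t hcyl, uint8_t lcyl,
                             uint8_t sector)
{
        uint32_t max_lba = ((uint32_t)(select & 0x0f) << 24) |
                           ((uint32_t)hcyl << 16) |
                           ((uint32_t)lcyl << 8) | sector;

        return max_lba + 1;
}

static uint64_t native_max48(uint8_t hcyl, uint8_t lcyl, uint8_t sector,
                             uint8_t hob_hcyl, uint8_t hob_lcyl,
                             uint8_t hob_sector)
{
        uint32_t low  = ((uint32_t)hcyl << 16) |
                        ((uint32_t)lcyl << 8) | sector;
        uint32_t high = ((uint32_t)hob_hcyl << 16) |
                        ((uint32_t)hob_lcyl << 8) | hob_sector;

        return (((uint64_t)high << 24) | low) + 1;
}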
+ */
+static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req)
+{
+	ide_task_t args;
+	unsigned long addr_set = 0;
+
+	addr_req--;
+	/* Create IDE/ATA command request structure */
+	memset(&args, 0, sizeof(ide_task_t));
+	args.tfRegister[IDE_SECTOR_OFFSET]	= ((addr_req >>  0) & 0xff);
+	args.tfRegister[IDE_LCYL_OFFSET]	= ((addr_req >>  8) & 0xff);
+	args.tfRegister[IDE_HCYL_OFFSET]	= ((addr_req >> 16) & 0xff);
+	args.tfRegister[IDE_SELECT_OFFSET]	= ((addr_req >> 24) & 0x0f) | 0x40;
+	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SET_MAX;
+	args.handler				= task_no_data_intr;
+	/* submit command request */
+	ide_raw_taskfile(drive, &args, NULL);
+	/* if OK, read new maximum address value */
+	if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+		addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+			 | ((args.tfRegister[  IDE_HCYL_OFFSET]       ) << 16)
+			 | ((args.tfRegister[  IDE_LCYL_OFFSET]       ) <<  8)
+			 | ((args.tfRegister[IDE_SECTOR_OFFSET]       ));
+	}
+	addr_set++;
+	return addr_set;
+}
+
+static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req)
+{
+	ide_task_t args;
+	unsigned long long addr_set = 0;
+
+	addr_req--;
+	/* Create IDE/ATA command request structure */
+	memset(&args, 0, sizeof(ide_task_t));
+	args.tfRegister[IDE_SECTOR_OFFSET]	= ((addr_req >>  0) & 0xff);
+	args.tfRegister[IDE_LCYL_OFFSET]	= ((addr_req >>= 8) & 0xff);
+	args.tfRegister[IDE_HCYL_OFFSET]	= ((addr_req >>= 8) & 0xff);
+	args.tfRegister[IDE_SELECT_OFFSET]	= 0x40;
+	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SET_MAX_EXT;
+	args.hobRegister[IDE_SECTOR_OFFSET_HOB]	= ((addr_req >>= 8) & 0xff);
+	args.hobRegister[IDE_LCYL_OFFSET_HOB]	= ((addr_req >>= 8) & 0xff);
+	args.hobRegister[IDE_HCYL_OFFSET_HOB]	= ((addr_req >>= 8) & 0xff);
+	args.hobRegister[IDE_SELECT_OFFSET_HOB]	= 0x40;
+	args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80);
+	args.handler				= task_no_data_intr;
+	/* submit command request */
+	ide_raw_taskfile(drive, &args, NULL);
+	/* if OK, compute maximum address value */
+	if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+		u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+			   ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+			    (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+		u32 low  = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+			   ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+			    (args.tfRegister[IDE_SECTOR_OFFSET]);
+		addr_set = ((__u64)high << 24) | low;
+	}
+	return addr_set;
+}
+
+/*
+ * Tests if the drive supports the Host Protected Area feature.
+ * Returns true if supported, false otherwise.
+ */
+static inline int idedisk_supports_host_protected_area(ide_drive_t *drive)
+{
+	int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0;
+	printk("%s: host protected area => %d\n", drive->name, flag);
+	return flag;
+}
+
+#endif /* CONFIG_IDEDISK_STROKE */
+
+/*
+ * Compute drive->capacity, the full capacity of the drive.
+ * Called with drive->id != NULL.
+ *
+ * To compute capacity, this uses either of
+ *
+ * 1. CHS value set by user (whatever user sets will be trusted)
+ * 2. LBA value from target drive (requires new ATA feature)
+ * 3. LBA value from system BIOS (new one is OK, old one may break)
+ * 4. CHS value from system BIOS (traditional style)
+ *
+ * in above order (i.e., if a value of higher priority is available,
+ * the rest will be ignored).
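Under CONFIG_IDEDISK_STROKE the helpers above combine into the unclipping flow that init_idedisk_capacity() below follows: query the native maximum and, if it exceeds the capacity currently visible, issue SET MAX (EXT) to expose the Host Protected Area; without the option the driver only logs the mismatch. The decision logic in isolation, with read_native() and set_max() as placeholders for the taskfile helpers:

#include <stdint.h>

extern uint64_t read_native(void);      /* READ NATIVE MAX ADDRESS (EXT) */
extern uint64_t set_max(uint64_t lba);  /* SET MAX ADDRESS (EXT)         */

static uint64_t maybe_unclip(uint64_t visible_capacity, int allow_stroke)
{
        uint64_t native = read_native();

        if (native <= visible_capacity)
                return visible_capacity;        /* no host protected area */
        if (!allow_stroke)
                return visible_capacity;        /* report the clip, keep it */
        return set_max(native);                 /* expose the full drive */
}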
*/ static void init_idedisk_capacity (ide_drive_t *drive) { struct hd_driveid *id = drive->id; unsigned long capacity = drive->cyl * drive->head * drive->sect; + unsigned long set_max = idedisk_read_native_max_address(drive); + unsigned long long capacity_2 = capacity; + unsigned long long set_max_ext; + drive->capacity48 = 0; drive->select.b.lba = 0; + if (id->cfs_enable_2 & 0x0400) { + capacity_2 = id->lba_capacity_2; + drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect); + drive->head = drive->bios_head = 255; + drive->sect = drive->bios_sect = 63; + drive->select.b.lba = 1; + set_max_ext = idedisk_read_native_max_address_ext(drive); + if (set_max_ext > capacity_2) { +#ifdef CONFIG_IDEDISK_STROKE + set_max_ext = idedisk_read_native_max_address_ext(drive); + set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext); + if (set_max_ext) { + drive->capacity48 = capacity_2 = set_max_ext; + drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity_2 = capacity_2; + } +#else /* !CONFIG_IDEDISK_STROKE */ + printk("%s: setmax_ext LBA %llu, native %llu\n", + drive->name, set_max_ext, capacity_2); +#endif /* CONFIG_IDEDISK_STROKE */ + } + drive->bios_cyl = drive->cyl; + drive->capacity48 = capacity_2; + drive->capacity = (unsigned long) capacity_2; + return; /* Determine capacity, and use LBA if the drive properly supports it */ - if ((id->capability & 2) && lba_capacity_is_ok(id)) { + } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { capacity = id->lba_capacity; drive->cyl = capacity / (drive->head * drive->sect); drive->select.b.lba = 1; } + + if (set_max > capacity) { +#ifdef CONFIG_IDEDISK_STROKE + set_max = idedisk_read_native_max_address(drive); + set_max = idedisk_set_max_address(drive, set_max); + if (set_max) { + drive->capacity = capacity = set_max; + drive->cyl = set_max / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity = capacity; + } +#else /* !CONFIG_IDEDISK_STROKE */ + printk("%s: setmax LBA %lu, native %lu\n", + drive->name, set_max, capacity); +#endif /* CONFIG_IDEDISK_STROKE */ + } + drive->capacity = capacity; + + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + drive->capacity48 = id->lba_capacity_2; + drive->head = 255; + drive->sect = 63; + drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect); + } } -static unsigned long idedisk_capacity (ide_drive_t *drive) +static unsigned long idedisk_capacity (ide_drive_t *drive) { + if (drive->id->cfs_enable_2 & 0x0400) + return (drive->capacity48 - drive->sect0); return (drive->capacity - drive->sect0); } @@ -530,23 +998,48 @@ special_t *s = &drive->special; if (s->b.set_geometry) { - s->b.set_geometry = 0; - OUT_BYTE(drive->sect,IDE_SECTOR_REG); - OUT_BYTE(drive->cyl,IDE_LCYL_REG); - OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG); - OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG); - if (!IS_PDC4030_DRIVE) - ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr); + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_handler_t *handler = NULL; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + s->b.set_geometry = 0; + taskfile.sector_number = drive->sect; + taskfile.low_cylinder = drive->cyl; + taskfile.high_cylinder = drive->cyl>>8; + taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF; + if (!IS_PDC4030_DRIVE) { + taskfile.sector_count = 
drive->sect; + taskfile.command = WIN_SPECIFY; + handler = ide_handler_parser(&taskfile, &hobfile); + } + do_taskfile(drive, &taskfile, &hobfile, handler); } else if (s->b.recalibrate) { s->b.recalibrate = 0; - if (!IS_PDC4030_DRIVE) - ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr); + if (!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->sect; + taskfile.command = WIN_RESTORE; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } } else if (s->b.set_multmode) { s->b.set_multmode = 0; if (drive->id && drive->mult_req > drive->id->max_multsect) drive->mult_req = drive->id->max_multsect; - if (!IS_PDC4030_DRIVE) - ide_cmd(drive, WIN_SETMULT, drive->mult_req, &set_multmode_intr); + if (!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } } else if (s->all) { int special = s->all; s->all = 0; @@ -558,9 +1051,11 @@ static void idedisk_pre_reset (ide_drive_t *drive) { + int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; + drive->special.all = 0; - drive->special.b.set_geometry = 1; - drive->special.b.recalibrate = 1; + drive->special.b.set_geometry = legacy; + drive->special.b.recalibrate = legacy; if (OK_TO_RESET_CONTROLLER) drive->mult_count = 0; if (!drive->keep_settings && !drive->using_dma) @@ -573,19 +1068,45 @@ static int smart_enable(ide_drive_t *drive) { - return ide_wait_cmd(drive, WIN_SMART, 0, SMART_ENABLE, 0, NULL); + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_ENABLE; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); } static int get_smart_values(ide_drive_t *drive, byte *buf) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_VALUES; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; (void) smart_enable(drive); - return ide_wait_cmd(drive, WIN_SMART, 0, SMART_READ_VALUES, 1, buf); + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); } static int get_smart_thresholds(ide_drive_t *drive, byte *buf) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_THRESHOLDS; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; (void) smart_enable(drive); - return ide_wait_cmd(drive, WIN_SMART, 0, SMART_READ_THRESHOLDS, 1, buf); + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); } static int proc_idedisk_read_cache @@ 
-609,7 +1130,7 @@ int len = 0, i = 0; if (!get_smart_thresholds(drive, page)) { - unsigned short *val = ((unsigned short *)page) + 2; + unsigned short *val = (unsigned short *) page; char *out = ((char *)val) + (SECTOR_WORDS * 4); page = out; do { @@ -628,7 +1149,7 @@ int len = 0, i = 0; if (!get_smart_values(drive, page)) { - unsigned short *val = ((unsigned short *)page) + 2; + unsigned short *val = (unsigned short *) page; char *out = ((char *)val) + (SECTOR_WORDS * 4); page = out; do { @@ -656,14 +1177,31 @@ static int set_multcount(ide_drive_t *drive, int arg) { +#ifdef __TASKFILE__IO + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + + if (drive->special.b.set_multmode) + return -EBUSY; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + drive->mult_req = arg; + drive->special.b.set_multmode = 1; + ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +#else /* !__TASKFILE__IO */ struct request rq; if (drive->special.b.set_multmode) return -EBUSY; ide_init_drive_cmd (&rq); + rq.cmd = IDE_DRIVE_CMD; drive->mult_req = arg; drive->special.b.set_multmode = 1; (void) ide_do_drive_cmd (drive, &rq, ide_wait); +#endif /* __TASKFILE__IO */ return (drive->mult_count == arg) ? 0 : -EIO; } @@ -677,6 +1215,79 @@ return 0; } +static int write_cache (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = (arg) ? SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; + taskfile.command = WIN_SETFEATURES; + + if (!(drive->id->cfs_enable_2 & 0x3000)) + return 1; + + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->wcache = arg; + return 0; +} + +static int do_idedisk_standby (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_STANDBYNOW1; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int do_idedisk_flushcache (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + if (drive->id->cfs_enable_2 & 0x2400) { + taskfile.command = WIN_FLUSH_CACHE_EXT; + } else { + taskfile.command = WIN_FLUSH_CACHE; + } + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int set_acoustic (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + taskfile.feature = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM; + taskfile.sector_count = arg; + + taskfile.command = WIN_SETFEATURES; + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->acoustic = arg; + return 0; +} + +static int probe_lba_addressing (ide_drive_t *drive, int arg) +{ + drive->addressing = 0; + + if (!(drive->id->cfs_enable_2 & 0x0400)) + return -EIO; + + drive->addressing = arg; + return 0; +} + +static int set_lba_addressing (ide_drive_t *drive, int arg) +{ + return (probe_lba_addressing(drive, arg)); +} + static void 
idedisk_add_settings(ide_drive_t *drive) { struct hd_driveid *id = drive->id; @@ -686,15 +1297,18 @@ ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL); ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); + ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing); ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL); - ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 2, &drive->mult_count, set_multcount); + ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount); ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); - ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); + ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); + ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); - ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); - ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); + ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache); + ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic); + ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); + ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); } static void idedisk_setup (ide_drive_t *drive) @@ -764,7 +1378,6 @@ if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) && (!drive->forced_geom) && drive->bios_sect && drive->bios_head) drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head; - printk (KERN_INFO "%s: %ld sectors", drive->name, capacity); /* Give size in megabytes (MB), not mebibytes (MiB). */ @@ -796,21 +1409,25 @@ drive->mult_req = id->max_multsect; if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) drive->special.b.set_multmode = 1; -#endif +#endif /* CONFIG_IDEDISK_MULTI_MODE */ } drive->no_io_32bit = id->dword_io ? 
1 : 0; -} - -static int idedisk_reinit (ide_drive_t *drive) -{ - return 0; + if (drive->id->cfs_enable_2 & 0x3000) + write_cache(drive, (id->cfs_enable_2 & 0x3000)); + (void) probe_lba_addressing(drive, 1); } static int idedisk_cleanup (ide_drive_t *drive) { + if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) + if (do_idedisk_flushcache(drive)) + printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", + drive->name); return ide_unregister_subdriver(drive); } +int idedisk_reinit(ide_drive_t *drive); + /* * IDE subdriver functions, registered with ide.c */ @@ -822,6 +1439,8 @@ supports_dma: 1, supports_dsc_overlap: 0, cleanup: idedisk_cleanup, + standby: do_idedisk_standby, + flushcache: do_idedisk_flushcache, do_request: do_rw_disk, end_request: NULL, ioctl: NULL, @@ -833,7 +1452,9 @@ capacity: idedisk_capacity, special: idedisk_special, proc: idedisk_proc, - driver_reinit: idedisk_reinit, + reinit: idedisk_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, }; int idedisk_init (void); @@ -846,6 +1467,32 @@ MODULE_DESCRIPTION("ATA DISK Driver"); +int idedisk_reinit (ide_drive_t *drive) +{ + int failed = 0; + + MOD_INC_USE_COUNT; + + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + return 1; + } + DRIVER(drive)->busy++; + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; + return 1; + } + DRIVER(drive)->busy--; + failed--; + + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + return 0; +} + static void __exit idedisk_exit (void) { ide_drive_t *drive; @@ -877,12 +1524,15 @@ printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); continue; } + DRIVER(drive)->busy++; idedisk_setup(drive); if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; continue; } + DRIVER(drive)->busy--; failed--; } ide_register_module(&idedisk_module); diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-dma.c linux-2.4.17-rc2-wli2/drivers/ide/ide-dma.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-dma.c Sun Sep 9 10:43:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-dma.c Thu Dec 20 19:49:13 2001 @@ -282,6 +282,37 @@ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } +static int ide_raw_build_sglist (ide_hwif_t *hwif, struct request *rq) +{ + struct scatterlist *sg = hwif->sg_table; + int nents = 0; + ide_task_t *args = rq->special; + unsigned char *virt_addr = rq->buffer; + int sector_count = rq->nr_sectors; + +// if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA) || +// (args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA_EXT)) + if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE) + hwif->sg_dma_direction = PCI_DMA_TODEVICE; + else + hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; + + if (sector_count > 128) { + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = virt_addr; + sg[nents].length = 128 * SECTOR_SIZE; + nents++; + virt_addr = virt_addr + (128 * SECTOR_SIZE); + sector_count -= 128; + } + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = virt_addr; + sg[nents].length = sector_count * SECTOR_SIZE; + nents++; + + return pci_map_sg(hwif->pci_dev, sg, 
nents, hwif->sg_dma_direction); +} + /* * ide_build_dmatable() prepares a dma request. * Returns 0 if all went okay, returns 1 otherwise. @@ -299,7 +330,10 @@ int i; struct scatterlist *sg; - HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + if (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) + HWIF(drive)->sg_nents = i = ide_raw_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + else + HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); if (!i) return 0; @@ -429,7 +463,14 @@ struct hd_driveid *id = drive->id; if ((id->field_valid & 4) && (eighty_ninty_three(drive)) && - (id->dma_ultra & (id->dma_ultra >> 11) & 7)) { + (id->dma_ultra & (id->dma_ultra >> 14) & 3)) { + if ((id->dma_ultra >> 15) & 1) { + printk(", UDMA(mode 7)"); /* UDMA BIOS-enabled! */ + } else { + printk(", UDMA(133)"); /* UDMA BIOS-enabled! */ + } + } else if ((id->field_valid & 4) && (eighty_ninty_three(drive)) && + (id->dma_ultra & (id->dma_ultra >> 11) & 7)) { if ((id->dma_ultra >> 13) & 1) { printk(", UDMA(100)"); /* UDMA BIOS-enabled! */ } else if ((id->dma_ultra >> 12) & 1) { @@ -456,14 +497,24 @@ static int config_drive_for_dma (ide_drive_t *drive) { + int config_allows_dma = 1; struct hd_driveid *id = drive->id; ide_hwif_t *hwif = HWIF(drive); - if (id && (id->capability & 1) && hwif->autodma) { +#ifdef CONFIG_IDEDMA_ONLYDISK + if (drive->media != ide_disk) + config_allows_dma = 0; +#endif + + if (id && (id->capability & 1) && hwif->autodma && config_allows_dma) { /* Consult the list of known "bad" drives */ if (ide_dmaproc(ide_dma_bad_drive, drive)) return hwif->dmaproc(ide_dma_off, drive); + /* Enable DMA on any drive that has UltraDMA (mode 6/7/?) enabled */ + if ((id->field_valid & 4) && (eighty_ninty_three(drive))) + if ((id->dma_ultra & (id->dma_ultra >> 14) & 2)) + return hwif->dmaproc(ide_dma_on, drive); /* Enable DMA on any drive that has UltraDMA (mode 3/4/5) enabled */ if ((id->field_valid & 4) && (eighty_ninty_three(drive))) if ((id->dma_ultra & (id->dma_ultra >> 11) & 7)) @@ -495,7 +546,7 @@ printk("%s: dma_timer_expiry: dma status == 0x%02x\n", drive->name, dma_stat); #endif /* DEBUG */ -#if 1 +#if 0 HWGROUP(drive)->expiry = NULL; /* one free ride for now */ #endif @@ -550,7 +601,7 @@ */ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { -// ide_hwgroup_t *hwgroup = HWGROUP(drive); +// ide_hwgroup_t *hwgroup = HWGROUP(drive); ide_hwif_t *hwif = HWIF(drive); unsigned long dma_base = hwif->dma_base; byte unit = (drive->select.b.unit & 0x01); @@ -582,11 +633,20 @@ if (drive->media != ide_disk) return 0; #ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT - ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, NULL); /* issue cmd to drive */ + ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, NULL); /* issue cmd to drive */ #else /* !CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, dma_timer_expiry); /* issue cmd to drive */ #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ - OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); + if ((HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) && + (drive->addressing == 1)) { + ide_task_t *args = HWGROUP(drive)->rq->special; + OUT_BYTE(args->tfRegister[IDE_COMMAND_OFFSET], IDE_COMMAND_REG); + } else if (drive->addressing) { + OUT_BYTE(reading ? WIN_READDMA_EXT : WIN_WRITEDMA_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(reading ? 
WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); + } + return HWIF(drive)->dmaproc(ide_dma_begin, drive); case ide_dma_begin: /* Note that this is done *after* the cmd has * been issued to the drive, as per the BM-IDE spec. */ @@ -604,7 +664,7 @@ return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; /* verify good DMA status */ case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */ dma_stat = inb(dma_base+2); -#if 0 /* do not set unless you know what you are doing */ +#if 0 /* do not set unless you know what you are doing */ if (dma_stat & 4) { byte stat = GET_STAT(); outb(dma_base+2, dma_stat & 0xE4); @@ -649,6 +709,8 @@ GET_STAT(), dma_stat); return restart_request(drive); // BUG: return types do not match!! +//#else +// return HWGROUP(drive)->handler(drive); #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ case ide_dma_retune: case ide_dma_lostirq: diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-features.c linux-2.4.17-rc2-wli2/drivers/ide/ide-features.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-features.c Fri Feb 9 11:40:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-features.c Thu Dec 20 19:49:13 2001 @@ -127,30 +127,14 @@ case XFER_UDMA_3: return XFER_UDMA_2; case XFER_UDMA_2: return XFER_UDMA_1; case XFER_UDMA_1: return XFER_UDMA_0; + /* + * OOPS: we do not step down into the non-UltraDMA modes here. + * Since iCRC checking is not available in those modes, we force + * the system to PIO and leave it to the user to invoke the + * ATA-1/ATA-2 DMA modes by hand. + */ case XFER_UDMA_0: - if (drive->id->dma_mword & 0x0004) return XFER_MW_DMA_2; - else if (drive->id->dma_mword & 0x0002) return XFER_MW_DMA_1; - else if (drive->id->dma_mword & 0x0001) return XFER_MW_DMA_0; - else return XFER_PIO_4; - case XFER_MW_DMA_2: return XFER_MW_DMA_1; - case XFER_MW_DMA_1: return XFER_MW_DMA_0; - case XFER_MW_DMA_0: - if (drive->id->dma_1word & 0x0004) return XFER_SW_DMA_2; - else if (drive->id->dma_1word & 0x0002) return XFER_SW_DMA_1; - else if (drive->id->dma_1word & 0x0001) return XFER_SW_DMA_0; - else return XFER_PIO_4; - case XFER_SW_DMA_2: return XFER_SW_DMA_1; - case XFER_SW_DMA_1: return XFER_SW_DMA_0; - case XFER_SW_DMA_0: - { - return XFER_PIO_4; - } - case XFER_PIO_4: return XFER_PIO_3; - case XFER_PIO_3: return XFER_PIO_2; - case XFER_PIO_2: return XFER_PIO_1; - case XFER_PIO_1: return XFER_PIO_0; - case XFER_PIO_0: - default: return XFER_PIO_SLOW; + default: return XFER_PIO_4; } } @@ -216,11 +200,11 @@ * in combination with the device (usually a disk) properly detect * and acknowledge each end of the ribbon. */ -int ide_ata66_check (ide_drive_t *drive, byte cmd, byte nsect, byte feature) +int ide_ata66_check (ide_drive_t *drive, ide_task_t *args) { - if ((cmd == WIN_SETFEATURES) && - (nsect > XFER_UDMA_2) && - (feature == SETFEATURES_XFER)) { + if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) && + (args->tfRegister[IDE_SECTOR_OFFSET] > XFER_UDMA_2) && + (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER)) { if (!HWIF(drive)->udma_four) { printk("%s: Speed warnings UDMA 3/4/5 is not functional.\n", HWIF(drive)->name); return 1; @@ -243,11 +227,11 @@ * 1 : Safe to update drive->id DMA registers. * 0 : OOPs not allowed.
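* * For example, a WIN_SETFEATURES taskfile carrying SETFEATURES_XFER in * its feature field and a transfer mode of XFER_SW_DMA_0 or higher * qualifies, provided the drive reports some DMA capability in * id->dma_ultra, id->dma_mword, or id->dma_1word.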
*/ -int set_transfer (ide_drive_t *drive, byte cmd, byte nsect, byte feature) +int set_transfer (ide_drive_t *drive, ide_task_t *args) { - if ((cmd == WIN_SETFEATURES) && - (nsect >= XFER_SW_DMA_0) && - (feature == SETFEATURES_XFER) && + if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) && + (args->tfRegister[IDE_SECTOR_OFFSET] >= XFER_SW_DMA_0) && + (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER) && (drive->id->dma_ultra || drive->id->dma_mword || drive->id->dma_1word) @@ -389,3 +373,4 @@ EXPORT_SYMBOL(set_transfer); EXPORT_SYMBOL(eighty_ninty_three); EXPORT_SYMBOL(ide_config_drive_speed); + diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-floppy.c linux-2.4.17-rc2-wli2/drivers/ide/ide-floppy.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-floppy.c Tue Dec 18 21:21:42 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-floppy.c Thu Dec 20 19:49:13 2001 @@ -71,9 +71,14 @@ * including set_bit patch from Rusty Russel * Ver 0.97 Jul 22 01 Merge 0.91-0.96 onto 0.9.sv for ac series * Ver 0.97.sv Aug 3 01 Backported from 2.4.7-ac3 + * Ver 0.98 Oct 26 01 Split idefloppy_transfer_pc into two pieces to + * fix a lost interrupt problem. It appears the busy + * bit was being deasserted by my IOMEGA ATAPI ZIP 100 + * drive before the drive was actually ready. + * Ver 0.98a Oct 29 01 Expose delay value so we can play. */ -#define IDEFLOPPY_VERSION "0.97.sv" +#define IDEFLOPPY_VERSION "0.98a" #include <linux/config.h> #include <linux/module.h> @@ -276,6 +281,7 @@ * Last error information */ byte sense_key, asc, ascq; + byte ticks; /* delay this long before sending packet command */ int progress_indication; /* @@ -289,6 +295,8 @@ unsigned long flags; /* Status/Action flags */ } idefloppy_floppy_t; +#define IDEFLOPPY_TICKS_DELAY 3 /* default delay for ZIP 100 */ + /* * Floppy flag bits values. */ @@ -297,7 +305,7 @@ #define IDEFLOPPY_USE_READ12 2 /* Use READ12/WRITE12 or READ10/WRITE10 */ #define IDEFLOPPY_FORMAT_IN_PROGRESS 3 /* Format in progress */ #define IDEFLOPPY_CLIK_DRIVE 4 /* Avoid commands not supported in Clik drive */ - +#define IDEFLOPPY_ZIP_DRIVE 5 /* Requires BH algorithm for packets */ /* * ATAPI floppy drive packet commands @@ -1001,6 +1009,11 @@ return ide_started; } +/* + * This is the original routine that did the packet transfer. + * It fails at high speeds on the Iomega ZIP drive, so there's a slower version + * for that drive below. The algorithm is chosen based on drive type. + */ static ide_startstop_t idefloppy_transfer_pc (ide_drive_t *drive) { ide_startstop_t startstop; @@ -1021,6 +1034,56 @@ return ide_started; } + +/* + * What we have here is a classic case of a top half / bottom half + * interrupt service routine. In interrupt mode, the device sends + * an interrupt to signal it's ready to receive a packet. However, + * we need to delay about 2-3 ticks before issuing the packet or we + * get in trouble. + * + * So, follow carefully. transfer_pc1 is called as an interrupt (or + * directly). In either case, when the device says it's ready for a + * packet, we schedule the packet transfer to occur about 2-3 ticks + * later in transfer_pc2.
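+ * + * The delay is specified in jiffies (floppy->ticks, defaulting to + * IDEFLOPPY_TICKS_DELAY), so its wall-clock length scales with HZ; + * the "ticks" entry added to /proc/ide/hdx/settings below lets it be + * tuned at run time without recompiling.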
+ */ +static int idefloppy_transfer_pc2 (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + atapi_output_bytes (drive, floppy->pc->c, 12); /* Send the actual packet */ + return IDEFLOPPY_WAIT_CMD; /* Timeout for the packet command */ +} + +static ide_startstop_t idefloppy_transfer_pc1 (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + ide_startstop_t startstop; + idefloppy_ireason_reg_t ireason; + + if (ide_wait_stat (&startstop,drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) { + printk (KERN_ERR "ide-floppy: Strange, packet command initiated yet DRQ isn't asserted\n"); + return startstop; + } + ireason.all=IN_BYTE (IDE_IREASON_REG); + if (!ireason.b.cod || ireason.b.io) { + printk (KERN_ERR "ide-floppy: (IO,CoD) != (0,1) while issuing a packet command\n"); + return ide_do_reset (drive); + } + /* + * The following delay solves a problem with ATAPI Zip 100 drives where the + * Busy flag was apparently being deasserted before the unit was ready to + * receive data. This was happening on a 1200 MHz Athlon system. 10/26/01 + * 25msec is too short, 40 and 50msec work well. idefloppy_pc_intr will + * not be actually used until after the packet is moved in about 50 msec. + */ + ide_set_handler (drive, + &idefloppy_pc_intr, /* service routine for packet command */ + floppy->ticks, /* wait this long before "failing" */ + &idefloppy_transfer_pc2); /* fail == transfer_pc2 */ + return ide_started; +} + /* * Issue a packet command */ @@ -1029,6 +1092,7 @@ idefloppy_floppy_t *floppy = drive->driver_data; idefloppy_bcount_reg_t bcount; int dma_ok = 0; + ide_handler_t *pkt_xfer_routine; #if IDEFLOPPY_DEBUG_BUGS if (floppy->pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD && pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) { @@ -1088,13 +1152,20 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA */ + /* Can we transfer the packet when we get the interrupt or wait? 
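+ * Drives flagged IDEFLOPPY_ZIP_DRIVE take the delayed + * transfer_pc1/transfer_pc2 path; all other drives keep the original + * immediate idefloppy_transfer_pc.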
*/ + if (test_bit (IDEFLOPPY_ZIP_DRIVE, &floppy->flags)) { + pkt_xfer_routine = &idefloppy_transfer_pc1; /* wait */ + } else { + pkt_xfer_routine = &idefloppy_transfer_pc; /* immediate */ + } + if (test_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags)) { - ide_set_handler (drive, &idefloppy_transfer_pc, IDEFLOPPY_WAIT_CMD, NULL); + ide_set_handler (drive, pkt_xfer_routine, IDEFLOPPY_WAIT_CMD, NULL); OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* Issue the packet command */ return ide_started; } else { OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); - return idefloppy_transfer_pc (drive); + return (*pkt_xfer_routine) (drive); } } @@ -1914,13 +1985,18 @@ { int major = HWIF(drive)->major; int minor = drive->select.b.unit << PARTN_BITS; + idefloppy_floppy_t *floppy = drive->driver_data; +/* + * drive setting name read/write ioctl ioctl data type min max mul_factor div_factor data pointer set function + */ ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 1023, 1, 1, &drive->bios_cyl, NULL); ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); + ide_add_setting(drive, "ticks", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &floppy->ticks, NULL); } @@ -1954,20 +2030,12 @@ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) { + set_bit(IDEFLOPPY_ZIP_DRIVE, &floppy->flags); + /* This value will be visible in the /proc/ide/hdx/settings */ + floppy->ticks = IDEFLOPPY_TICKS_DELAY; for (i = 0; i < 1 << PARTN_BITS; i++) max_sectors[major][minor + i] = 64; } - /* - * Guess what? The IOMEGA Clik! drive also needs the - * above fix. It makes nasty clicking noises without - * it, so please don't remove this. - */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); - } /* * Guess what? The IOMEGA Clik! 
drive also needs the @@ -2019,10 +2087,7 @@ #endif /* CONFIG_PROC_FS */ -static int idefloppy_reinit (ide_drive_t *drive) -{ - return 0; -} +int idefloppy_reinit(ide_drive_t *drive); /* * IDE subdriver functions, registered with ide.c @@ -2035,6 +2100,8 @@ supports_dma: 1, supports_dsc_overlap: 0, cleanup: idefloppy_cleanup, + standby: NULL, + flushcache: NULL, do_request: idefloppy_do_request, end_request: idefloppy_end_request, ioctl: idefloppy_ioctl, @@ -2046,7 +2113,9 @@ capacity: idefloppy_capacity, special: NULL, proc: idefloppy_proc, - driver_reinit: idefloppy_reinit, + reinit: idefloppy_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, }; int idefloppy_init (void); @@ -2057,6 +2126,40 @@ NULL }; +int idefloppy_reinit (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_floppy, idefloppy_driver.name, NULL, failed++)) != NULL) { + if (!idefloppy_identify_device (drive, drive->id)) { + printk (KERN_ERR "ide-floppy: %s: not supported by this version of ide-floppy\n", drive->name); + continue; + } + if (drive->scsi) { + printk("ide-floppy: passing drive %s to ide-scsi emulation.\n", drive->name); + continue; + } + if ((floppy = (idefloppy_floppy_t *) kmalloc (sizeof (idefloppy_floppy_t), GFP_KERNEL)) == NULL) { + printk (KERN_ERR "ide-floppy: %s: Can't allocate a floppy structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &idefloppy_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-floppy: %s: Failed to register the driver with ide.c\n", drive->name); + kfree (floppy); + continue; + } + DRIVER(drive)->busy++; + idefloppy_setup (drive, floppy); + DRIVER(drive)->busy--; + failed--; + } + ide_register_module(&idefloppy_module); + MOD_DEC_USE_COUNT; + return 0; +} + MODULE_DESCRIPTION("ATAPI FLOPPY Driver"); static void __exit idefloppy_exit (void) @@ -2108,7 +2211,9 @@ kfree (floppy); continue; } + DRIVER(drive)->busy++; idefloppy_setup (drive, floppy); + DRIVER(drive)->busy--; failed--; } ide_register_module(&idefloppy_module); diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-geometry.c linux-2.4.17-rc2-wli2/drivers/ide/ide-geometry.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-geometry.c Fri Nov 9 14:23:34 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-geometry.c Thu Dec 20 19:49:13 2001 @@ -6,6 +6,8 @@ #include #include +#ifdef CONFIG_BLK_DEV_IDE + /* * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc * controller that is BIOS compatible with ST-506, and thus showing up in our @@ -40,7 +42,11 @@ * Consequently, also the former "drive->present = 1" below was a mistake. * * Eventually the entire routine below should be removed. + * + * 17-OCT-2000 rjohnson@analogic.com Added spin-locks for reading CMOS + * chip. 
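+ * + * (The CMOS chip is accessed through an index/data port pair, so + * unserialized readers could interleave their index and data cycles; + * hence the locking noted above.)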
*/ + void probe_cmos_for_drives (ide_hwif_t *hwif) { #ifdef __i386__ @@ -80,9 +86,10 @@ } #endif } +#endif /* CONFIG_BLK_DEV_IDE */ -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) extern ide_drive_t * get_info_ptr(kdev_t); extern unsigned long current_capacity (ide_drive_t *); @@ -214,4 +221,4 @@ drive->bios_cyl, drive->bios_head, drive->bios_sect); return ret; } -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-pci.c linux-2.4.17-rc2-wli2/drivers/ide/ide-pci.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-pci.c Thu Oct 25 13:53:47 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-pci.c Thu Dec 20 19:49:13 2001 @@ -12,6 +12,13 @@ * configuration of all PCI IDE interfaces present in a system. */ +/* + * Chipsets that are put on the IDE_IGNORE list because they cause + * problems when their driver is not enabled at compile time: + * + * CONFIG_BLK_DEV_PDC202XX + */ + #include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> @@ -47,6 +54,8 @@ #define DEVID_PDC20267 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267}) #define DEVID_PDC20268 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268}) #define DEVID_PDC20268R ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268R}) +#define DEVID_PDC20269 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269}) +#define DEVID_PDC20275 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275}) #define DEVID_RZ1000 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000}) #define DEVID_RZ1001 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001}) #define DEVID_SAMURAI ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE}) @@ -55,6 +64,7 @@ #define DEVID_CMD646 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_646}) #define DEVID_CMD648 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_648}) #define DEVID_CMD649 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_649}) +#define DEVID_CMD680 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_680}) #define DEVID_SIS5513 ((ide_pci_devid_t){PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513}) #define DEVID_OPTI621 ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621}) #define DEVID_OPTI621V ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558}) @@ -79,6 +89,7 @@ #define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401}) #define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409}) #define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411}) +#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441}) #define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841}) #define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1}) #define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE}) @@ -86,6 +97,7 @@ #define DEVID_ITE8172G ((ide_pci_devid_t){PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8172G}) #define IDE_IGNORE ((void *)-1) +#define IDE_NO_DRIVER ((void *)-2) #ifdef CONFIG_BLK_DEV_AEC62XX extern unsigned int pci_init_aec62xx(struct pci_dev *, const char *); @@ -99,7 +111,7 @@ #else #define PCI_AEC62XX NULL #define ATA66_AEC62XX NULL -#define INIT_AEC62XX NULL +#define INIT_AEC62XX IDE_NO_DRIVER #define DMA_AEC62XX NULL #endif @@ -115,7 +127,7
@@ #else #define PCI_ALI15X3 NULL #define ATA66_ALI15X3 NULL -#define INIT_ALI15X3 NULL +#define INIT_ALI15X3 IDE_NO_DRIVER #define DMA_ALI15X3 NULL #endif @@ -131,7 +143,7 @@ #else #define PCI_AMD74XX NULL #define ATA66_AMD74XX NULL -#define INIT_AMD74XX NULL +#define INIT_AMD74XX IDE_NO_DRIVER #define DMA_AMD74XX NULL #endif @@ -149,7 +161,7 @@ #ifdef __sparc_v9__ #define INIT_CMD64X IDE_IGNORE #else -#define INIT_CMD64X NULL +#define INIT_CMD64X IDE_NO_DRIVER #endif #endif @@ -160,7 +172,7 @@ #define INIT_CY82C693 &ide_init_cy82c693 #else #define PCI_CY82C693 NULL -#define INIT_CY82C693 NULL +#define INIT_CY82C693 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_CS5530 @@ -170,7 +182,7 @@ #define INIT_CS5530 &ide_init_cs5530 #else #define PCI_CS5530 NULL -#define INIT_CS5530 NULL +#define INIT_CS5530 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_HPT34X @@ -199,7 +211,7 @@ static byte hpt363_shared_pin; #define PCI_HPT366 NULL #define ATA66_HPT366 NULL -#define INIT_HPT366 NULL +#define INIT_HPT366 IDE_NO_DRIVER #define DMA_HPT366 NULL #endif @@ -214,7 +226,7 @@ extern void ide_init_opti621(ide_hwif_t *); #define INIT_OPTI621 &ide_init_opti621 #else -#define INIT_OPTI621 NULL +#define INIT_OPTI621 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_PDC_ADMA @@ -241,9 +253,9 @@ #define ATA66_PDC202XX &ata66_pdc202xx #define INIT_PDC202XX &ide_init_pdc202xx #else -#define PCI_PDC202XX NULL -#define ATA66_PDC202XX NULL -#define INIT_PDC202XX NULL +#define PCI_PDC202XX IDE_IGNORE +#define ATA66_PDC202XX IDE_IGNORE +#define INIT_PDC202XX IDE_IGNORE #endif #ifdef CONFIG_BLK_DEV_PIIX @@ -256,7 +268,7 @@ #else #define PCI_PIIX NULL #define ATA66_PIIX NULL -#define INIT_PIIX NULL +#define INIT_PIIX IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_IT8172 @@ -268,7 +280,7 @@ #else #define PCI_IT8172 NULL #define ATA66_IT8172 NULL -#define INIT_IT8172 NULL +#define INIT_IT8172 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_RZ1000 @@ -290,7 +302,7 @@ #else #define PCI_SVWKS NULL #define ATA66_SVWKS NULL -#define INIT_SVWKS NULL +#define INIT_SVWKS IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_SIS5513 @@ -303,7 +315,7 @@ #else #define PCI_SIS5513 NULL #define ATA66_SIS5513 NULL -#define INIT_SIS5513 NULL +#define INIT_SIS5513 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_SLC90E66 @@ -316,7 +328,7 @@ #else #define PCI_SLC90E66 NULL #define ATA66_SLC90E66 NULL -#define INIT_SLC90E66 NULL +#define INIT_SLC90E66 IDE_NO_DRIVER #endif #ifdef CONFIG_BLK_DEV_SL82C105 @@ -351,7 +363,7 @@ #else #define PCI_VIA82CXXX NULL #define ATA66_VIA82CXXX NULL -#define INIT_VIA82CXXX NULL +#define INIT_VIA82CXXX IDE_NO_DRIVER #define DMA_VIA82CXXX NULL #endif @@ -393,7 +405,7 @@ #ifdef CONFIG_PDC202XX_FORCE {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 }, {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 }, - {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 }, + {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 48 }, {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 }, #else /* !CONFIG_PDC202XX_FORCE */ {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 16 }, @@ -401,11 +413,13 @@ 
{DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 }, {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 }, #endif - {DEVID_PDC20268,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 }, + {DEVID_PDC20268,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, /* Promise used a different PCI ident for the raid card apparently to try and prevent Linux detecting it and using our own raid code. We want to detect it for the ataraid drivers, so we have to list both here.. */ - {DEVID_PDC20268R,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 }, + {DEVID_PDC20268R,"PDC20270", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_PDC20269,"PDC20269", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_PDC20275,"PDC20275", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, {DEVID_RZ1000, "RZ1000", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_RZ1001, "RZ1001", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_SAMURAI, "SAMURAI", NULL, NULL, INIT_SAMURAI, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, @@ -416,6 +430,11 @@ {DEVID_CMD646, "CMD646", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x51,0x80,0x80}}, ON_BOARD, 0 }, {DEVID_CMD648, "CMD648", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_CMD649, "CMD649", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#ifndef CONFIG_BLK_DEV_CMD680 + {DEVID_CMD680, "CMD680", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#else /* CONFIG_BLK_DEV_CMD680 */ + {DEVID_CMD680, "CMD680", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#endif /* !CONFIG_BLK_DEV_CMD680 */ {DEVID_HT6565, "HT6565", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_OPTI621, "OPTI621", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 }, {DEVID_OPTI621X,"OPTI621X", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 }, @@ -434,9 +453,10 @@ {DEVID_CY82C693,"CY82C693", PCI_CY82C693, NULL, INIT_CY82C693, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_HINT, "HINT_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_CS5530, "CS5530", PCI_CS5530, NULL, INIT_CS5530, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, - {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7441, "AMD7441", 
PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, @@ -458,6 +478,9 @@ case PCI_DEVICE_ID_PROMISE_20265: case PCI_DEVICE_ID_PROMISE_20267: case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20268R: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20275: case PCI_DEVICE_ID_ARTOP_ATP850UF: case PCI_DEVICE_ID_ARTOP_ATP860: case PCI_DEVICE_ID_ARTOP_ATP860R: @@ -592,7 +615,15 @@ autodma = 1; #endif - pci_enable_device(dev); + if (d->init_hwif == IDE_NO_DRIVER) { + printk(KERN_WARNING "%s: detected chipset, but driver not compiled in!\n", d->name); + d->init_hwif = NULL; + } + + if (pci_enable_device(dev)) { + printk(KERN_WARNING "%s: (ide_setup_pci_device:) Could not enable device.\n", d->name); + return; + } check_if_enabled: if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) { @@ -752,7 +783,8 @@ } if (IDE_PCI_DEVID_EQ(d->devid, DEVID_MPIIX)) goto bypass_piix_dma; - + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDCADMA)) + goto bypass_legacy_dma; if (hwif->udma_four) { printk("%s: ATA-66/100 forced bit set (WARNING)!!\n", d->name); } else { @@ -769,12 +801,15 @@ autodma = 0; if (autodma) hwif->autodma = 1; + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20246) || IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262) || IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265) || IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20267) || IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268) || IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268R) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20269) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20275) || IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210) || IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) || IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260R) || @@ -785,6 +820,7 @@ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD646) || IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD648) || IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD649) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD680) || IDE_PCI_DEVID_EQ(d->devid, DEVID_OSB4) || ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 0x80))) { unsigned long dma_base = ide_get_or_set_dma_base(hwif, (!mate && d->extra) ? 
d->extra : 0, d->name); @@ -811,6 +847,7 @@ } } #endif /* CONFIG_BLK_DEV_IDEDMA */ +bypass_legacy_dma: bypass_piix_dma: bypass_umc_dma: if (d->init_hwif) /* Call chipset-specific routine for each enabled hwif */ @@ -822,6 +859,44 @@ printk("%s: neither IDE port enabled (BIOS)\n", d->name); } +static void __init pdc20270_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d) +{ + struct pci_dev *dev2 = NULL, *findev; + ide_pci_device_t *d2; + + if ((dev->bus->self && + dev->bus->self->vendor == PCI_VENDOR_ID_DEC) && + (dev->bus->self->device == PCI_DEVICE_ID_DEC_21150)) { + if (PCI_SLOT(dev->devfn) & 2) { + return; + } + d->extra = 0; + pci_for_each_dev(findev) { + if ((findev->vendor == dev->vendor) && + (findev->device == dev->device) && + (PCI_SLOT(findev->devfn) & 2)) { + byte irq = 0, irq2 = 0; + dev2 = findev; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + pci_read_config_byte(dev2, PCI_INTERRUPT_LINE, &irq2); + if (irq != irq2) { + dev2->irq = dev->irq; + pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, irq); + } + + } + } + } + + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn); + ide_setup_pci_device(dev, d); + if (!dev2) + return; + d2 = d; + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn); + ide_setup_pci_device(dev2, d2); +} + static void __init hpt366_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d) { struct pci_dev *dev2 = NULL, *findev; @@ -902,6 +977,8 @@ return; /* UM8886A/BF pair */ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366)) hpt366_device_order_fixup(dev, d); + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268R)) + pdc20270_device_order_fixup(dev, d); else if (!IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL) || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE) { if (IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL)) printk("%s: unknown IDE controller on PCI bus %02x device %02x, VID=%04x, DID=%04x\n", diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-probe.c linux-2.4.17-rc2-wli2/drivers/ide/ide-probe.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-probe.c Mon Nov 26 05:29:17 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-probe.c Thu Dec 20 19:49:13 2001 @@ -58,6 +58,11 @@ struct hd_driveid *id; id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* called with interrupts disabled! */ + if (!id) { + printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n"); + goto err_kmalloc; + } + ide_input_data(drive, id, SECTOR_WORDS); /* read 512 bytes of id info */ ide__sti(); /* local CPU only */ ide_fix_driveid(id); @@ -76,8 +81,7 @@ if ((id->model[0] == 'P' && id->model[1] == 'M') || (id->model[0] == 'S' && id->model[1] == 'K')) { printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); - drive->present = 0; - return; + goto err_misc; } #endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */ @@ -96,7 +100,7 @@ ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap); if (strstr(id->model, "E X A B Y T E N E S T")) - return; + goto err_misc; id->model[sizeof(id->model)-1] = '\0'; /* we depend on this a lot! 
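(strstr() and the printk() of id->model just below rely on that terminating NUL)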
*/ printk("%s: %s, ", drive->name, id->model); @@ -111,8 +115,7 @@ #ifdef CONFIG_BLK_DEV_PDC4030 if (HWIF(drive)->channel == 1 && HWIF(drive)->chipset == ide_pdc4030) { printk(" -- not supported on 2nd Promise port\n"); - drive->present = 0; - return; + goto err_misc; } #endif /* CONFIG_BLK_DEV_PDC4030 */ switch (type) { @@ -174,6 +177,12 @@ printk("ATA DISK drive\n"); QUIRK_LIST(HWIF(drive),drive); return; + +err_misc: + kfree(id); +err_kmalloc: + drive->present = 0; + return; } /* @@ -597,6 +606,12 @@ q->queuedata = HWGROUP(drive); blk_init_queue(q, do_ide_request); + + if (drive->media == ide_disk) { +#ifdef CONFIG_BLK_DEV_ELEVATOR_NOOP + elevator_init(&q->elevator, ELEVATOR_NOOP); +#endif + } } /* @@ -685,6 +700,10 @@ #else /* !CONFIG_IDEPCI_SHARE_IRQ */ int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_INTERRUPT|SA_SHIRQ : SA_INTERRUPT; #endif /* CONFIG_IDEPCI_SHARE_IRQ */ + + if (hwif->io_ports[IDE_CONTROL_OFFSET]) + OUT_BYTE(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]); /* clear nIEN */ + if (ide_request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwgroup)) { if (!match) kfree(hwgroup); @@ -752,18 +771,35 @@ int *bs, *max_sect, *max_ra; extern devfs_handle_t ide_devfs_handle; +#if 1 + units = MAX_DRIVES; +#else /* figure out maximum drive number on the interface */ for (units = MAX_DRIVES; units > 0; --units) { if (hwif->drives[units-1].present) break; } +#endif + + minors = units * (1<<PARTN_BITS); gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL); + if (!gd) + goto err_kmalloc_gd; gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + if (!gd->sizes) + goto err_kmalloc_gd_sizes; gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + if (!gd->part) + goto err_kmalloc_gd_part; bs = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!bs) + goto err_kmalloc_bs; max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!max_sect) + goto err_kmalloc_max_sect; max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!max_ra) + goto err_kmalloc_max_ra; memset(gd->part, 0, minors * sizeof(struct hd_struct)); @@ -773,12 +809,10 @@ max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif + /* + * IDE can do up to 128K per request == 256 sectors + */ + *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 128); *max_ra++ = vm_max_readahead; } @@ -804,6 +838,17 @@ add_gendisk(gd); for (unit = 0; unit < units; ++unit) { +#if 1 + char name[64]; + ide_add_generic_settings(hwif->drives + unit); + hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit); + sprintf (name, "host%d/bus%d/target%d/lun%d", + (hwif->channel && hwif->mate) ?
+ hwif->mate->index : hwif->index, + hwif->channel, unit, hwif->drives[unit].lun); + if (hwif->drives[unit].present) + hwif->drives[unit].de = devfs_mk_dir(ide_devfs_handle, name, NULL); +#else if (hwif->drives[unit].present) { char name[64]; @@ -815,7 +860,23 @@ hwif->drives[unit].de = devfs_mk_dir (ide_devfs_handle, name, NULL); } +#endif } + return; + +err_kmalloc_max_ra: + kfree(max_sect); +err_kmalloc_max_sect: + kfree(bs); +err_kmalloc_bs: + kfree(gd->part); +err_kmalloc_gd_part: + kfree(gd->sizes); +err_kmalloc_gd_sizes: + kfree(gd); +err_kmalloc_gd: + printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n"); + return; } static int hwif_init (ide_hwif_t *hwif) @@ -880,6 +941,19 @@ return hwif->present; } +void export_ide_init_queue (ide_drive_t *drive) +{ + ide_init_queue(drive); +} + +byte export_probe_for_drive (ide_drive_t *drive) +{ + return probe_for_drive(drive); +} + +EXPORT_SYMBOL(export_ide_init_queue); +EXPORT_SYMBOL(export_probe_for_drive); + int ideprobe_init (void); static ide_module_t ideprobe_module = { IDE_PROBE_MODULE, @@ -913,6 +987,8 @@ } #ifdef MODULE +extern int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *); + int init_module (void) { unsigned int index; @@ -921,12 +997,14 @@ ide_unregister(index); ideprobe_init(); create_proc_ide_interfaces(); + ide_xlate_1024_hook = ide_xlate_1024; return 0; } void cleanup_module (void) { ide_probe = NULL; + ide_xlate_1024_hook = 0; } MODULE_LICENSE("GPL"); #endif /* MODULE */ diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-proc.c linux-2.4.17-rc2-wli2/drivers/ide/ide-proc.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-proc.c Fri Sep 7 09:28:38 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-proc.c Thu Dec 20 19:49:13 2001 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -447,7 +448,15 @@ static int proc_ide_get_identify(ide_drive_t *drive, byte *buf) { - return ide_wait_cmd(drive, (drive->media == ide_disk) ? WIN_IDENTIFY : WIN_PIDENTIFY, 0, 0, 1, buf); + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + taskfile.sector_count = 0x01; + taskfile.command = (drive->media == ide_disk) ? 
WIN_IDENTIFY : WIN_PIDENTIFY ; + + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); } static int proc_ide_read_identify @@ -457,7 +466,7 @@ int len = 0, i = 0; if (drive && !proc_ide_get_identify(drive, page)) { - unsigned short *val = ((unsigned short *)page) + 2; + unsigned short *val = (unsigned short *) page; char *out = ((char *)val) + (SECTOR_WORDS * 4); page = out; do { @@ -588,7 +597,7 @@ if (!driver) len = sprintf(page, "(none)\n"); else - len = sprintf(page,"%li\n", ((ide_driver_t *)drive->driver)->capacity(drive)); + len = sprintf(page,"%llu\n", (__u64) ((ide_driver_t *)drive->driver)->capacity(drive)); PROC_IDE_READ_RETURN(page,start,off,count,eof,len); } @@ -732,22 +741,57 @@ } } -void destroy_proc_ide_drives(ide_hwif_t *hwif) +void recreate_proc_ide_device(ide_hwif_t *hwif, ide_drive_t *drive) { - int d; + struct proc_dir_entry *ent; + struct proc_dir_entry *parent = hwif->proc; + char name[64]; +// ide_driver_t *driver = drive->driver; - for (d = 0; d < MAX_DRIVES; d++) { - ide_drive_t *drive = &hwif->drives[d]; - ide_driver_t *driver = drive->driver; + if (drive->present && !drive->proc) { + drive->proc = proc_mkdir(drive->name, parent); + if (drive->proc) + ide_add_proc_entries(drive->proc, generic_drive_entries, drive); - if (!drive->proc) - continue; +/* + * assume that we have these already, however, should test FIXME! + * if (driver) { + * ide_add_proc_entries(drive->proc, generic_subdriver_entries, drive); + * ide_add_proc_entries(drive->proc, driver->proc, drive); + * } + * + */ + sprintf(name,"ide%d/%s", (drive->name[2]-'a')/2, drive->name); + ent = proc_symlink(drive->name, proc_ide_root, name); + if (!ent) + return; + } +} + +void destroy_proc_ide_device(ide_hwif_t *hwif, ide_drive_t *drive) +{ + ide_driver_t *driver = drive->driver; + + if (drive->proc) { if (driver) ide_remove_proc_entries(drive->proc, driver->proc); ide_remove_proc_entries(drive->proc, generic_drive_entries); remove_proc_entry(drive->name, proc_ide_root); remove_proc_entry(drive->name, hwif->proc); drive->proc = NULL; + } +} + +void destroy_proc_ide_drives(ide_hwif_t *hwif) +{ + int d; + + for (d = 0; d < MAX_DRIVES; d++) { + ide_drive_t *drive = &hwif->drives[d]; +// ide_driver_t *driver = drive->driver; + + if (drive->proc) + destroy_proc_ide_device(hwif, drive); } } diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-tape.c linux-2.4.17-rc2-wli2/drivers/ide/ide-tape.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-tape.c Tue Dec 18 21:21:42 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-tape.c Thu Dec 20 19:49:13 2001 @@ -6132,10 +6132,7 @@ #endif -static int idetape_reinit (ide_drive_t *drive) -{ - return 0; -} +int idetape_reinit(ide_drive_t *drive); /* * IDE subdriver functions, registered with ide.c @@ -6148,6 +6145,8 @@ supports_dma: 1, supports_dsc_overlap: 1, cleanup: idetape_cleanup, + standby: NULL, + flushcache: NULL, do_request: idetape_do_request, end_request: idetape_end_request, ioctl: idetape_blkdev_ioctl, @@ -6158,7 +6157,9 @@ pre_reset: idetape_pre_reset, capacity: NULL, proc: idetape_proc, - driver_reinit: idetape_reinit, + reinit: idetape_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, }; int idetape_init (void); @@ -6181,6 +6182,92 @@ release: idetape_chrdev_release, }; +int idetape_reinit (ide_drive_t *drive) +{ +#if 0 + idetape_tape_t *tape; + int minor, failed = 0, supported = 0; +/* DRIVER(drive)->busy++; */ + MOD_INC_USE_COUNT; +#if ONSTREAM_DEBUG + printk(KERN_INFO "ide-tape: MOD_INC_USE_COUNT in idetape_init\n"); +#endif + if 
(!idetape_chrdev_present) + for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++ ) + idetape_chrdevs[minor].drive = NULL; + + if ((drive = ide_scan_devices (ide_tape, idetape_driver.name, NULL, failed++)) == NULL) { + ide_register_module (&idetape_module); + MOD_DEC_USE_COUNT; +#if ONSTREAM_DEBUG + printk(KERN_INFO "ide-tape: MOD_DEC_USE_COUNT in idetape_init\n"); +#endif + return 0; + } + if (!idetape_chrdev_present && + devfs_register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) { + printk (KERN_ERR "ide-tape: Failed to register character device interface\n"); + MOD_DEC_USE_COUNT; +#if ONSTREAM_DEBUG + printk(KERN_INFO "ide-tape: MOD_DEC_USE_COUNT in idetape_init\n"); +#endif + return -EBUSY; + } + do { + if (!idetape_identify_device (drive, drive->id)) { + printk (KERN_ERR "ide-tape: %s: not supported by this version of ide-tape\n", drive->name); + continue; + } + if (drive->scsi) { + if (strstr(drive->id->model, "OnStream DI-30")) { + printk("ide-tape: ide-scsi emulation is not supported for %s.\n", drive->id->model); + } else { + printk("ide-tape: passing drive %s to ide-scsi emulation.\n", drive->name); + continue; + } + } + tape = (idetape_tape_t *) kmalloc (sizeof (idetape_tape_t), GFP_KERNEL); + if (tape == NULL) { + printk (KERN_ERR "ide-tape: %s: Can't allocate a tape structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &idetape_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-tape: %s: Failed to register the driver with ide.c\n", drive->name); + kfree (tape); + continue; + } + for (minor = 0; idetape_chrdevs[minor].drive != NULL; minor++); + idetape_setup (drive, tape, minor); + idetape_chrdevs[minor].drive = drive; + tape->de_r = + devfs_register (drive->de, "mt", DEVFS_FL_DEFAULT, + HWIF(drive)->major, minor, + S_IFCHR | S_IRUGO | S_IWUGO, + &idetape_fops, NULL); + tape->de_n = + devfs_register (drive->de, "mtn", DEVFS_FL_DEFAULT, + HWIF(drive)->major, minor + 128, + S_IFCHR | S_IRUGO | S_IWUGO, + &idetape_fops, NULL); + devfs_register_tape (tape->de_r); + supported++; failed--; + } while ((drive = ide_scan_devices (ide_tape, idetape_driver.name, NULL, failed++)) != NULL); + if (!idetape_chrdev_present && !supported) { + devfs_unregister_chrdev (IDETAPE_MAJOR, "ht"); + } else + idetape_chrdev_present = 1; + ide_register_module (&idetape_module); + MOD_DEC_USE_COUNT; +#if ONSTREAM_DEBUG + printk(KERN_INFO "ide-tape: MOD_DEC_USE_COUNT in idetape_init\n"); +#endif + + return 0; +#else + return 1; +#endif +} + MODULE_DESCRIPTION("ATAPI Streaming TAPE Driver"); MODULE_LICENSE("GPL"); @@ -6205,7 +6292,7 @@ ide_drive_t *drive; idetape_tape_t *tape; int minor, failed = 0, supported = 0; - +/* DRIVER(drive)->busy++; */ MOD_INC_USE_COUNT; #if ONSTREAM_DEBUG printk(KERN_INFO "ide-tape: MOD_INC_USE_COUNT in idetape_init\n"); diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide-taskfile.c linux-2.4.17-rc2-wli2/drivers/ide/ide-taskfile.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide-taskfile.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide-taskfile.c Thu Dec 20 19:49:13 2001 @@ -0,0 +1,1723 @@ +/* + * linux/drivers/ide/ide-taskfile.c Version 0.20 Oct 11, 2000 + * + * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org> + * Copyright (C) 2000 Andre Hedrick <andre@linux-ide.org> + * + * May be copied or modified under the terms of the GNU General Public License + * + * IDE_DEBUG(__LINE__); + */ + +#include <linux/config.h> +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/major.h> +#include <linux/errno.h> +#include <linux/genhd.h> +#include <linux/blkpg.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/delay.h>
+#include <linux/hdreg.h> +#include <linux/ide.h> + +#include <asm/byteorder.h> +#include <asm/irq.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/bitops.h> + +#ifdef CONFIG_IDE_TASKFILE_IO +# define __TASKFILE__IO +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#define DEBUG_TASKFILE 0 /* unset when fixed */ + +#if DEBUG_TASKFILE +#define DTF(x...) printk(##x) +#else +#define DTF(x...) +#endif + +inline u32 task_read_24 (ide_drive_t *drive) +{ + return (IN_BYTE(IDE_HCYL_REG)<<16) | + (IN_BYTE(IDE_LCYL_REG)<<8) | + IN_BYTE(IDE_SECTOR_REG); +} + +static void ata_bswap_data (void *buffer, int wcount) +{ + u16 *p = buffer; + + while (wcount--) { + *p = *p << 8 | *p >> 8; p++; + *p = *p << 8 | *p >> 8; p++; + } +} + +#if SUPPORT_VLB_SYNC +/* + * Some localbus EIDE interfaces require a special access sequence + * when using 32-bit I/O instructions to transfer data. We call this + * the "vlb_sync" sequence, which consists of three successive reads + * of the sector count register location, with interrupts disabled + * to ensure that the reads all happen together. + */ +static inline void task_vlb_sync (ide_ioreg_t port) { + (void) inb (port); + (void) inb (port); + (void) inb (port); +} +#endif /* SUPPORT_VLB_SYNC */ + +/* + * This is used for most PIO data transfers *from* the IDE interface + */ +void ata_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + task_vlb_sync(IDE_NSECTOR_REG); + insl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + insl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + *ptr++ = inw_p(IDE_DATA_REG); + *ptr++ = inw_p(IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + insw(IDE_DATA_REG, buffer, wcount<<1); + } +} + +/* + * This is used for most PIO data transfers *to* the IDE interface + */ +void ata_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + task_vlb_sync(IDE_NSECTOR_REG); + outsl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + outsl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + outw_p(*ptr++, IDE_DATA_REG); + outw_p(*ptr++, IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + outsw(IDE_DATA_REG, buffer, wcount<<1); + } +} + + +static inline void taskfile_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ata_input_data(drive, buffer, wcount); + if (drive->bswap) + ata_bswap_data(buffer, wcount); +} + +static inline void taskfile_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + if (drive->bswap) { + ata_bswap_data(buffer, wcount); + ata_output_data(drive, buffer, wcount); + ata_bswap_data(buffer, wcount); + } else { + ata_output_data(drive, buffer, wcount); + } +} + +ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) +{ + task_struct_t *taskfile = (task_struct_t *) task->tfRegister; + hob_struct_t
*hobfile = (hob_struct_t *) task->hobRegister;
+	struct hd_driveid *id = drive->id;
+	byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+	/* (ks/hs): Moved to start, do not use for multiple out commands */
+	if (task->handler != task_mulout_intr) {
+		if (IDE_CONTROL_REG)
+			OUT_BYTE(drive->ctl, IDE_CONTROL_REG);	/* clear nIEN */
+		SELECT_MASK(HWIF(drive), drive, 0);
+	}
+
+	if ((id->command_set_2 & 0x0400) &&
+	    (id->cfs_enable_2 & 0x0400) &&
+	    (drive->addressing == 1)) {
+		OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+		OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+		OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+		OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+		OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+	}
+
+	OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+	OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+	/* refers to number of sectors to transfer */
+	OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+	/* refers to sector offset or start sector */
+	OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+	OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+	OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+	if (task->handler != NULL) {
+#if 0
+		ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+		OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+		/*
+		 * warning check for race between handler and prehandler for
+		 * writing first block of data.  however since we are well
+		 * inside the boundaries of the seek, we should be okay.
+		 */
+		if (task->prehandler != NULL) {
+			return task->prehandler(drive, task->rq);
+		}
+#else
+		ide_startstop_t startstop;
+
+		ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+		OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+		if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+			printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+				drive->name,
+				drive->mult_count ? "MULTWRITE" : "WRITE");
+			return startstop;
+		}
+		/* (ks/hs): Fixed Multi Write */
+		if ((taskfile->command != WIN_MULTWRITE) &&
+		    (taskfile->command != WIN_MULTWRITE_EXT)) {
+			struct request *rq = HWGROUP(drive)->rq;
+			/* For Write_sectors we need to stuff the first sector */
+			taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+			rq->current_nr_sectors--;
+		} else {
+			/* Stuff first sector(s) by implicitly calling the handler */
+			if (!(drive_is_ready(drive))) {
+				/* FIXME: Replace hard-coded 100, error handling? */
+				int i;
+				for (i=0; i<100; i++) {
+					if (drive_is_ready(drive))
+						break;
+				}
+			}
+			return task->handler(drive);
+		}
+#endif
+	} else {
+		/* for dma commands we don't set the handler */
+		if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive)));
+	}
+
+	return ide_started;
+}
+
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler)
+{
+	struct hd_driveid *id = drive->id;
+	byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+	/* (ks/hs): Moved to start, do not use for multiple out commands */
+	if (*handler != task_mulout_intr) {
+		if (IDE_CONTROL_REG)
+			OUT_BYTE(drive->ctl, IDE_CONTROL_REG);	/* clear nIEN */
+		SELECT_MASK(HWIF(drive), drive, 0);
+	}
+
+	if ((id->command_set_2 & 0x0400) &&
+	    (id->cfs_enable_2 & 0x0400) &&
+	    (drive->addressing == 1)) {
+		OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+		OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+		OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+		OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+		OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+	}
+
+	OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+	OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+	/* refers to number of sectors to transfer */
+	OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+	/* refers to sector offset or start sector */
+	OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+	OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+	OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+	if (handler != NULL) {
+		ide_set_handler (drive, handler, WAIT_CMD, NULL);
+		OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+	} else {
+		/* for dma commands we don't set the handler */
+		if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive)));
+	}
+}
+
+#if 0
+ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+	task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+	hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+	struct hd_driveid *id = drive->id;
+
+	/*
+	 * (KS) Check taskfile in/out flags.
+	 * If set, then execute as it is defined.
+	 * If not set, then define default settings.
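+	 *
+	 * (Editor's sketch: in flag-word terms, on an LBA48-capable drive
+	 * the default built below amounts to
+	 *	task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS |
+	 *				 (IDE_HOB_STD_OUT_FLAGS << 8);
+	 * i.e. the hob flags occupy the high byte of the 16-bit flag word.)
+	 *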
+	 * The default values are:
+	 *	write and read all taskfile registers (except data)
+	 *	write and read the hob registers (sector,nsector,lcyl,hcyl)
+	 */
+	if (task->tf_out_flags.all == 0) {
+		task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS;
+		if ((id->command_set_2 & 0x0400) &&
+		    (id->cfs_enable_2 & 0x0400) &&
+		    (drive->addressing == 1)) {
+			task->tf_out_flags.all |= (IDE_HOB_STD_OUT_FLAGS << 8);
+		}
+	}
+
+	if (task->tf_in_flags.all == 0) {
+		task->tf_in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
+		if ((id->command_set_2 & 0x0400) &&
+		    (id->cfs_enable_2 & 0x0400) &&
+		    (drive->addressing == 1)) {
+			task->tf_in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8);
+		}
+	}
+
+	if (IDE_CONTROL_REG)
+		OUT_BYTE(drive->ctl, IDE_CONTROL_REG);	/* clear nIEN */
+	SELECT_MASK(HWIF(drive), drive, 0);
+
+	if (task->tf_out_flags.b.data) {
+		unsigned short data = taskfile->data + (hobfile->data << 8);
+		OUT_WORD (data, IDE_DATA_REG);
+	}
+
+	/* (KS) send hob registers first */
+	if (task->tf_out_flags.b.nsector_hob)
+		OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+	if (task->tf_out_flags.b.sector_hob)
+		OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+	if (task->tf_out_flags.b.lcyl_hob)
+		OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+	if (task->tf_out_flags.b.hcyl_hob)
+		OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+
+
+	/* (KS) Send now the standard registers */
+	if (task->tf_out_flags.b.error_feature)
+		OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+	/* refers to number of sectors to transfer */
+	if (task->tf_out_flags.b.nsector)
+		OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+	/* refers to sector offset or start sector */
+	if (task->tf_out_flags.b.sector)
+		OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+	if (task->tf_out_flags.b.lcyl)
+		OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+	if (task->tf_out_flags.b.hcyl)
+		OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+	/*
+	 * (KS) Do not modify the specified taskfile. We want to have a
+	 * universal pass through, so we must execute ALL specified values.
+	 *
+	 * (KS) The drive head register is mandatory.
+	 * Don't care about the out flags !
+	 */
+	OUT_BYTE(taskfile->device_head | drive->select.all, IDE_SELECT_REG);
+	if (task->handler != NULL) {
+#if 0
+		ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+		OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+		/*
+		 * warning check for race between handler and prehandler for
+		 * writing first block of data.  however since we are well
+		 * inside the boundaries of the seek, we should be okay.
+		 */
+		if (task->prehandler != NULL) {
+			return task->prehandler(drive, task->rq);
+		}
+#else
+		ide_startstop_t startstop;
+
+		ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+
+		/*
+		 * (KS) The drive command register is also mandatory.
+		 * Don't care about the out flags !
+		 */
+		OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+		if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+			printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+				drive->name,
+				drive->mult_count ?
"MULTWRITE" : "WRITE"); + return startstop; + } + /* (ks/hs): Fixed Multi Write */ + if ((taskfile->command != WIN_MULTWRITE) && + (taskfile->command != WIN_MULTWRITE_EXT)) { + struct request *rq = HWGROUP(drive)->rq; + /* For Write_sectors we need to stuff the first sector */ + taskfile_output_data(drive, rq->buffer, SECTOR_WORDS); + rq->current_nr_sectors--; + } else { + /* Stuff first sector(s) by implicitly calling the handler */ + if (!(drive_is_ready(drive))) { + /* FIXME: Replace hard-coded 100, error handling? */ + int i; + for (i=0; i<100; i++) { + if (drive_is_ready(drive)) + break; + } + } + return task->handler(drive); + } +#endif + } else { + /* for dma commands we down set the handler */ + if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive))); + } + + return ide_started; +} +#endif + +#if 0 +/* + * Error reporting, in human readable form (luxurious, but a memory hog). + */ +byte taskfile_dump_status (ide_drive_t *drive, const char *msg, byte stat) +{ + unsigned long flags; + byte err = 0; + + __save_flags (flags); /* local CPU only */ + ide__sti(); /* local CPU only */ + printk("%s: %s: status=0x%02x", drive->name, msg, stat); +#if FANCY_STATUS_DUMPS + printk(" { "); + if (stat & BUSY_STAT) + printk("Busy "); + else { + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("DeviceFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); + } + printk("}"); +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { + err = GET_ERR(); + printk("%s: %s: error=0x%02x", drive->name, msg, err); +#if FANCY_STATUS_DUMPS + if (drive->media == ide_disk) { + printk(" { "); + if (err & ABRT_ERR) printk("DriveStatusError "); + if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector "); + if (err & ECC_ERR) printk("UncorrectableError "); + if (err & ID_ERR) printk("SectorIdNotFound "); + if (err & TRK0_ERR) printk("TrackZeroNotFound "); + if (err & MARK_ERR) printk("AddrMarkNotFound "); + printk("}"); + if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) { + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + __u64 sectors = 0; + u32 low = 0, high = 0; + low = task_read_24(drive); + OUT_BYTE(0x80, IDE_CONTROL_REG); + high = task_read_24(drive); + sectors = ((__u64)high << 24) | low; + printk(", LBAsect=%lld", sectors); + } else { + byte cur = IN_BYTE(IDE_SELECT_REG); + if (cur & 0x40) { /* using LBA? */ + printk(", LBAsect=%ld", (unsigned long) + ((cur&0xf)<<24) + |(IN_BYTE(IDE_HCYL_REG)<<16) + |(IN_BYTE(IDE_LCYL_REG)<<8) + | IN_BYTE(IDE_SECTOR_REG)); + } else { + printk(", CHS=%d/%d/%d", + (IN_BYTE(IDE_HCYL_REG)<<8) + + IN_BYTE(IDE_LCYL_REG), + cur & 0xf, + IN_BYTE(IDE_SECTOR_REG)); + } + } + if (HWGROUP(drive)->rq) + printk(", sector=%llu", (__u64) HWGROUP(drive)->rq->sector); + } + } +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + } + __restore_flags (flags); /* local CPU only */ + return err; +} + +/* + * Clean up after success/failure of an explicit taskfile operation. 
+ */ +void ide_end_taskfile (ide_drive_t *drive, byte stat, byte err) +{ + unsigned long flags; + struct request *rq; + ide_task_t *args; + task_ioreg_t command; + + spin_lock_irqsave(&io_request_lock, flags); + rq = HWGROUP(drive)->rq; + spin_unlock_irqrestore(&io_request_lock, flags); + args = (ide_task_t *) rq->special; + + command = args->tfRegister[IDE_COMMAND_OFFSET]; + + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + + args->tfRegister[IDE_ERROR_OFFSET] = err; + args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG); + args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG); + args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG); + args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG); + args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG); + args->tfRegister[IDE_STATUS_OFFSET] = stat; + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB); + args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG); + args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG); + args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG); + args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG); + args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG); + } + +/* taskfile_settings_update(drive, args, command); */ + + spin_lock_irqsave(&io_request_lock, flags); + blkdev_dequeue_request(rq); + HWGROUP(drive)->rq = NULL; + end_that_request_last(rq); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * try_to_flush_leftover_data() is invoked in response to a drive + * unexpectedly having its DRQ_STAT bit set. As an alternative to + * resetting the drive, this routine tries to clear the condition + * by read a sector's worth of data from the drive. Of course, + * this may not help if the drive is *waiting* for data from *us*. + */ +void task_try_to_flush_leftover_data (ide_drive_t *drive) +{ + int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS; + + if (drive->media != ide_disk) + return; + while (i > 0) { + u32 buffer[16]; + unsigned int wcount = (i > 16) ? 16 : i; + i -= wcount; + taskfile_input_data (drive, buffer, wcount); + } +} + +/* + * taskfile_error() takes action based on the error returned by the drive. 
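+ *
+ * (For example, in the classification below an ABRT_ERR|ICRC_ERR pair
+ * is treated as a UDMA CRC error and simply retried after bumping
+ * drive->crc_count, while BBD_ERR or ECC_ERR force
+ * rq->errors = ERROR_MAX, since retrying cannot help those.)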
+ */ +ide_startstop_t taskfile_error (ide_drive_t *drive, const char *msg, byte stat) +{ + struct request *rq; + byte err; + + err = taskfile_dump_status(drive, msg, stat); + if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) + return ide_stopped; + /* retry only "normal" I/O: */ + if (rq->cmd == IDE_DRIVE_TASKFILE) { + rq->errors = 1; + ide_end_taskfile(drive, stat, err); + return ide_stopped; + } + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ + rq->errors |= ERROR_RESET; + } else { + if (drive->media == ide_disk && (stat & ERR_STAT)) { + /* err has different meaning on cdrom and tape */ + if (err == ABRT_ERR) { + if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY) + return ide_stopped; /* some newer drives don't support WIN_SPECIFY */ + } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) { + drive->crc_count++; /* UDMA crc error -- just retry the operation */ + } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */ + rq->errors = ERROR_MAX; + else if (err & TRK0_ERR) /* help it find track zero */ + rq->errors |= ERROR_RECAL; + } + if ((stat & DRQ_STAT) && rq->cmd != WRITE) + task_try_to_flush_leftover_data(drive); + } + if (GET_STAT() & (BUSY_STAT|DRQ_STAT)) + OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */ + + if (rq->errors >= ERROR_MAX) { + if (drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); + else + ide_end_request(0, HWGROUP(drive)); + } else { + if ((rq->errors & ERROR_RESET) == ERROR_RESET) { + ++rq->errors; + return ide_do_reset(drive); + } + if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) + drive->special.b.recalibrate = 1; + ++rq->errors; + } + return ide_stopped; +} +#endif + +/* + * Handler for special commands without a data phase from ide-disk + */ + +/* + * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. + */ +ide_startstop_t set_multmode_intr (ide_drive_t *drive) +{ + byte stat; + + if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) { + drive->mult_count = drive->mult_req; + } else { + drive->mult_req = drive->mult_count = 0; + drive->special.b.recalibrate = 1; + (void) ide_dump_status(drive, "set_multmode", stat); + } + return ide_stopped; +} + +/* + * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. + */ +ide_startstop_t set_geometry_intr (ide_drive_t *drive) +{ + byte stat; + + if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) + return ide_stopped; + + if (stat & (ERR_STAT|DRQ_STAT)) + return ide_error(drive, "set_geometry_intr", stat); + + ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL); + return ide_started; +} + +/* + * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. 
+ */
+ide_startstop_t recal_intr (ide_drive_t *drive)
+{
+	byte stat = GET_STAT();
+
+	if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+		return ide_error(drive, "recal_intr", stat);
+	return ide_stopped;
+}
+
+/*
+ * Handler for commands without a data phase
+ */
+ide_startstop_t task_no_data_intr (ide_drive_t *drive)
+{
+	ide_task_t *args = HWGROUP(drive)->rq->special;
+	byte stat = GET_STAT();
+
+	ide__sti();	/* local CPU only */
+
+	if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+		return ide_error(drive, "task_no_data_intr", stat);	/* calls ide_end_drive_cmd */
+
+	if (args)
+		ide_end_drive_cmd (drive, stat, GET_ERR());
+
+	return ide_stopped;
+}
+
+/*
+ * Handler for command with PIO data-in phase
+ */
+ide_startstop_t task_in_intr (ide_drive_t *drive)
+{
+	byte stat = GET_STAT();
+	byte io_32bit = drive->io_32bit;
+	struct request *rq = HWGROUP(drive)->rq;
+	char *pBuf = NULL;
+
+	if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+		if (stat & (ERR_STAT|DRQ_STAT)) {
+			return ide_error(drive, "task_in_intr", stat);
+		}
+		if (!(stat & BUSY_STAT)) {
+			DTF("task_in_intr fired too soon, waiting for next interrupt\n");
+			ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+			return ide_started;
+		}
+	}
+	DTF("stat: %02x\n", stat);
+	pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+	DTF("Read: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+
+	drive->io_32bit = 0;
+	taskfile_input_data(drive, pBuf, SECTOR_WORDS);
+	drive->io_32bit = io_32bit;
+
+	if (--rq->current_nr_sectors <= 0) {
+		/* (hs): swapped next 2 lines */
+		DTF("Request Ended stat: %02x\n", GET_STAT());
+		ide_end_request(1, HWGROUP(drive));
+	} else {
+		ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+		return ide_started;
+	}
+	return ide_stopped;
+}
+
+#undef ALTSTAT_SCREW_UP
+
+#ifdef ALTSTAT_SCREW_UP
+/*
+ * (ks/hs): Poll Alternate Status Register to ensure
+ * that drive is not busy.
+ */
+byte altstat_multi_busy (ide_drive_t *drive, byte stat, const char *msg)
+{
+	int i;
+
+	DTF("multi%s: ASR = %x\n", msg, stat);
+	if (stat & BUSY_STAT) {
+		/* (ks/hs): FIXME: Replace hard-coded 100, error handling? */
+		for (i=0; i<100; i++) {
+			stat = GET_ALTSTAT();
+			if ((stat & BUSY_STAT) == 0)
+				break;
+		}
+	}
+	/*
+	 * (ks/hs): Read Status AFTER Alternate Status Register
+	 */
+	return(GET_STAT());
+}
+
+/*
+ * (ks/hs): Poll Alternate status register to wait for drive
+ * to become ready for next transfer
+ */
+byte altstat_multi_poll (ide_drive_t *drive, byte stat, const char *msg)
+{
+	/* (ks/hs): FIXME: Error handling, time-out?
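+	 * One bounded variant might look like this (editor's sketch,
+	 * reusing the WAIT_WORSTCASE budget the reset path uses):
+	 *	unsigned long timeout = jiffies + WAIT_WORSTCASE;
+	 *	while ((stat & BUSY_STAT) && time_before(jiffies, timeout))
+	 *		stat = GET_ALTSTAT();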
*/ + while (stat & BUSY_STAT) + stat = GET_ALTSTAT(); + DTF("multi%s: nsect=1, ASR = %x\n", msg, stat); + return(GET_STAT()); /* (ks/hs): Clear pending IRQ */ +} +#endif /* ALTSTAT_SCREW_UP */ + +/* + * Handler for command with Read Multiple + */ +ide_startstop_t task_mulin_intr (ide_drive_t *drive) +{ + unsigned int msect, nsect; + +#ifdef ALTSTAT_SCREW_UP + byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "read"); +#else + byte stat = GET_STAT(); +#endif /* ALTSTAT_SCREW_UP */ + + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + char *pBuf = NULL; + + if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "task_mulin_intr", stat); + } + /* no data yet, so wait for another interrupt */ + ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL); + return ide_started; + } + + /* (ks/hs): Fixed Multi-Sector transfer */ + msect = drive->mult_count; + +#ifdef ALTSTAT_SCREW_UP + /* + * Screw the request we do not support bad data-phase setups! + * Either read and learn the ATA standard or crash yourself! + */ + if (!msect) { + /* + * (ks/hs): Drive supports multi-sector transfer, + * drive->mult_count was not set + */ + nsect = 1; + while (rq->current_nr_sectors) { + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Multiread: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + stat = altstat_multi_poll(drive, GET_ALTSTAT(), "read"); + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } +#endif /* ALTSTAT_SCREW_UP */ + + nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors; + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + + DTF("Multiread: %p, nsect: %d , rq->current_nr_sectors: %ld\n", + pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + if (rq->current_nr_sectors != 0) { + ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL); + return ide_started; + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; +} + +ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq) +{ + ide_task_t *args = rq->special; + ide_startstop_t startstop; + + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE"); + return startstop; + } + + /* (ks/hs): Fixed Multi Write */ + if ((args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE) && + (args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE_EXT)) { + /* For Write_sectors we need to stuff the first sector */ + taskfile_output_data(drive, rq->buffer, SECTOR_WORDS); + rq->current_nr_sectors--; + return ide_started; + } else { + /* + * (ks/hs): Stuff the first sector(s) + * by implicitly calling the handler + */ + if (!(drive_is_ready(drive))) { + int i; + /* + * (ks/hs): FIXME: Replace hard-coded + * 100, error handling? 
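+			 * One bounded form might be, as an editor's
+			 * sketch only (udelay() needs linux/delay.h):
+			 *	for (i = 0; i < 100 && !drive_is_ready(drive); i++)
+			 *		udelay(100);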
+ */ + for (i=0; i<100; i++) { + if (drive_is_ready(drive)) + break; + } + } + return args->handler(drive); + } + return ide_started; +} + +/* + * Handler for command with PIO data-out phase + */ +ide_startstop_t task_out_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + char *pBuf = NULL; + + if (!rq->current_nr_sectors) { + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } + + if (!OK_STAT(stat,DRIVE_READY,drive->bad_wstat)) { + return ide_error(drive, "task_out_intr", stat); + } + if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) { + rq = HWGROUP(drive)->rq; + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("write: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_output_data(drive, pBuf, SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors--; + } + + if (rq->current_nr_sectors <= 0) { + ide_end_request(1, HWGROUP(drive)); + } else { + ide_set_handler(drive, &task_out_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; +} + +/* + * Handler for command write multiple + * Called directly from execute_drive_cmd for the first bunch of sectors, + * afterwards only by the ISR + */ +ide_startstop_t task_mulout_intr (ide_drive_t *drive) +{ + unsigned int msect, nsect; + +#ifdef ALTSTAT_SCREW_UP + byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "write"); +#else + byte stat = GET_STAT(); +#endif /* ALTSTAT_SCREW_UP */ + + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + char *pBuf = NULL; + + /* + * (ks/hs): Handle last IRQ on multi-sector transfer, + * occurs after all data was sent + */ + if (rq->current_nr_sectors == 0) { + if (stat & (ERR_STAT|DRQ_STAT)) + return ide_error(drive, "task_mulout_intr", stat); + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } + + if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "task_mulout_intr", stat); + } + /* no data yet, so wait for another interrupt */ + if (hwgroup->handler == NULL) + ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL); + return ide_started; + } + + /* (ks/hs): See task_mulin_intr */ + msect = drive->mult_count; + +#ifdef ALTSTAT_SCREW_UP + /* + * Screw the request we do not support bad data-phase setups! + * Either read and learn the ATA standard or crash yourself! + */ + if (!msect) { + nsect = 1; + while (rq->current_nr_sectors) { + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Multiwrite: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + stat = altstat_multi_poll(drive, GET_ALTSTAT(), "write"); + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } +#endif /* ALTSTAT_SCREW_UP */ + + nsect = (rq->current_nr_sectors > msect) ? 
msect : rq->current_nr_sectors;
+	pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+	DTF("Multiwrite: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+		pBuf, nsect, rq->current_nr_sectors);
+	drive->io_32bit = 0;
+	taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+	drive->io_32bit = io_32bit;
+	rq->errors = 0;
+	rq->current_nr_sectors -= nsect;
+	if (hwgroup->handler == NULL)
+		ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+	return ide_started;
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+	switch(taskfile->command) {
+		/* IDE_DRIVE_TASK_RAW_WRITE */
+		case CFA_WRITE_MULTI_WO_ERASE:
+		case WIN_MULTWRITE:
+		case WIN_MULTWRITE_EXT:
+//		case WIN_WRITEDMA:
+//		case WIN_WRITEDMA_QUEUED:
+//		case WIN_WRITEDMA_EXT:
+//		case WIN_WRITEDMA_QUEUED_EXT:
+		/* IDE_DRIVE_TASK_OUT */
+		case WIN_WRITE:
+		case WIN_WRITE_VERIFY:
+		case WIN_WRITE_BUFFER:
+		case CFA_WRITE_SECT_WO_ERASE:
+		case WIN_DOWNLOAD_MICROCODE:
+			return &pre_task_out_intr;
+		/* IDE_DRIVE_TASK_OUT */
+		case WIN_SMART:
+			if (taskfile->feature == SMART_WRITE_LOG_SECTOR)
+				return &pre_task_out_intr;
+		default:
+			break;
+	}
+	return(NULL);
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+	switch(taskfile->command) {
+		case WIN_IDENTIFY:
+		case WIN_PIDENTIFY:
+		case CFA_TRANSLATE_SECTOR:
+		case WIN_READ_BUFFER:
+		case WIN_READ:
+		case WIN_READ_EXT:
+			return &task_in_intr;
+		case WIN_SECURITY_DISABLE:
+		case WIN_SECURITY_ERASE_UNIT:
+		case WIN_SECURITY_SET_PASS:
+		case WIN_SECURITY_UNLOCK:
+		case WIN_DOWNLOAD_MICROCODE:
+		case CFA_WRITE_SECT_WO_ERASE:
+		case WIN_WRITE_BUFFER:
+		case WIN_WRITE_VERIFY:
+		case WIN_WRITE:
+		case WIN_WRITE_EXT:
+			return &task_out_intr;
+		case WIN_MULTREAD:
+		case WIN_MULTREAD_EXT:
+			return &task_mulin_intr;
+		case CFA_WRITE_MULTI_WO_ERASE:
+		case WIN_MULTWRITE:
+		case WIN_MULTWRITE_EXT:
+			return &task_mulout_intr;
+		case WIN_SMART:
+			switch(taskfile->feature) {
+				case SMART_READ_VALUES:
+				case SMART_READ_THRESHOLDS:
+				case SMART_READ_LOG_SECTOR:
+					return &task_in_intr;
+				case SMART_WRITE_LOG_SECTOR:
+					return &task_out_intr;
+				default:
+					return &task_no_data_intr;
+			}
+		case CFA_REQ_EXT_ERROR_CODE:
+		case CFA_ERASE_SECTORS:
+		case WIN_VERIFY:
+		case WIN_VERIFY_EXT:
+		case WIN_SEEK:
+			return &task_no_data_intr;
+		case WIN_SPECIFY:
+			return &set_geometry_intr;
+		case WIN_RESTORE:
+			return &recal_intr;
+		case WIN_DIAGNOSE:
+		case WIN_FLUSH_CACHE:
+		case WIN_FLUSH_CACHE_EXT:
+		case WIN_STANDBYNOW1:
+		case WIN_STANDBYNOW2:
+		case WIN_SLEEPNOW1:
+		case WIN_SLEEPNOW2:
+		case WIN_SETIDLE1:
+		case WIN_CHECKPOWERMODE1:
+		case WIN_CHECKPOWERMODE2:
+		case WIN_GETMEDIASTATUS:
+		case WIN_MEDIAEJECT:
+			return &task_no_data_intr;
+		case WIN_SETMULT:
+			return &set_multmode_intr;
+		case WIN_READ_NATIVE_MAX:
+		case WIN_SET_MAX:
+		case WIN_READ_NATIVE_MAX_EXT:
+		case WIN_SET_MAX_EXT:
+		case WIN_SECURITY_ERASE_PREPARE:
+		case WIN_SECURITY_FREEZE_LOCK:
+		case WIN_DOORLOCK:
+		case WIN_DOORUNLOCK:
+		case WIN_SETFEATURES:
+			return &task_no_data_intr;
+		case DISABLE_SEAGATE:
+		case EXABYTE_ENABLE_NEST:
+			return &task_no_data_intr;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+		case WIN_READDMA:
+		case WIN_IDENTIFY_DMA:
+		case WIN_READDMA_QUEUED:
+		case WIN_READDMA_EXT:
+		case WIN_READDMA_QUEUED_EXT:
+		case WIN_WRITEDMA:
+		case WIN_WRITEDMA_QUEUED:
+		case WIN_WRITEDMA_EXT:
+		case WIN_WRITEDMA_QUEUED_EXT:
+#endif
+		case WIN_FORMAT:
+		case WIN_INIT:
+		case WIN_DEVICE_RESET:
+		case WIN_QUEUED_SERVICE:
+		case WIN_PACKETCMD:
+		default:
+			return(NULL);
+	}
+}
+
+/* Called by the ioctl path to figure out the type of command being issued */
+int ide_cmd_type_parser (ide_task_t *args)
+{
+	struct hd_drive_task_hdr *taskfile = (struct hd_drive_task_hdr *) args->tfRegister;
+	struct hd_drive_hob_hdr *hobfile = (struct hd_drive_hob_hdr *) args->hobRegister;
+
+	args->prehandler = ide_pre_handler_parser(taskfile, hobfile);
+	args->handler = ide_handler_parser(taskfile, hobfile);
+
+	switch(args->tfRegister[IDE_COMMAND_OFFSET]) {
+		case WIN_IDENTIFY:
+		case WIN_PIDENTIFY:
+			return IDE_DRIVE_TASK_IN;
+		case CFA_TRANSLATE_SECTOR:
+		case WIN_READ:
+		case WIN_READ_BUFFER:
+			return IDE_DRIVE_TASK_IN;
+		case WIN_WRITE:
+		case WIN_WRITE_VERIFY:
+		case WIN_WRITE_BUFFER:
+		case CFA_WRITE_SECT_WO_ERASE:
+		case WIN_DOWNLOAD_MICROCODE:
+			return IDE_DRIVE_TASK_RAW_WRITE;
+		case WIN_MULTREAD:
+			return IDE_DRIVE_TASK_IN;
+		case CFA_WRITE_MULTI_WO_ERASE:
+		case WIN_MULTWRITE:
+			return IDE_DRIVE_TASK_RAW_WRITE;
+		case WIN_SECURITY_DISABLE:
+		case WIN_SECURITY_ERASE_UNIT:
+		case WIN_SECURITY_SET_PASS:
+		case WIN_SECURITY_UNLOCK:
+			return IDE_DRIVE_TASK_OUT;
+		case WIN_SMART:
+			args->tfRegister[IDE_LCYL_OFFSET] = SMART_LCYL_PASS;
+			args->tfRegister[IDE_HCYL_OFFSET] = SMART_HCYL_PASS;
+			switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+				case SMART_READ_VALUES:
+				case SMART_READ_THRESHOLDS:
+				case SMART_READ_LOG_SECTOR:
+					return IDE_DRIVE_TASK_IN;
+				case SMART_WRITE_LOG_SECTOR:
+					return IDE_DRIVE_TASK_OUT;
+				default:
+					return IDE_DRIVE_TASK_NO_DATA;
+			}
+#ifdef CONFIG_BLK_DEV_IDEDMA
+		case WIN_READDMA:
+		case WIN_IDENTIFY_DMA:
+		case WIN_READDMA_QUEUED:
+		case WIN_READDMA_EXT:
+		case WIN_READDMA_QUEUED_EXT:
+			return IDE_DRIVE_TASK_IN;
+		case WIN_WRITEDMA:
+		case WIN_WRITEDMA_QUEUED:
+		case WIN_WRITEDMA_EXT:
+		case WIN_WRITEDMA_QUEUED_EXT:
+			return IDE_DRIVE_TASK_RAW_WRITE;
+#endif
+		case WIN_SETFEATURES:
+			switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+				case SETFEATURES_XFER:
+					return IDE_DRIVE_TASK_SET_XFER;
+				case SETFEATURES_DIS_DEFECT:
+				case SETFEATURES_EN_APM:
+				case SETFEATURES_DIS_MSN:
+				case SETFEATURES_EN_RI:
+				case SETFEATURES_EN_SI:
+				case SETFEATURES_DIS_RPOD:
+				case SETFEATURES_DIS_WCACHE:
+				case SETFEATURES_EN_DEFECT:
+				case SETFEATURES_DIS_APM:
+				case SETFEATURES_EN_MSN:
+				case SETFEATURES_EN_RLA:
+				case SETFEATURES_PREFETCH:
+				case SETFEATURES_EN_RPOD:
+				case SETFEATURES_DIS_RI:
+				case SETFEATURES_DIS_SI:
+				default:
+					return IDE_DRIVE_TASK_NO_DATA;
+			}
+		case WIN_NOP:
+		case CFA_REQ_EXT_ERROR_CODE:
+		case CFA_ERASE_SECTORS:
+		case WIN_VERIFY:
+		case WIN_VERIFY_EXT:
+		case WIN_SEEK:
+		case WIN_SPECIFY:
+		case WIN_RESTORE:
+		case WIN_DIAGNOSE:
+		case WIN_FLUSH_CACHE:
+		case WIN_FLUSH_CACHE_EXT:
+		case WIN_STANDBYNOW1:
+		case WIN_STANDBYNOW2:
+		case WIN_SLEEPNOW1:
+		case WIN_SLEEPNOW2:
+		case WIN_SETIDLE1:
+		case DISABLE_SEAGATE:
+		case WIN_CHECKPOWERMODE1:
+		case WIN_CHECKPOWERMODE2:
+		case WIN_GETMEDIASTATUS:
+		case WIN_MEDIAEJECT:
+		case WIN_SETMULT:
+		case WIN_READ_NATIVE_MAX:
+		case WIN_SET_MAX:
+		case WIN_READ_NATIVE_MAX_EXT:
+		case WIN_SET_MAX_EXT:
+		case WIN_SECURITY_ERASE_PREPARE:
+		case WIN_SECURITY_FREEZE_LOCK:
+		case EXABYTE_ENABLE_NEST:
+		case WIN_DOORLOCK:
+		case WIN_DOORUNLOCK:
+			return IDE_DRIVE_TASK_NO_DATA;
+		case WIN_FORMAT:
+		case WIN_INIT:
+		case WIN_DEVICE_RESET:
+		case WIN_QUEUED_SERVICE:
+		case WIN_PACKETCMD:
+		default:
+
return IDE_DRIVE_TASK_INVALID; + } +} + +/* + * This function is intended to be used prior to invoking ide_do_drive_cmd(). + */ +void ide_init_drive_taskfile (struct request *rq) +{ + memset(rq, 0, sizeof(*rq)); + rq->cmd = IDE_DRIVE_TASK_NO_DATA; +} + +/* + * This is kept for internal use only !!! + * This is an internal call and nobody in user-space has a damn + * reason to call this taskfile. + * + * ide_raw_taskfile is the one that user-space executes. + */ +int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf) +{ + struct request rq; + ide_task_t args; + + memset(&args, 0, sizeof(ide_task_t)); + + args.tfRegister[IDE_DATA_OFFSET] = taskfile->data; + args.tfRegister[IDE_FEATURE_OFFSET] = taskfile->feature; + args.tfRegister[IDE_NSECTOR_OFFSET] = taskfile->sector_count; + args.tfRegister[IDE_SECTOR_OFFSET] = taskfile->sector_number; + args.tfRegister[IDE_LCYL_OFFSET] = taskfile->low_cylinder; + args.tfRegister[IDE_HCYL_OFFSET] = taskfile->high_cylinder; + args.tfRegister[IDE_SELECT_OFFSET] = taskfile->device_head; + args.tfRegister[IDE_COMMAND_OFFSET] = taskfile->command; + + args.hobRegister[IDE_DATA_OFFSET_HOB] = hobfile->data; + args.hobRegister[IDE_FEATURE_OFFSET_HOB] = hobfile->feature; + args.hobRegister[IDE_NSECTOR_OFFSET_HOB] = hobfile->sector_count; + args.hobRegister[IDE_SECTOR_OFFSET_HOB] = hobfile->sector_number; + args.hobRegister[IDE_LCYL_OFFSET_HOB] = hobfile->low_cylinder; + args.hobRegister[IDE_HCYL_OFFSET_HOB] = hobfile->high_cylinder; + args.hobRegister[IDE_SELECT_OFFSET_HOB] = hobfile->device_head; + args.hobRegister[IDE_CONTROL_OFFSET_HOB] = hobfile->control; + + ide_init_drive_taskfile(&rq); + /* This is kept for internal use only !!! */ + args.command_type = ide_cmd_type_parser (&args); + if (args.command_type != IDE_DRIVE_TASK_NO_DATA) + rq.current_nr_sectors = rq.nr_sectors = (hobfile->sector_count << 8) | taskfile->sector_count; + + rq.cmd = IDE_DRIVE_TASKFILE; + rq.buffer = buf; + rq.special = &args; + return ide_do_drive_cmd(drive, &rq, ide_wait); +} + +int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *args, byte *buf) +{ + struct request rq; + ide_init_drive_taskfile(&rq); + rq.cmd = IDE_DRIVE_TASKFILE; + rq.buffer = buf; + + if (args->command_type != IDE_DRIVE_TASK_NO_DATA) + rq.current_nr_sectors = rq.nr_sectors = (args->hobRegister[IDE_NSECTOR_OFFSET_HOB] << 8) | args->tfRegister[IDE_NSECTOR_OFFSET]; + + rq.special = args; + return ide_do_drive_cmd(drive, &rq, ide_wait); +} + + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG +char * ide_ioctl_verbose (unsigned int cmd) +{ + return("unknown"); +} + +char * ide_task_cmd_verbose (byte task) +{ + return("unknown"); +} +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + +/* + * The taskfile glue table + * + * reqtask.data_phase reqtask.req_cmd + * args.command_type args.handler + * + * TASKFILE_P_OUT_DMAQ ?? ?? + * TASKFILE_P_IN_DMAQ ?? ?? + * TASKFILE_P_OUT_DMA ?? ?? + * TASKFILE_P_IN_DMA ?? ?? + * TASKFILE_P_OUT ?? ?? + * TASKFILE_P_IN ?? ?? + * + * TASKFILE_OUT_DMAQ IDE_DRIVE_TASK_RAW_WRITE NULL + * TASKFILE_IN_DMAQ IDE_DRIVE_TASK_IN NULL + * + * TASKFILE_OUT_DMA IDE_DRIVE_TASK_RAW_WRITE NULL + * TASKFILE_IN_DMA IDE_DRIVE_TASK_IN NULL + * + * TASKFILE_IN_OUT ?? ?? 
+ * + * TASKFILE_MULTI_OUT IDE_DRIVE_TASK_RAW_WRITE task_mulout_intr + * TASKFILE_MULTI_IN IDE_DRIVE_TASK_IN task_mulin_intr + * + * TASKFILE_OUT IDE_DRIVE_TASK_RAW_WRITE task_out_intr + * TASKFILE_OUT IDE_DRIVE_TASK_OUT task_out_intr + * + * TASKFILE_IN IDE_DRIVE_TASK_IN task_in_intr + * TASKFILE_NO_DATA IDE_DRIVE_TASK_NO_DATA task_no_data_intr + * + * IDE_DRIVE_TASK_SET_XFER task_no_data_intr + * IDE_DRIVE_TASK_INVALID + * + */ + +#define MAX_DMA (256*SECTOR_WORDS) + +int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + ide_task_request_t *req_task; + ide_task_t args; + + byte *outbuf = NULL; + byte *inbuf = NULL; + task_ioreg_t *argsptr = args.tfRegister; + task_ioreg_t *hobsptr = args.hobRegister; + int err = 0; + int tasksize = sizeof(struct ide_task_request_s); + int taskin = 0; + int taskout = 0; + + req_task = kmalloc(tasksize, GFP_KERNEL); + if (req_task == NULL) return -ENOMEM; + memset(req_task, 0, tasksize); + if (copy_from_user(req_task, (void *) arg, tasksize)) { + kfree(req_task); + return -EFAULT; + } + + taskout = (int) req_task->out_size; + taskin = (int) req_task->in_size; + + if (taskout) { + int outtotal = tasksize; + outbuf = kmalloc(taskout, GFP_KERNEL); + if (outbuf == NULL) { + err = -ENOMEM; + goto abort; + } + memset(outbuf, 0, taskout); + if (copy_from_user(outbuf, (void *)arg + outtotal, taskout)) { + err = -EFAULT; + goto abort; + } + } + + if (taskin) { + int intotal = tasksize + taskout; + inbuf = kmalloc(taskin, GFP_KERNEL); + if (inbuf == NULL) { + err = -ENOMEM; + goto abort; + } + memset(inbuf, 0, taskin); + if (copy_from_user(inbuf, (void *)arg + intotal , taskin)) { + err = -EFAULT; + goto abort; + } + } + + memset(argsptr, 0, HDIO_DRIVE_TASK_HDR_SIZE); + memset(hobsptr, 0, HDIO_DRIVE_HOB_HDR_SIZE); + memcpy(argsptr, req_task->io_ports, HDIO_DRIVE_TASK_HDR_SIZE); + memcpy(hobsptr, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE); + + args.tf_in_flags = req_task->in_flags; + args.tf_out_flags = req_task->out_flags; + args.data_phase = req_task->data_phase; + args.command_type = req_task->req_cmd; + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG + DTF("%s: ide_ioctl_cmd %s: ide_task_cmd %s\n", + drive->name, + ide_ioctl_verbose(cmd), + ide_task_cmd_verbose(args.tfRegister[IDE_COMMAND_OFFSET])); +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + + switch(req_task->data_phase) { + case TASKFILE_OUT_DMAQ: + case TASKFILE_OUT_DMA: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + break; + case TASKFILE_IN_DMAQ: + case TASKFILE_IN_DMA: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; + case TASKFILE_IN_OUT: +#if 0 + args.prehandler = &pre_task_out_intr; + args.handler = &task_out_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + args.prehandler = NULL; + args.handler = &task_in_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; +#else + err = -EFAULT; + goto abort; +#endif + case TASKFILE_MULTI_OUT: + if (drive->mult_count) { + args.prehandler = &pre_task_out_intr; + args.handler = &task_mulout_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + } else { + /* (hs): give up if multcount is not set */ + printk("%s: %s Multimode Write " \ + "multcount is not set\n", + drive->name, __FUNCTION__); + err = -EPERM; + goto abort; + } + break; + case 
TASKFILE_OUT: + args.prehandler = &pre_task_out_intr; + args.handler = &task_out_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + break; + case TASKFILE_MULTI_IN: + if (drive->mult_count) { + args.prehandler = NULL; + args.handler = &task_mulin_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + } else { + /* (hs): give up if multcount is not set */ + printk("%s: %s Multimode Read failure " \ + "multcount is not set\n", + drive->name, __FUNCTION__); + err = -EPERM; + goto abort; + } + break; + case TASKFILE_IN: + args.prehandler = NULL; + args.handler = &task_in_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; + case TASKFILE_NO_DATA: + args.prehandler = NULL; + args.handler = &task_no_data_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, NULL); + break; + default: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = -EFAULT; + goto abort; + } + + memcpy(req_task->io_ports, &(args.tfRegister), HDIO_DRIVE_TASK_HDR_SIZE); + memcpy(req_task->hob_ports, &(args.hobRegister), HDIO_DRIVE_HOB_HDR_SIZE); + req_task->in_flags = args.tf_in_flags; + req_task->out_flags = args.tf_out_flags; + + if (copy_to_user((void *)arg, req_task, tasksize)) { + err = -EFAULT; + goto abort; + } + if (taskout) { + int outtotal = tasksize; + if (copy_to_user((void *)arg+outtotal, outbuf, taskout)) { + err = -EFAULT; + goto abort; + } + } + if (taskin) { + int intotal = tasksize + taskout; + if (copy_to_user((void *)arg+intotal, inbuf, taskin)) { + err = -EFAULT; + goto abort; + } + } +abort: + kfree(req_task); + if (outbuf != NULL) + kfree(outbuf); + if (inbuf != NULL) + kfree(inbuf); + return err; +} + +EXPORT_SYMBOL(task_read_24); +EXPORT_SYMBOL(do_rw_taskfile); +EXPORT_SYMBOL(do_taskfile); +// EXPORT_SYMBOL(flagged_taskfile); + +//EXPORT_SYMBOL(ide_end_taskfile); + +EXPORT_SYMBOL(set_multmode_intr); +EXPORT_SYMBOL(set_geometry_intr); +EXPORT_SYMBOL(recal_intr); + +EXPORT_SYMBOL(task_no_data_intr); +EXPORT_SYMBOL(task_in_intr); +EXPORT_SYMBOL(task_mulin_intr); +EXPORT_SYMBOL(pre_task_out_intr); +EXPORT_SYMBOL(task_out_intr); +EXPORT_SYMBOL(task_mulout_intr); + +EXPORT_SYMBOL(ide_init_drive_taskfile); +EXPORT_SYMBOL(ide_wait_taskfile); +EXPORT_SYMBOL(ide_raw_taskfile); +EXPORT_SYMBOL(ide_pre_handler_parser); +EXPORT_SYMBOL(ide_handler_parser); +EXPORT_SYMBOL(ide_cmd_type_parser); +EXPORT_SYMBOL(ide_taskfile_ioctl); + +#ifdef CONFIG_PKT_TASK_IOCTL + +#if 0 +{ + +{ /* start cdrom */ + + struct cdrom_info *info = drive->driver_data; + + if (info->dma) { + if (info->cmd == READ) { + info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive); + } else if (info->cmd == WRITE) { + info->dma = !HWIF(drive)->dmaproc(ide_dma_write, drive); + } else { + printk("ide-cd: DMA set, but not allowed\n"); + } + } + + /* Set up the controller registers. 
*/ + OUT_BYTE (info->dma, IDE_FEATURE_REG); + OUT_BYTE (0, IDE_NSECTOR_REG); + OUT_BYTE (0, IDE_SECTOR_REG); + + OUT_BYTE (xferlen & 0xff, IDE_LCYL_REG); + OUT_BYTE (xferlen >> 8 , IDE_HCYL_REG); + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl, IDE_CONTROL_REG); + + if (info->dma) + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + + if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) { + ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry); + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */ + return ide_started; + } else { + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */ + return (*handler) (drive); + } + +} /* end cdrom */ + +{ /* start floppy */ + + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_bcount_reg_t bcount; + int dma_ok = 0; + + floppy->pc=pc; /* Set the current packet command */ + + pc->retries++; + pc->actually_transferred=0; /* We haven't transferred any data yet */ + pc->current_position=pc->buffer; + bcount.all = IDE_MIN(pc->request_transfer, 63 * 1024); + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { + (void) HWIF(drive)->dmaproc(ide_dma_off, drive); + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_IDEDMA */ + +} /* end floppy */ + +{ /* start tape */ + + idetape_tape_t *tape = drive->driver_data; + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { + printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n"); + (void) HWIF(drive)->dmaproc(ide_dma_off, drive); + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 
1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); +#ifdef CONFIG_BLK_DEV_IDEDMA + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_IDEDMA */ + if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) { + ide_set_handler(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL); + OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG); + return ide_started; + } else { + OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG); + return idetape_transfer_pc(drive); + } + +} /* end tape */ + +} +#endif + +int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ +#if 0 + switch(req_task->data_phase) { + case TASKFILE_P_OUT_DMAQ: + case TASKFILE_P_IN_DMAQ: + case TASKFILE_P_OUT_DMA: + case TASKFILE_P_IN_DMA: + case TASKFILE_P_OUT: + case TASKFILE_P_IN: + } +#endif + return -ENOMSG; +} + +EXPORT_SYMBOL(pkt_taskfile_ioctl); + +#endif /* CONFIG_PKT_TASK_IOCTL */ diff -urN linux-2.4.17-rc2-virgin/drivers/ide/ide.c linux-2.4.17-rc2-wli2/drivers/ide/ide.c --- linux-2.4.17-rc2-virgin/drivers/ide/ide.c Thu Oct 25 13:58:35 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/ide.c Thu Dec 20 19:49:13 2001 @@ -149,6 +149,7 @@ #include #include #include +#include #include #include @@ -162,6 +163,16 @@ #include #endif /* CONFIG_KMOD */ +#ifdef CONFIG_IDE_TASKFILE_IO +# define __TASKFILE__IO +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#ifdef __TASKFILE__IO +#else /* !__TASKFILE__IO */ +#endif /* __TASKFILE__IO */ + /* default maximum number of failures */ #define IDE_DEFAULT_MAX_FAILURES 1 @@ -515,7 +526,8 @@ /* * Needed for PCI irq sharing */ -static inline int drive_is_ready (ide_drive_t *drive) +//static inline +int drive_is_ready (ide_drive_t *drive) { byte stat = 0; if (drive->waiting_for_dma) @@ -809,7 +821,11 @@ */ OUT_BYTE(drive->ctl|6,IDE_CONTROL_REG); /* set SRST and nIEN */ udelay(10); /* more than enough time */ - OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */ + if (drive->quirk_list == 2) { + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear SRST and nIEN */ + } else { + OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */ + } udelay(10); /* more than enough time */ hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL); @@ -836,6 +852,13 @@ return do_reset1 (drive, 0); } +static inline u32 read_24 (ide_drive_t *drive) +{ + return (IN_BYTE(IDE_HCYL_REG)<<16) | + (IN_BYTE(IDE_LCYL_REG)<<8) | + IN_BYTE(IDE_SECTOR_REG); +} + /* * Clean up after success/failure of an explicit drive cmd */ @@ -848,26 +871,66 @@ rq = HWGROUP(drive)->rq; spin_unlock_irqrestore(&io_request_lock, flags); - if (rq->cmd == IDE_DRIVE_CMD) { - byte *args = (byte *) rq->buffer; - rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); - if (args) { - args[0] = stat; - args[1] = err; - args[2] = IN_BYTE(IDE_NSECTOR_REG); - } - } else if (rq->cmd == IDE_DRIVE_TASK) { - byte *args = (byte *) rq->buffer; - rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); - if (args) { - args[0] = stat; - args[1] = err; - args[2] = IN_BYTE(IDE_NSECTOR_REG); - args[3] = IN_BYTE(IDE_SECTOR_REG); - args[4] = IN_BYTE(IDE_LCYL_REG); - args[5] = IN_BYTE(IDE_HCYL_REG); - args[6] = IN_BYTE(IDE_SELECT_REG); + switch(rq->cmd) { + case IDE_DRIVE_CMD: + { + byte *args = 
(byte *) rq->buffer; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + args[0] = stat; + args[1] = err; + args[2] = IN_BYTE(IDE_NSECTOR_REG); + } + break; + } + case IDE_DRIVE_TASK: + { + byte *args = (byte *) rq->buffer; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + args[0] = stat; + args[1] = err; + args[2] = IN_BYTE(IDE_NSECTOR_REG); + args[3] = IN_BYTE(IDE_SECTOR_REG); + args[4] = IN_BYTE(IDE_LCYL_REG); + args[5] = IN_BYTE(IDE_HCYL_REG); + args[6] = IN_BYTE(IDE_SELECT_REG); + } + break; + } + case IDE_DRIVE_TASKFILE: + { + ide_task_t *args = (ide_task_t *) rq->special; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + if (args->tf_in_flags.b.data) { + unsigned short data = IN_WORD(IDE_DATA_REG); + args->tfRegister[IDE_DATA_OFFSET] = (data) & 0xFF; + args->hobRegister[IDE_DATA_OFFSET_HOB] = (data >> 8) & 0xFF; + } + args->tfRegister[IDE_ERROR_OFFSET] = err; + args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG); + args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG); + args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG); + args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG); + args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG); + args->tfRegister[IDE_STATUS_OFFSET] = stat; + + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB); + args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG); + args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG); + args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG); + args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG); + args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG); + } + } + break; } + default: + break; } spin_lock_irqsave(&io_request_lock, flags); blkdev_dequeue_request(rq); @@ -917,19 +980,32 @@ if (err & MARK_ERR) printk("AddrMarkNotFound "); printk("}"); if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) { - byte cur = IN_BYTE(IDE_SELECT_REG); - if (cur & 0x40) { /* using LBA? */ - printk(", LBAsect=%ld", (unsigned long) - ((cur&0xf)<<24) - |(IN_BYTE(IDE_HCYL_REG)<<16) - |(IN_BYTE(IDE_LCYL_REG)<<8) - | IN_BYTE(IDE_SECTOR_REG)); + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + __u64 sectors = 0; + u32 low = 0, high = 0; + low = read_24(drive); + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG); + high = read_24(drive); + + sectors = ((__u64)high << 24) | low; + printk(", LBAsect=%lld, high=%d, low=%d", sectors, high, low); } else { - printk(", CHS=%d/%d/%d", - (IN_BYTE(IDE_HCYL_REG)<<8) + - IN_BYTE(IDE_LCYL_REG), - cur & 0xf, - IN_BYTE(IDE_SECTOR_REG)); + byte cur = IN_BYTE(IDE_SELECT_REG); + if (cur & 0x40) { /* using LBA? 
*/ + printk(", LBAsect=%ld", (unsigned long) + ((cur&0xf)<<24) + |(IN_BYTE(IDE_HCYL_REG)<<16) + |(IN_BYTE(IDE_LCYL_REG)<<8) + | IN_BYTE(IDE_SECTOR_REG)); + } else { + printk(", CHS=%d/%d/%d", + (IN_BYTE(IDE_HCYL_REG)<<8) + + IN_BYTE(IDE_LCYL_REG), + cur & 0xf, + IN_BYTE(IDE_SECTOR_REG)); + } } if (HWGROUP(drive) && HWGROUP(drive)->rq) printk(", sector=%ld", HWGROUP(drive)->rq->sector); @@ -980,6 +1056,13 @@ ide_end_drive_cmd(drive, stat, err); return ide_stopped; } + if (rq->cmd == IDE_DRIVE_TASKFILE) { + rq->errors = 1; + ide_end_drive_cmd(drive, stat, err); +// ide_end_taskfile(drive, stat, err); + return ide_stopped; + } + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ rq->errors |= ERROR_RESET; } else { @@ -1141,62 +1224,125 @@ */ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { - byte *args = rq->buffer; - if (args && rq->cmd == IDE_DRIVE_TASK) { - byte sel; + switch(rq->cmd) { + case IDE_DRIVE_TASKFILE: + { + ide_task_t *args = rq->special; + + if (!(args)) break; + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG + { + printk(KERN_INFO "%s: ", drive->name); +// printk("TF.0=x%02x ", args->tfRegister[IDE_DATA_OFFSET]); + printk("TF.1=x%02x ", args->tfRegister[IDE_FEATURE_OFFSET]); + printk("TF.2=x%02x ", args->tfRegister[IDE_NSECTOR_OFFSET]); + printk("TF.3=x%02x ", args->tfRegister[IDE_SECTOR_OFFSET]); + printk("TF.4=x%02x ", args->tfRegister[IDE_LCYL_OFFSET]); + printk("TF.5=x%02x ", args->tfRegister[IDE_HCYL_OFFSET]); + printk("TF.6=x%02x ", args->tfRegister[IDE_SELECT_OFFSET]); + printk("TF.7=x%02x\n", args->tfRegister[IDE_COMMAND_OFFSET]); + printk(KERN_INFO "%s: ", drive->name); +// printk("HTF.0=x%02x ", args->hobRegister[IDE_DATA_OFFSET_HOB]); + printk("HTF.1=x%02x ", args->hobRegister[IDE_FEATURE_OFFSET_HOB]); + printk("HTF.2=x%02x ", args->hobRegister[IDE_NSECTOR_OFFSET_HOB]); + printk("HTF.3=x%02x ", args->hobRegister[IDE_SECTOR_OFFSET_HOB]); + printk("HTF.4=x%02x ", args->hobRegister[IDE_LCYL_OFFSET_HOB]); + printk("HTF.5=x%02x ", args->hobRegister[IDE_HCYL_OFFSET_HOB]); + printk("HTF.6=x%02x ", args->hobRegister[IDE_SELECT_OFFSET_HOB]); + printk("HTF.7=x%02x\n", args->hobRegister[IDE_CONTROL_OFFSET_HOB]); + } +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + +// if (args->tf_out_flags.all == 0) { + do_taskfile(drive, + (struct hd_drive_task_hdr *)&args->tfRegister, + (struct hd_drive_hob_hdr *)&args->hobRegister, + args->handler); +// } else { +// return flagged_taskfile(drive, args); +// } + + if (((args->command_type == IDE_DRIVE_TASK_RAW_WRITE) || + (args->command_type == IDE_DRIVE_TASK_OUT)) && + args->prehandler && args->handler) + return args->prehandler(drive, rq); + return ide_started; + } + case IDE_DRIVE_TASK: + { + byte *args = rq->buffer; + byte sel; + + if (!(args)) break; #ifdef DEBUG - printk("%s: DRIVE_TASK_CMD data=x%02x cmd=0x%02x fr=0x%02x ns=0x%02x sc=0x%02x lcyl=0x%02x hcyl=0x%02x sel=0x%02x\n", - drive->name, - args[0], args[1], args[2], args[3], - args[4], args[5], args[6], args[7]); + printk("%s: DRIVE_TASK_CMD ", drive->name); + printk("cmd=0x%02x ", args[0]); + printk("fr=0x%02x ", args[1]); + printk("ns=0x%02x ", args[2]); + printk("sc=0x%02x ", args[3]); + printk("lcyl=0x%02x ", args[4]); + printk("hcyl=0x%02x ", args[5]); + printk("sel=0x%02x\n", args[6]); #endif - OUT_BYTE(args[1], IDE_FEATURE_REG); - OUT_BYTE(args[3], IDE_SECTOR_REG); - OUT_BYTE(args[4], IDE_LCYL_REG); - OUT_BYTE(args[5], IDE_HCYL_REG); - sel = (args[6] & ~0x10); - if (drive->select.b.unit) - sel |= 
0x10; - OUT_BYTE(sel, IDE_SELECT_REG); - ide_cmd(drive, args[0], args[2], &drive_cmd_intr); - return ide_started; - } else if (args) { + OUT_BYTE(args[1], IDE_FEATURE_REG); + OUT_BYTE(args[3], IDE_SECTOR_REG); + OUT_BYTE(args[4], IDE_LCYL_REG); + OUT_BYTE(args[5], IDE_HCYL_REG); + sel = (args[6] & ~0x10); + if (drive->select.b.unit) + sel |= 0x10; + OUT_BYTE(sel, IDE_SELECT_REG); + ide_cmd(drive, args[0], args[2], &drive_cmd_intr); + return ide_started; + } + case IDE_DRIVE_CMD: + { + byte *args = rq->buffer; + + if (!(args)) break; #ifdef DEBUG - printk("%s: DRIVE_CMD cmd=0x%02x sc=0x%02x fr=0x%02x xx=0x%02x\n", - drive->name, args[0], args[1], args[2], args[3]); + printk("%s: DRIVE_CMD ", drive->name); + printk("cmd=0x%02x ", args[0]); + printk("sc=0x%02x ", args[1]); + printk("fr=0x%02x ", args[2]); + printk("xx=0x%02x\n", args[3]); #endif - if (args[0] == WIN_SMART) { - OUT_BYTE(0x4f, IDE_LCYL_REG); - OUT_BYTE(0xc2, IDE_HCYL_REG); - OUT_BYTE(args[2],IDE_FEATURE_REG); - OUT_BYTE(args[1],IDE_SECTOR_REG); - ide_cmd(drive, args[0], args[3], &drive_cmd_intr); - return ide_started; - } - OUT_BYTE(args[2],IDE_FEATURE_REG); - ide_cmd(drive, args[0], args[1], &drive_cmd_intr); - return ide_started; - } else { - /* - * NULL is actually a valid way of waiting for - * all current requests to be flushed from the queue. - */ + if (args[0] == WIN_SMART) { + OUT_BYTE(0x4f, IDE_LCYL_REG); + OUT_BYTE(0xc2, IDE_HCYL_REG); + OUT_BYTE(args[2],IDE_FEATURE_REG); + OUT_BYTE(args[1],IDE_SECTOR_REG); + ide_cmd(drive, args[0], args[3], &drive_cmd_intr); + return ide_started; + } + OUT_BYTE(args[2],IDE_FEATURE_REG); + ide_cmd(drive, args[0], args[1], &drive_cmd_intr); + return ide_started; + } + default: + break; + } + /* + * NULL is actually a valid way of waiting for + * all current requests to be flushed from the queue. + */ #ifdef DEBUG - printk("%s: DRIVE_CMD (null)\n", drive->name); + printk("%s: DRIVE_CMD (null)\n", drive->name); #endif - ide_end_drive_cmd(drive, GET_STAT(), GET_ERR()); - return ide_stopped; - } + ide_end_drive_cmd(drive, GET_STAT(), GET_ERR()); + return ide_stopped; } /* * start_request() initiates handling of a new I/O request + * needed to reverse the perverted changes anonymously made back + * 2.3.99-pre6 */ -static ide_startstop_t start_request (ide_drive_t *drive) +static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1245,8 +1391,13 @@ return startstop; } if (!drive->special.all) { - if (rq->cmd == IDE_DRIVE_CMD || rq->cmd == IDE_DRIVE_TASK) { - return execute_drive_cmd(drive, rq); + switch(rq->cmd) { + case IDE_DRIVE_CMD: + case IDE_DRIVE_TASK: + case IDE_DRIVE_TASKFILE: + return execute_drive_cmd(drive, rq); + default: + break; } if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); @@ -1267,13 +1418,15 @@ { ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long flags; + struct request *rq; spin_lock_irqsave(&io_request_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); + rq = hwgroup->rq; spin_unlock_irqrestore(&io_request_lock, flags); - return start_request(drive); + return start_request(drive, rq); } /* @@ -1367,10 +1520,11 @@ * the driver. This makes the driver much more friendlier to shared IRQs * than previous designs, while remaining 100% (?) SMP safe and capable. 
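The WIN_SMART branch of the IDE_DRIVE_CMD case above is the kernel half of userspace SMART queries: the ioctl path copies four control bytes from the caller, and the kernel itself supplies the 0x4f/0xc2 LCYL/HCYL key. A minimal userspace sketch of that path, not part of this patch - WIN_SMART, SMART_READ_VALUES, HDIO_DRIVE_CMD and the args[0]=command / args[1]=sector / args[2]=feature / args[3]=sector-count layout are the <linux/hdreg.h> conventions decoded in the hunk above, and /dev/hda is only an example device:

	/* smart_dump.c - hedged sketch: fetch the 512-byte SMART attribute
	 * page through HDIO_DRIVE_CMD.  Needs the same CAP_SYS_ADMIN and
	 * CAP_SYS_RAWIO rights the handler checks with capable(). */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/hdreg.h>

	int main(void)
	{
		/* 4 control bytes + room for one sector of reply data,
		 * matching argsize = 4 + (SECTOR_WORDS * 4 * args[3]) */
		unsigned char args[4 + 512] = { WIN_SMART, 0, SMART_READ_VALUES, 1 };
		int fd = open("/dev/hda", O_RDONLY);	/* example device */

		if (fd < 0 || ioctl(fd, HDIO_DRIVE_CMD, args) != 0) {
			perror("HDIO_DRIVE_CMD(WIN_SMART)");
			return 1;
		}
		/* args[4..515] now holds the SMART data sector */
		printf("SMART structure revision: %u\n",
		       args[4] | (args[5] << 8));
		close(fd);
		return 0;
	}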
*/ -static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) +static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) { ide_drive_t *drive; ide_hwif_t *hwif; + struct request *rq; ide_startstop_t startstop; ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */ @@ -1423,7 +1577,8 @@ if ( drive->queue.plugged ) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + + rq = hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1436,7 +1591,7 @@ disable_irq_nosync(hwif->irq); spin_unlock(&io_request_lock); ide__sti(); /* allow other IRQs while we start this request */ - startstop = start_request(drive); + startstop = start_request(drive, rq); spin_lock_irq(&io_request_lock); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); @@ -1965,6 +2120,10 @@ (void) request_module("ide-tape"); if (drive->media == ide_floppy) (void) request_module("ide-floppy"); +#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) + if (drive->media == ide_scsi) + (void) request_module("ide-scsi"); +#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */ } #endif /* CONFIG_KMOD */ while (drive->busy) @@ -2293,6 +2452,7 @@ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports)); hwif->irq = hw->irq; hwif->noprobe = 0; + hwif->chipset = hw->chipset; if (!initializing) { ide_probe_module(); @@ -2588,6 +2748,61 @@ return((int) ((!system_bus_speed) ? ide_system_bus_speed() : system_bus_speed )); } +int ide_reinit_drive (ide_drive_t *drive) +{ + switch (drive->media) { +#ifdef CONFIG_BLK_DEV_IDECD + case ide_cdrom: + { + extern int ide_cdrom_reinit(ide_drive_t *drive); + if (ide_cdrom_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDECD */ +#ifdef CONFIG_BLK_DEV_IDEDISK + case ide_disk: + { + extern int idedisk_reinit(ide_drive_t *drive); + if (idedisk_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDEDISK */ +#ifdef CONFIG_BLK_DEV_IDEFLOPPY + case ide_floppy: + { + extern int idefloppy_reinit(ide_drive_t *drive); + if (idefloppy_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ +#ifdef CONFIG_BLK_DEV_IDETAPE + case ide_tape: + { + extern int idetape_reinit(ide_drive_t *drive); + if (idetape_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDETAPE */ +#ifdef CONFIG_BLK_DEV_IDESCSI +/* + * { + * extern int idescsi_reinit(ide_drive_t *drive); + * if (idescsi_reinit(drive)) + * return 1; + * break; + * } + */ +#endif /* CONFIG_BLK_DEV_IDESCSI */ + default: + return 1; + } + return 0; +} + static int ide_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -2679,16 +2894,47 @@ drive->nice1 << IDE_NICE_1 | drive->nice2 << IDE_NICE_2, (long *) arg); + +#ifdef CONFIG_IDE_TASK_IOCTL + case HDIO_DRIVE_TASKFILE: + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + switch(drive->media) { + case ide_disk: + return ide_taskfile_ioctl(drive, inode, file, cmd, arg); +#ifdef CONFIG_PKT_TASK_IOCTL + case ide_cdrom: + case ide_tape: + case ide_floppy: + return pkt_taskfile_ioctl(drive, inode, file, cmd, arg); +#endif /* CONFIG_PKT_TASK_IOCTL */ + default: + return -ENOMSG; + } +#endif /* CONFIG_IDE_TASK_IOCTL */ + case HDIO_DRIVE_CMD: { byte args[4], *argbuf = args; byte 
xfer_rate = 0; int argsize = 4; - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; + ide_task_t tfargs; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; if (NULL == (void *) arg) return ide_do_drive_cmd(drive, &rq, ide_wait); if (copy_from_user(args, (void *)arg, 4)) return -EFAULT; + + tfargs.tfRegister[IDE_FEATURE_OFFSET] = args[2]; + tfargs.tfRegister[IDE_NSECTOR_OFFSET] = args[3]; + tfargs.tfRegister[IDE_SECTOR_OFFSET] = args[1]; + tfargs.tfRegister[IDE_LCYL_OFFSET] = 0x00; + tfargs.tfRegister[IDE_HCYL_OFFSET] = 0x00; + tfargs.tfRegister[IDE_SELECT_OFFSET] = 0x00; + tfargs.tfRegister[IDE_COMMAND_OFFSET] = args[0]; + if (args[3]) { argsize = 4 + (SECTOR_WORDS * 4 * args[3]); argbuf = kmalloc(argsize, GFP_KERNEL); @@ -2697,9 +2943,9 @@ memcpy(argbuf, args, 4); } - if (set_transfer(drive, args[0], args[1], args[2])) { + if (set_transfer(drive, &tfargs)) { xfer_rate = args[1]; - if (ide_ata66_check(drive, args[0], args[1], args[2])) + if (ide_ata66_check(drive, &tfargs)) goto abort; } @@ -2759,7 +3005,24 @@ drive->nice1 = (arg >> IDE_NICE_1) & 1; return 0; case HDIO_DRIVE_RESET: + { + unsigned long flags; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + if (!capable(CAP_SYS_ADMIN)) return -EACCES; +#if 1 + spin_lock_irqsave(&io_request_lock, flags); + if (hwgroup->handler != NULL) { + printk("%s: ide_set_handler: handler not null; %p\n", drive->name, hwgroup->handler); + (void) hwgroup->handler(drive); +// hwgroup->handler = NULL; +// hwgroup->expiry = NULL; + hwgroup->timer.expires = jiffies; + del_timer(&hwgroup->timer); + } + spin_unlock_irqrestore(&io_request_lock, flags); + +#endif (void) ide_do_reset(drive); if (drive->suspend_reset) { /* @@ -2774,7 +3037,7 @@ return ide_revalidate_disk(inode->i_rdev); } return 0; - + } case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2797,7 +3060,7 @@ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (HWIF(drive)->busproc) - HWIF(drive)->busproc(HWIF(drive), arg); + HWIF(drive)->busproc(drive, (int)arg); return 0; default: @@ -3458,6 +3721,16 @@ return ide_unregister_subdriver(drive); } +static int default_standby (ide_drive_t *drive) +{ + return 0; +} + +static int default_flushcache (ide_drive_t *drive) +{ + return 0; +} + static ide_startstop_t default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block) { ide_end_request(0, HWGROUP(drive)); @@ -3508,7 +3781,7 @@ return ide_stopped; } -static int default_driver_reinit (ide_drive_t *drive) +static int default_reinit (ide_drive_t *drive) { printk(KERN_ERR "%s: does not support hotswap of device class !\n", drive->name); @@ -3520,6 +3793,8 @@ ide_driver_t *d = drive->driver; if (d->cleanup == NULL) d->cleanup = default_cleanup; + if (d->standby == NULL) d->standby = default_standby; + if (d->flushcache == NULL) d->flushcache = default_flushcache; if (d->do_request == NULL) d->do_request = default_do_request; if (d->end_request == NULL) d->end_request = default_end_request; if (d->ioctl == NULL) d->ioctl = default_ioctl; @@ -3529,7 +3804,7 @@ if (d->pre_reset == NULL) d->pre_reset = default_pre_reset; if (d->capacity == NULL) d->capacity = default_capacity; if (d->special == NULL) d->special = default_special; - if (d->driver_reinit == NULL) d->driver_reinit = default_driver_reinit; + if (d->reinit == NULL) d->reinit = default_reinit; } ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n) @@ -3674,6 +3949,7 @@ EXPORT_SYMBOL(ide_output_data); EXPORT_SYMBOL(atapi_input_bytes); 
EXPORT_SYMBOL(atapi_output_bytes); +EXPORT_SYMBOL(drive_is_ready); EXPORT_SYMBOL(ide_set_handler); EXPORT_SYMBOL(ide_dump_status); EXPORT_SYMBOL(ide_error); @@ -3696,6 +3972,8 @@ EXPORT_SYMBOL(ide_remove_proc_entries); EXPORT_SYMBOL(proc_ide_read_geometry); EXPORT_SYMBOL(create_proc_ide_interfaces); +EXPORT_SYMBOL(recreate_proc_ide_device); +EXPORT_SYMBOL(destroy_proc_ide_device); #endif EXPORT_SYMBOL(ide_add_setting); EXPORT_SYMBOL(ide_remove_setting); @@ -3710,6 +3988,54 @@ EXPORT_SYMBOL(system_bus_clock); +EXPORT_SYMBOL(ide_reinit_drive); + +static int ide_notify_reboot (struct notifier_block *this, unsigned long event, void *x) +{ + ide_hwif_t *hwif; + ide_drive_t *drive; + int i, unit; + + switch (event) { + case SYS_HALT: + case SYS_POWER_OFF: + case SYS_RESTART: + break; + default: + return NOTIFY_DONE; + } + + printk("flushing ide devices: "); + + for (i = 0; i < MAX_HWIFS; i++) { + hwif = &ide_hwifs[i]; + if (!hwif->present) + continue; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + drive = &hwif->drives[unit]; + if (!drive->present) + continue; + + /* set the drive to standby */ + printk("%s ", drive->name); + if (event != SYS_RESTART) + if (drive->driver != NULL && DRIVER(drive)->standby(drive)) + continue; + + if (drive->driver != NULL && DRIVER(drive)->cleanup(drive)) + continue; + } + } + printk("\n"); + return NOTIFY_DONE; +} + +static struct notifier_block ide_notifier = { + ide_notify_reboot, + NULL, + 5 +}; + /* * This is gets invoked once during initialization, to set *everything* up */ @@ -3737,6 +4063,7 @@ ide_geninit(hwif); } + register_reboot_notifier(&ide_notifier); return 0; } @@ -3769,6 +4096,7 @@ { int index; + unregister_reboot_notifier(&ide_notifier); for (index = 0; index < MAX_HWIFS; ++index) { ide_unregister(index); #if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI) diff -urN linux-2.4.17-rc2-virgin/drivers/ide/pdc202xx.c linux-2.4.17-rc2-wli2/drivers/ide/pdc202xx.c --- linux-2.4.17-rc2-virgin/drivers/ide/pdc202xx.c Wed Nov 14 11:44:03 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/pdc202xx.c Thu Dec 20 19:49:13 2001 @@ -108,7 +108,7 @@ u32 bibma = pci_resource_start(dev, 4); u32 reg60h = 0, reg64h = 0, reg68h = 0, reg6ch = 0; u16 reg50h = 0, pmask = (1<<10), smask = (1<<11); - u8 hi = 0, lo = 0, invalid_data_set = 0; + u8 hi = 0, lo = 0; /* * at that point bibma+0x2 et bibma+0xa are byte registers @@ -132,11 +132,6 @@ pci_read_config_dword(dev, 0x6c, ®6ch); switch(dev->device) { - case PCI_DEVICE_ID_PROMISE_20268: - case PCI_DEVICE_ID_PROMISE_20268R: - p += sprintf(p, "\n PDC20268 TX2 Chipset.\n"); - invalid_data_set = 1; - break; case PCI_DEVICE_ID_PROMISE_20267: p += sprintf(p, "\n PDC20267 Chipset.\n"); break; @@ -180,45 +175,77 @@ p += sprintf(p, " Mode %s Mode %s\n", (sc1a & 0x01) ? "MASTER" : "PCI ", (sc1b & 0x01) ? "MASTER" : "PCI "); - if (!(invalid_data_set)) - p += sprintf(p, " %s %s\n", - (sc1d & 0x08) ? "Error " : - ((sc1d & 0x05) == 0x05) ? "Not My INTR " : - (sc1d & 0x04) ? "Interrupting" : - (sc1d & 0x02) ? "FIFO Full " : - (sc1d & 0x01) ? "FIFO Empty " : "????????????", - (sc1d & 0x80) ? "Error " : - ((sc1d & 0x50) == 0x50) ? "Not My INTR " : - (sc1d & 0x40) ? "Interrupting" : - (sc1d & 0x20) ? "FIFO Full " : - (sc1d & 0x10) ? "FIFO Empty " : "????????????"); + p += sprintf(p, " %s %s\n", + (sc1d & 0x08) ? "Error " : + ((sc1d & 0x05) == 0x05) ? "Not My INTR " : + (sc1d & 0x04) ? "Interrupting" : + (sc1d & 0x02) ? "FIFO Full " : + (sc1d & 0x01) ? "FIFO Empty " : "????????????", + (sc1d & 0x80) ? 
"Error " : + ((sc1d & 0x50) == 0x50) ? "Not My INTR " : + (sc1d & 0x40) ? "Interrupting" : + (sc1d & 0x20) ? "FIFO Full " : + (sc1d & 0x10) ? "FIFO Empty " : "????????????"); p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n"); p += sprintf(p, "DMA enabled: %s %s %s %s\n", (c0&0x20)?"yes":"no ",(c0&0x40)?"yes":"no ",(c1&0x20)?"yes":"no ",(c1&0x40)?"yes":"no "); - if (!(invalid_data_set)) - p += sprintf(p, "DMA Mode: %s %s %s %s\n", - pdc202xx_ultra_verbose(reg60h, (reg50h & pmask)), - pdc202xx_ultra_verbose(reg64h, (reg50h & pmask)), - pdc202xx_ultra_verbose(reg68h, (reg50h & smask)), - pdc202xx_ultra_verbose(reg6ch, (reg50h & smask))); - if (!(invalid_data_set)) - p += sprintf(p, "PIO Mode: %s %s %s %s\n", - pdc202xx_pio_verbose(reg60h), - pdc202xx_pio_verbose(reg64h), - pdc202xx_pio_verbose(reg68h), - pdc202xx_pio_verbose(reg6ch)); + p += sprintf(p, "DMA Mode: %s %s %s %s\n", + pdc202xx_ultra_verbose(reg60h, (reg50h & pmask)), + pdc202xx_ultra_verbose(reg64h, (reg50h & pmask)), + pdc202xx_ultra_verbose(reg68h, (reg50h & smask)), + pdc202xx_ultra_verbose(reg6ch, (reg50h & smask))); + p += sprintf(p, "PIO Mode: %s %s %s %s\n", + pdc202xx_pio_verbose(reg60h), + pdc202xx_pio_verbose(reg64h), + pdc202xx_pio_verbose(reg68h), + pdc202xx_pio_verbose(reg6ch)); #if 0 p += sprintf(p, "--------------- Can ATAPI DMA ---------------\n"); #endif - if (invalid_data_set) - p += sprintf(p, "--------------- Cannot Decode HOST ---------------\n"); + return (char *)p; +} + +static char * pdc202xx_info_new (char *buf, struct pci_dev *dev) +{ + char *p = buf; +// u32 bibma = pci_resource_start(dev, 4); + +// u32 reg60h = 0, reg64h = 0, reg68h = 0, reg6ch = 0; +// u16 reg50h = 0, word88 = 0; +// int udmasel[4]={0,0,0,0}, piosel[4]={0,0,0,0}, i=0, hd=0; + + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + p += sprintf(p, "\n PDC20275 Chipset.\n"); + break; + case PCI_DEVICE_ID_PROMISE_20269: + p += sprintf(p, "\n PDC20269 TX2 Chipset.\n"); + break; + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20268R: + p += sprintf(p, "\n PDC20268 TX2 Chipset.\n"); + break; +default: + p += sprintf(p, "\n PDC202XX Chipset.\n"); + break; + } return (char *)p; } static int pdc202xx_get_info (char *buffer, char **addr, off_t offset, int count) { char *p = buffer; - p = pdc202xx_info(buffer, bmide_dev); + switch(bmide_dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20268R: + p = pdc202xx_info_new(buffer, bmide_dev); + break; + default: + p = pdc202xx_info(buffer, bmide_dev); + break; + } return p-buffer; /* => must be less than 4k! 
*/ } #endif /* defined(DISPLAY_PDC202XX_TIMINGS) && defined(CONFIG_PROC_FS) */ @@ -387,9 +414,6 @@ if ((drive->media != ide_disk) && (speed < XFER_SW_DMA_0)) return -1; - if (dev->device == PCI_DEVICE_ID_PROMISE_20268) - goto skip_register_hell; - pci_read_config_dword(dev, drive_pci, &drive_conf); pci_read_config_byte(dev, (drive_pci), &AP); pci_read_config_byte(dev, (drive_pci)|0x01, &BP); @@ -432,6 +456,7 @@ switch(speed) { #ifdef CONFIG_BLK_DEV_IDEDMA + /* case XFER_UDMA_6: */ case XFER_UDMA_5: case XFER_UDMA_4: TB = 0x20; TC = 0x01; break; /* speed 8 == UDMA mode 4 */ case XFER_UDMA_3: TB = 0x40; TC = 0x02; break; /* speed 7 == UDMA mode 3 */ @@ -477,8 +502,6 @@ decode_registers(REG_D, DP); #endif /* PDC202XX_DECODE_REGISTER_INFO */ -skip_register_hell: - if (!drive->init_speed) drive->init_speed = speed; err = ide_config_drive_speed(drive, speed); @@ -494,6 +517,159 @@ return err; } +static int pdc202xx_new_tune_chipset (ide_drive_t *drive, byte speed) +{ + ide_hwif_t *hwif = HWIF(drive); +#ifdef CONFIG_BLK_DEV_IDEDMA + unsigned long indexreg = (hwif->dma_base + 1); + unsigned long datareg = (hwif->dma_base + 3); +#else + struct pci_dev *dev = hwif->pci_dev; + unsigned long high_16 = pci_resource_start(dev, 4); + unsigned long indexreg = high_16 + (hwif->channel ? 0x09 : 0x01); + unsigned long datareg = (indexreg + 2); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + byte thold = 0x10; + byte adj = (drive->dn%2) ? 0x08 : 0x00; + + int err; + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (speed == XFER_UDMA_2) { + OUT_BYTE((thold + adj), indexreg); + OUT_BYTE((IN_BYTE(datareg) & 0x7f), datareg); + } + switch (speed) { + case XFER_UDMA_7: + speed = XFER_UDMA_6; + case XFER_UDMA_6: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x1a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x01, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xcb, datareg); + break; + case XFER_UDMA_5: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x1a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x02, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xcb, datareg); + break; + case XFER_UDMA_4: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x1a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x03, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xcd, datareg); + break; + case XFER_UDMA_3: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x1a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x05, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xcd, datareg); + break; + case XFER_UDMA_2: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x2a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x07, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xcd, datareg); + break; + case XFER_UDMA_1: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x3a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x0a, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xd0, datareg); + break; + case XFER_UDMA_0: + OUT_BYTE((0x10 + adj), indexreg); + OUT_BYTE(0x4a, datareg); + OUT_BYTE((0x11 + adj), indexreg); + OUT_BYTE(0x0f, datareg); + OUT_BYTE((0x12 + adj), indexreg); + OUT_BYTE(0xd5, datareg); + break; + case XFER_MW_DMA_2: + OUT_BYTE((0x0e + adj), indexreg); + OUT_BYTE(0x69, datareg); + OUT_BYTE((0x0f + adj), indexreg); + OUT_BYTE(0x25, datareg); + break; + case XFER_MW_DMA_1: + OUT_BYTE((0x0e + adj), indexreg); + OUT_BYTE(0x6b, datareg); + OUT_BYTE((0x0f+ adj), indexreg); + OUT_BYTE(0x27, datareg); + break; + case XFER_MW_DMA_0: + OUT_BYTE((0x0e + 
adj), indexreg); + OUT_BYTE(0xdf, datareg); + OUT_BYTE((0x0f + adj), indexreg); + OUT_BYTE(0x5f, datareg); + break; +#else + switch (speed) { +#endif /* CONFIG_BLK_DEV_IDEDMA */ + case XFER_PIO_4: + OUT_BYTE((0x0c + adj), indexreg); + OUT_BYTE(0x23, datareg); + OUT_BYTE((0x0d + adj), indexreg); + OUT_BYTE(0x09, datareg); + OUT_BYTE((0x13 + adj), indexreg); + OUT_BYTE(0x25, datareg); + break; + case XFER_PIO_3: + OUT_BYTE((0x0c + adj), indexreg); + OUT_BYTE(0x27, datareg); + OUT_BYTE((0x0d + adj), indexreg); + OUT_BYTE(0x0d, datareg); + OUT_BYTE((0x13 + adj), indexreg); + OUT_BYTE(0x35, datareg); + break; + case XFER_PIO_2: + OUT_BYTE((0x0c + adj), indexreg); + OUT_BYTE(0x23, datareg); + OUT_BYTE((0x0d + adj), indexreg); + OUT_BYTE(0x26, datareg); + OUT_BYTE((0x13 + adj), indexreg); + OUT_BYTE(0x64, datareg); + break; + case XFER_PIO_1: + OUT_BYTE((0x0c + adj), indexreg); + OUT_BYTE(0x46, datareg); + OUT_BYTE((0x0d + adj), indexreg); + OUT_BYTE(0x29, datareg); + OUT_BYTE((0x13 + adj), indexreg); + OUT_BYTE(0xa4, datareg); + break; + case XFER_PIO_0: + OUT_BYTE((0x0c + adj), indexreg); + OUT_BYTE(0xfb, datareg); + OUT_BYTE((0x0d + adj), indexreg); + OUT_BYTE(0x2b, datareg); + OUT_BYTE((0x13 + adj), indexreg); + OUT_BYTE(0xac, datareg); + break; + default: + break; + } + + if (!drive->init_speed) + drive->init_speed = speed; + err = ide_config_drive_speed(drive, speed); + drive->current_speed = speed; + + return err; +} + /* 0 1 2 3 4 5 6 7 8 * 960, 480, 390, 300, 240, 180, 120, 90, 60 * 180, 150, 120, 90, 60 @@ -524,18 +700,75 @@ struct pci_dev *dev = hwif->pci_dev; unsigned long high_16 = pci_resource_start(dev, 4); unsigned long dma_base = hwif->dma_base; + unsigned long indexreg = dma_base + 1; + unsigned long datareg = dma_base + 3; + byte iordy = 0x13; + byte adj = (drive->dn%2) ? 0x08 : 0x00; + byte cable = 0; + byte jumpbit = 0; byte unit = (drive->select.b.unit & 0x01); - unsigned int drive_conf; - byte drive_pci; + byte drive_pci = 0; byte test1, test2, speed = -1; byte AP; unsigned short EP; - byte CLKSPD = IN_BYTE(high_16 + 0x11); - byte udma_33 = ultra ? (inb(high_16 + 0x001f) & 1) : 0; + byte CLKSPD = 0; + byte udma_33 = ultra; +// byte udma_33 = ultra ? (IN_BYTE(high_16 + 0x001f) & 1) : 0; byte udma_66 = ((eighty_ninty_three(drive)) && udma_33) ? 1 : 0; - byte udma_100 = (((dev->device == PCI_DEVICE_ID_PROMISE_20265) || (dev->device == PCI_DEVICE_ID_PROMISE_20267) || (dev->device == PCI_DEVICE_ID_PROMISE_20268)) && udma_66) ? 1 : 0; + byte udma_100 = 0; + byte udma_133 = 0; + byte mask = hwif->channel ? 0x08 : 0x02; + unsigned short c_mask = hwif->channel ? (1<<11) : (1<<10); + + byte ultra_66 = ((id->dma_ultra & 0x0010) || + (id->dma_ultra & 0x0008)) ? 1 : 0; + byte ultra_100 = ((id->dma_ultra & 0x0020) || + (ultra_66)) ? 1 : 0; + byte ultra_133 = ((id->dma_ultra & 0x0040) || + (ultra_100)) ? 1 : 0; + + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + udma_133 = (udma_66) ? 1 : 0; + udma_100 = (udma_66) ? 1 : 0; + OUT_BYTE(0x0b, (hwif->dma_base + 1)); + cable = ((IN_BYTE((hwif->dma_base + 3)) & 0x04)); + jumpbit = 1; + break; + case PCI_DEVICE_ID_PROMISE_20268R: + udma_100 = 1; + udma_66 = 1; + OUT_BYTE(0x0b, (hwif->dma_base + 1)); + cable = ((IN_BYTE((hwif->dma_base + 3)) & 0x04)); + jumpbit = 1; + break; + case PCI_DEVICE_ID_PROMISE_20268: + udma_100 = (udma_66) ? 
1 : 0; + OUT_BYTE(0x0b, (hwif->dma_base + 1)); + cable = ((IN_BYTE((hwif->dma_base + 3)) & 0x04)); + jumpbit = 1; + break; + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20265: + udma_100 = (udma_66) ? 1 : 0; + pci_read_config_word(dev, 0x50, &EP); + cable = (EP & c_mask); + jumpbit = 0; + break; + case PCI_DEVICE_ID_PROMISE_20262: + pci_read_config_word(dev, 0x50, &EP); + cable = (EP & c_mask); + jumpbit = 0; + break; + default: + udma_100 = 0; udma_133 = 0; cable = 1; jumpbit = 0; + break; + } + if (!jumpbit) + CLKSPD = IN_BYTE(high_16 + 0x11); /* * Set the control register to use the 66Mhz system * clock for UDMA 3/4 mode operation. If one drive on @@ -549,47 +782,52 @@ * parameters. */ - byte mask = hwif->channel ? 0x08 : 0x02; - unsigned short c_mask = hwif->channel ? (1<<11) : (1<<10); - byte ultra_66 = ((id->dma_ultra & 0x0010) || - (id->dma_ultra & 0x0008)) ? 1 : 0; - byte ultra_100 = ((id->dma_ultra & 0x0020) || - (id->dma_ultra & 0x0010) || - (id->dma_ultra & 0x0008)) ? 1 : 0; - - if (dev->device == PCI_DEVICE_ID_PROMISE_20268) - goto jump_pci_mucking; - - pci_read_config_word(dev, 0x50, &EP); - - if (((ultra_66) || (ultra_100)) && (EP & c_mask)) { + if (((ultra_66) || (ultra_100) || (ultra_133)) && (cable)) { #ifdef DEBUG printk("ULTRA66: %s channel of Ultra 66 requires an 80-pin cable for Ultra66 operation.\n", hwif->channel ? "Secondary" : "Primary"); printk(" Switching to Ultra33 mode.\n"); #endif /* DEBUG */ /* Primary : zero out second bit */ /* Secondary : zero out fourth bit */ - OUT_BYTE(CLKSPD & ~mask, (high_16 + 0x11)); + if (!jumpbit) + OUT_BYTE(CLKSPD & ~mask, (high_16 + 0x11)); + printk("Warning: %s channel requires an 80-pin cable for operation.\n", hwif->channel ? "Secondary":"Primary"); + printk("%s reduced to Ultra33 mode.\n", drive->name); + udma_66 = 0; udma_100 = 0; udma_133 = 0; } else { - if ((ultra_66) || (ultra_100)) { + if ((ultra_66) || (ultra_100) || (ultra_133)) { /* * check to make sure drive on same channel * is u66 capable */ if (hwif->drives[!(drive->dn%2)].id) { - if ((hwif->drives[!(drive->dn%2)].id->dma_ultra & 0x0020) || + if ((hwif->drives[!(drive->dn%2)].id->dma_ultra & 0x0040) || + (hwif->drives[!(drive->dn%2)].id->dma_ultra +& 0x0020) || (hwif->drives[!(drive->dn%2)].id->dma_ultra & 0x0010) || (hwif->drives[!(drive->dn%2)].id->dma_ultra & 0x0008)) { - OUT_BYTE(CLKSPD | mask, (high_16 + 0x11)); + if (!jumpbit) + OUT_BYTE(CLKSPD | mask, (high_16 + 0x11)); } else { - OUT_BYTE(CLKSPD & ~mask, (high_16 + 0x11)); + if (!jumpbit) + OUT_BYTE(CLKSPD & ~mask, (high_16 + 0x11)); } } else { /* udma4 drive by itself */ - OUT_BYTE(CLKSPD | mask, (high_16 + 0x11)); + if (!jumpbit) + OUT_BYTE(CLKSPD | mask, (high_16 + 0x11)); } } } + if (jumpbit) { + if (drive->media != ide_disk) return ide_dma_off_quietly; + if (id->capability & 4) { /* IORDY_EN & PREFETCH_EN */ + OUT_BYTE((iordy + adj), indexreg); + OUT_BYTE((IN_BYTE(datareg)|0x03), datareg); + } + goto jumpbit_is_set; + } + switch(drive->dn) { case 0: drive_pci = 0x60; pci_read_config_dword(dev, drive_pci, &drive_conf); @@ -640,31 +878,33 @@ if (drive->media == ide_disk) /* PREFETCH_EN */ pci_write_config_byte(dev, (drive_pci), AP|PREFETCH_EN); -jump_pci_mucking: +jumpbit_is_set: - if ((id->dma_ultra & 0x0020) && (udma_100)) speed = XFER_UDMA_5; - else if ((id->dma_ultra & 0x0010) && (udma_66)) speed = XFER_UDMA_4; - else if ((id->dma_ultra & 0x0008) && (udma_66)) speed = XFER_UDMA_3; - else if ((id->dma_ultra & 0x0004) && (udma_33)) speed = XFER_UDMA_2; - else if ((id->dma_ultra & 
0x0002) && (udma_33)) speed = XFER_UDMA_1; - else if ((id->dma_ultra & 0x0001) && (udma_33)) speed = XFER_UDMA_0; + if ((id->dma_ultra & 0x0040)&&(udma_133)) speed = XFER_UDMA_6; + else if ((id->dma_ultra & 0x0020)&&(udma_100)) speed = XFER_UDMA_5; + else if ((id->dma_ultra & 0x0010)&&(udma_66)) speed = XFER_UDMA_4; + else if ((id->dma_ultra & 0x0008)&&(udma_66)) speed = XFER_UDMA_3; + else if ((id->dma_ultra & 0x0004)&&(udma_33)) speed = XFER_UDMA_2; + else if ((id->dma_ultra & 0x0002)&&(udma_33)) speed = XFER_UDMA_1; + else if ((id->dma_ultra & 0x0001)&&(udma_33)) speed = XFER_UDMA_0; else if (id->dma_mword & 0x0004) speed = XFER_MW_DMA_2; else if (id->dma_mword & 0x0002) speed = XFER_MW_DMA_1; else if (id->dma_mword & 0x0001) speed = XFER_MW_DMA_0; - else if (id->dma_1word & 0x0004) speed = XFER_SW_DMA_2; - else if (id->dma_1word & 0x0002) speed = XFER_SW_DMA_1; - else if (id->dma_1word & 0x0001) speed = XFER_SW_DMA_0; + else if ((id->dma_1word & 0x0004)&&(!jumpbit)) speed = XFER_SW_DMA_2; + else if ((id->dma_1word & 0x0002)&&(!jumpbit)) speed = XFER_SW_DMA_1; + else if ((id->dma_1word & 0x0001)&&(!jumpbit)) speed = XFER_SW_DMA_0; else { /* restore original pci-config space */ - if (dev->device != PCI_DEVICE_ID_PROMISE_20268) + if (!jumpbit) pci_write_config_dword(dev, drive_pci, drive_conf); return ide_dma_off_quietly; } outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); - (void) pdc202xx_tune_chipset(drive, speed); + (void) hwif->speedproc(drive, speed); - return ((int) ((id->dma_ultra >> 11) & 7) ? ide_dma_on : + return ((int) ((id->dma_ultra >> 14) & 3) ? ide_dma_on : + ((id->dma_ultra >> 11) & 7) ? ide_dma_on : ((id->dma_ultra >> 8) & 7) ? ide_dma_on : ((id->dma_mword >> 8) & 7) ? ide_dma_on : ((id->dma_1word >> 8) & 7) ? ide_dma_on : @@ -685,7 +925,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x007F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive, 1); if ((id->field_valid & 2) && @@ -732,16 +972,65 @@ */ int pdc202xx_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { - byte dma_stat = 0, sc1d = 0; - unsigned long high_16 = pci_resource_start(HWIF(drive)->pci_dev, 4); - unsigned long dma_base = HWIF(drive)->dma_base; + byte dma_stat = 0; + byte sc1d = 0; + byte newchip = 0; + byte clock = 0; + byte hardware48hack = 0; + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + unsigned long high_16 = pci_resource_start(dev, 4); + unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x00); + unsigned long dma_base = hwif->dma_base; + + switch (dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20268R: + case PCI_DEVICE_ID_PROMISE_20268: + newchip = 1; + break; + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20262: + hardware48hack = 1; + clock = IN_BYTE(high_16 + 0x11); + default: + break; + } switch (func) { case ide_dma_check: return config_drive_xfer_rate(drive); + case ide_dma_begin: + /* Note that this is done *after* the cmd has + * been issued to the drive, as per the BM-IDE spec. + * The Promise Ultra33 doesn't work correctly when + * we do this part before issuing the drive cmd. + */ + if ((drive->addressing) && (hardware48hack)) { + struct request *rq = HWGROUP(drive)->rq; + unsigned long word_count = 0; + + outb(clock|(hwif->channel ? 
0x08 : 0x02), high_16 + 0x11); + word_count = (rq->nr_sectors << 8); + word_count = (rq->cmd == READ) ? word_count | 0x05000000 : word_count | 0x06000000; + outl(word_count, atapi_reg); + } + break; + case ide_dma_end: + if ((drive->addressing) && (hardware48hack)) { + outl(0, atapi_reg); /* zero out extra */ + clock = IN_BYTE(high_16 + 0x11); + OUT_BYTE(clock & ~(hwif->channel ? 0x08:0x02), high_16 + 0x11); + } + break; case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */ - dma_stat = inb(dma_base+2); - sc1d = inb(high_16 + 0x001d); + dma_stat = IN_BYTE(dma_base+2); + if (newchip) + return (dma_stat & 4) == 4; + + sc1d = IN_BYTE(high_16 + 0x001d); if (HWIF(drive)->channel) { if ((sc1d & 0x50) == 0x50) goto somebody_else; else if ((sc1d & 0x40) == 0x40) @@ -764,61 +1053,113 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA */ +void pdc202xx_new_reset (ide_drive_t *drive) +{ + OUT_BYTE(0x04,IDE_CONTROL_REG); + mdelay(1000); + OUT_BYTE(0x00,IDE_CONTROL_REG); + mdelay(1000); + printk("PDC202XX: %s channel reset.\n", + HWIF(drive)->channel ? "Secondary" : "Primary"); +} + void pdc202xx_reset (ide_drive_t *drive) { unsigned long high_16 = pci_resource_start(HWIF(drive)->pci_dev, 4); - byte udma_speed_flag = inb(high_16 + 0x001f); + byte udma_speed_flag = IN_BYTE(high_16 + 0x001f); OUT_BYTE(udma_speed_flag | 0x10, high_16 + 0x001f); mdelay(100); OUT_BYTE(udma_speed_flag & ~0x10, high_16 + 0x001f); mdelay(2000); /* 2 seconds ?! */ + printk("PDC202XX: %s channel reset.\n", + HWIF(drive)->channel ? "Secondary" : "Primary"); } -unsigned int __init pci_init_pdc202xx (struct pci_dev *dev, const char *name) +/* + * Since SUN Cobalt is attempting to do this operation, I should disclose + * this has been a long time ago Thu Jul 27 16:40:57 2000 was the patch date + * HOTSWAP ATA Infrastructure. + */ +static int pdc202xx_tristate (ide_drive_t * drive, int state) { - unsigned long high_16 = pci_resource_start(dev, 4); - byte udma_speed_flag = inb(high_16 + 0x001f); - byte primary_mode = inb(high_16 + 0x001a); - byte secondary_mode = inb(high_16 + 0x001b); - - if ((dev->device == PCI_DEVICE_ID_PROMISE_20262) || - (dev->device == PCI_DEVICE_ID_PROMISE_20265) || - (dev->device == PCI_DEVICE_ID_PROMISE_20267)) { - /* - * software reset - this is required because the bios - * will set UDMA timing on if the hdd supports it. The - * user may want to turn udma off. A bug in the pdc20262 - * is that it cannot handle a downgrade in timing from UDMA - * to DMA. Disk accesses after issuing a set feature command - * will result in errors. A software reset leaves the timing - * registers intact, but resets the drives. - */ - - OUT_BYTE(udma_speed_flag | 0x10, high_16 + 0x001f); - mdelay(100); - OUT_BYTE(udma_speed_flag & ~0x10, high_16 + 0x001f); - mdelay(2000); /* 2 seconds ?! 
*/ +#if 0 + ide_hwif_t *hwif = HWIF(drive); + unsigned long high_16 = pci_resource_start(hwif->pci_dev, 4); + byte sc1f = inb(high_16 + 0x001f); + + if (!hwif) + return -EINVAL; + +// hwif->bus_state = state; + + if (state) { + outb(sc1f | 0x08, high_16 + 0x001f); + } else { + outb(sc1f & ~0x08, high_16 + 0x001f); } +#endif + return 0; +} + +unsigned int __init pci_init_pdc202xx (struct pci_dev *dev, const char *name) +{ + unsigned long high_16 = pci_resource_start(dev, 4); + byte udma_speed_flag = IN_BYTE(high_16 + 0x001f); + byte primary_mode = IN_BYTE(high_16 + 0x001a); + byte secondary_mode = IN_BYTE(high_16 + 0x001b); + byte newchip = 0; if (dev->resource[PCI_ROM_RESOURCE].start) { pci_write_config_dword(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); printk("%s: ROM enabled at 0x%08lx\n", name, dev->resource[PCI_ROM_RESOURCE].start); } - if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) { - byte irq = 0, irq2 = 0; - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); - pci_read_config_byte(dev, (PCI_INTERRUPT_LINE)|0x80, &irq2); /* 0xbc */ - if ((irq != irq2) && - (dev->device != PCI_DEVICE_ID_PROMISE_20265) && - (dev->device != PCI_DEVICE_ID_PROMISE_20267) && - (dev->device != PCI_DEVICE_ID_PROMISE_20268)) { - pci_write_config_byte(dev, (PCI_INTERRUPT_LINE)|0x80, irq); /* 0xbc */ - printk("%s: pci-config space interrupt mirror fixed.\n", name); - } + switch (dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20268R: + case PCI_DEVICE_ID_PROMISE_20268: + newchip = 1; + break; + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20265: + OUT_BYTE(udma_speed_flag | 0x10, high_16 + 0x001f); + mdelay(100); + OUT_BYTE(udma_speed_flag & ~0x10, high_16 + 0x001f); + mdelay(2000); /* 2 seconds ?! */ + break; + case PCI_DEVICE_ID_PROMISE_20262: + /* + * software reset - this is required because the bios + * will set UDMA timing on if the hdd supports it. The + * user may want to turn udma off. A bug in the pdc20262 + * is that it cannot handle a downgrade in timing from + * UDMA to DMA. Disk accesses after issuing a set + * feature command will result in errors. A software + * reset leaves the timing registers intact, + * but resets the drives. + */ + OUT_BYTE(udma_speed_flag | 0x10, high_16 + 0x001f); + mdelay(100); + OUT_BYTE(udma_speed_flag & ~0x10, high_16 + 0x001f); + mdelay(2000); /* 2 seconds ?! */ + default: + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) { + byte irq = 0, irq2 = 0; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + pci_read_config_byte(dev, (PCI_INTERRUPT_LINE)|0x80, &irq2); /* 0xbc */ + if (irq != irq2) { + pci_write_config_byte(dev, (PCI_INTERRUPT_LINE)|0x80, irq); /* 0xbc */ + printk("%s: pci-config space interrupt mirror fixed.\n", name); + } + } + break; } + if (newchip) + goto fttk_tx_series; + printk("%s: (U)DMA Burst Bit %sABLED " \ "Primary %s Mode " \ "Secondary %s Mode.\n", @@ -830,8 +1171,8 @@ #ifdef CONFIG_PDC202XX_BURST if (!(udma_speed_flag & 1)) { printk("%s: FORCING BURST BIT 0x%02x -> 0x%02x ", name, udma_speed_flag, (udma_speed_flag|1)); - outb(udma_speed_flag|1, high_16 + 0x001f); - printk("%sCTIVE\n", (inb(high_16 + 0x001f) & 1) ? "A" : "INA"); + OUT_BYTE(udma_speed_flag|1, high_16 + 0x001f); + printk("%sCTIVE\n", (IN_BYTE(high_16 + 0x001f) & 1) ? 
"A" : "INA"); } #endif /* CONFIG_PDC202XX_BURST */ @@ -839,18 +1180,20 @@ if (!(primary_mode & 1)) { printk("%s: FORCING PRIMARY MODE BIT 0x%02x -> 0x%02x ", name, primary_mode, (primary_mode|1)); - outb(primary_mode|1, high_16 + 0x001a); - printk("%s\n", (inb(high_16 + 0x001a) & 1) ? "MASTER" : "PCI"); + OUT_BYTE(primary_mode|1, high_16 + 0x001a); + printk("%s\n", (IN_BYTE(high_16 + 0x001a) & 1) ? "MASTER" : "PCI"); } if (!(secondary_mode & 1)) { printk("%s: FORCING SECONDARY MODE BIT 0x%02x -> 0x%02x ", name, secondary_mode, (secondary_mode|1)); - outb(secondary_mode|1, high_16 + 0x001b); - printk("%s\n", (inb(high_16 + 0x001b) & 1) ? "MASTER" : "PCI"); + OUT_BYTE(secondary_mode|1, high_16 + 0x001b); + printk("%s\n", (IN_BYTE(high_16 + 0x001b) & 1) ? "MASTER" : "PCI"); } #endif /* CONFIG_PDC202XX_MASTER */ +fttk_tx_series: + #if defined(DISPLAY_PDC202XX_TIMINGS) && defined(CONFIG_PROC_FS) if (!pdc202xx_proc) { pdc202xx_proc = 1; @@ -866,20 +1209,41 @@ unsigned short mask = (hwif->channel) ? (1<<11) : (1<<10); unsigned short CIS; - pci_read_config_word(hwif->pci_dev, 0x50, &CIS); - return ((CIS & mask) ? 0 : 1); + switch(hwif->pci_dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20268R: + OUT_BYTE(0x0b, (hwif->dma_base + 1)); + return (!(IN_BYTE((hwif->dma_base + 3)) & 0x04)); + default: + pci_read_config_word(hwif->pci_dev, 0x50, &CIS); + return (!(CIS & mask)); + } } void __init ide_init_pdc202xx (ide_hwif_t *hwif) { - hwif->tuneproc = &pdc202xx_tune_drive; - hwif->speedproc = &pdc202xx_tune_chipset; - hwif->quirkproc = &pdc202xx_quirkproc; - - if ((hwif->pci_dev->device == PCI_DEVICE_ID_PROMISE_20262) || - (hwif->pci_dev->device == PCI_DEVICE_ID_PROMISE_20265) || - (hwif->pci_dev->device == PCI_DEVICE_ID_PROMISE_20267)) { - hwif->resetproc = &pdc202xx_reset; + hwif->tuneproc = &pdc202xx_tune_drive; + hwif->quirkproc = &pdc202xx_quirkproc; + + switch(hwif->pci_dev->device) { + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20268R: + hwif->speedproc = &pdc202xx_new_tune_chipset; + hwif->resetproc = &pdc202xx_new_reset; + break; + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20262: + hwif->busproc = &pdc202xx_tristate; + hwif->resetproc = &pdc202xx_reset; + case PCI_DEVICE_ID_PROMISE_20246: + hwif->speedproc = &pdc202xx_tune_chipset; + default: + break; } #undef CONFIG_PDC202XX_32_UNMASK diff -urN linux-2.4.17-rc2-virgin/drivers/ide/pdc4030.c linux-2.4.17-rc2-wli2/drivers/ide/pdc4030.c --- linux-2.4.17-rc2-virgin/drivers/ide/pdc4030.c Tue Jul 17 18:53:55 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/pdc4030.c Thu Dec 20 19:49:13 2001 @@ -89,6 +89,12 @@ #include "pdc4030.h" +#ifdef CONFIG_IDE_TASKFILE_IO +# define __TASKFILE__IO +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + /* * promise_selectproc() is invoked by ide.c * in preparation for access to the specified drive. 
@@ -298,8 +304,6 @@ } } - - /* * promise_read_intr() is the handler for disk read/multread interrupts */ @@ -495,11 +499,15 @@ */ ide_startstop_t do_pdc4030_io (ide_drive_t *drive, struct request *rq) { + ide_startstop_t startstop; unsigned long timeout; byte stat; - if (rq->cmd == READ) { - OUT_BYTE(PROMISE_READ, IDE_COMMAND_REG); + switch(rq->cmd) { + case READ: +#ifndef __TASKFILE__IO + OUT_BYTE(PROMISE_READ, IDE_COMMAND_REG); +#endif /* * The card's behaviour is odd at this point. If the data is * available, DRQ will be true, and no interrupt will be * If neither of these is the case, we wait for up to 50ms (badly I'm * afraid!) until one of them is. */ - timeout = jiffies + HZ/20; /* 50ms wait */ - do { - stat=GET_STAT(); - if (stat & DRQ_STAT) { - udelay(1); - return promise_read_intr(drive); - } - if (IN_BYTE(IDE_SELECT_REG) & 0x01) { + timeout = jiffies + HZ/20; /* 50ms wait */ + do { + stat=GET_STAT(); + if (stat & DRQ_STAT) { + udelay(1); + return promise_read_intr(drive); + } + if (IN_BYTE(IDE_SELECT_REG) & 0x01) { #ifdef DEBUG_READ - printk(KERN_DEBUG "%s: read: waiting for " - "interrupt\n", drive->name); + printk(KERN_DEBUG "%s: read: waiting for interrupt\n", drive->name); #endif - ide_set_handler(drive, &promise_read_intr, WAIT_CMD, NULL); - return ide_started; - } - udelay(1); - } while (time_before(jiffies, timeout)); - - printk(KERN_ERR "%s: reading: No DRQ and not waiting - Odd!\n", - drive->name); - return ide_stopped; - } else if (rq->cmd == WRITE) { - ide_startstop_t startstop; - OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG); - if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { - printk(KERN_ERR "%s: no DRQ after issuing " - "PROMISE_WRITE\n", drive->name); - return startstop; - } - if (!drive->unmask) - __cli(); /* local CPU only */ - HWGROUP(drive)->wrq = *rq; /* scratchpad */ - return promise_write(drive); + ide_set_handler(drive, &promise_read_intr, WAIT_CMD, NULL); + return ide_started; + } + udelay(1); + } while (time_before(jiffies, timeout)); - } else { - printk("KERN_WARNING %s: bad command: %d\n", - drive->name, rq->cmd); - ide_end_request(0, HWGROUP(drive)); - return ide_stopped; + printk(KERN_ERR "%s: reading: No DRQ and not waiting - Odd!\n", drive->name); + return ide_stopped; + case WRITE: +#ifndef __TASKFILE__IO + OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG); +#endif + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing PROMISE_WRITE\n", drive->name); + return startstop; + } + if (!drive->unmask) + __cli(); /* local CPU only */ + HWGROUP(drive)->wrq = *rq; /* scratchpad */ + return promise_write(drive); + default: + printk(KERN_WARNING "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; } } + +#ifdef __TASKFILE__IO + +ide_startstop_t promise_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + + taskfile.sector_count = rq->nr_sectors; + taskfile.sector_number = block; + taskfile.low_cylinder = (block>>=8); + taskfile.high_cylinder = (block>>=8); + taskfile.device_head = ((block>>8)&0x0f)|drive->select.all; + taskfile.command = (rq->cmd==READ)?PROMISE_READ:PROMISE_WRITE; + + do_taskfile(drive, &taskfile, NULL, NULL); + return do_pdc4030_io(drive, rq); +} +#endif + diff -urN linux-2.4.17-rc2-virgin/drivers/ide/pdcadma.c linux-2.4.17-rc2-wli2/drivers/ide/pdcadma.c --- 
linux-2.4.17-rc2-virgin/drivers/ide/pdcadma.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/drivers/ide/pdcadma.c Thu Dec 20 19:49:13 2001 @@ -0,0 +1,109 @@ +/* + * linux/drivers/ide/pdcadma.c Version 0.01 June 21, 2001 + * + * Copyright (C) 1999-2000 Andre Hedrick + * May be copied or modified under the terms of the GNU General Public License + * + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/ioport.h> +#include <linux/blkdev.h> +#include <linux/hdreg.h> + +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ide.h> + +#include <asm/io.h> +#include <asm/irq.h> + +#include "ide_modes.h" + +#undef DISPLAY_PDCADMA_TIMINGS + +#if defined(DISPLAY_PDCADMA_TIMINGS) && defined(CONFIG_PROC_FS) +#include <linux/stat.h> +#include <linux/proc_fs.h> + +static int pdcadma_get_info(char *, char **, off_t, int); +extern int (*pdcadma_display_info)(char *, char **, off_t, int); /* ide-proc.c */ +extern char *ide_media_verbose(ide_drive_t *); +static struct pci_dev *bmide_dev; + +static int pdcadma_get_info (char *buffer, char **addr, off_t offset, int count) +{ + char *p = buffer; + u32 bibma = pci_resource_start(bmide_dev, 4); + + p += sprintf(p, "\n PDC ADMA %04X Chipset.\n", bmide_dev->device); + p += sprintf(p, "UDMA\n"); + p += sprintf(p, "PIO\n"); + + return p-buffer; /* => must be less than 4k! */ +} +#endif /* defined(DISPLAY_PDCADMA_TIMINGS) && defined(CONFIG_PROC_FS) */ + +byte pdcadma_proc = 0; + +extern char *ide_xfer_verbose (byte xfer_rate); + +#ifdef CONFIG_BLK_DEV_IDEDMA +/* + * pdcadma_dmaproc() initiates/aborts (U)DMA read/write operations on a drive. + */ + +int pdcadma_dmaproc (ide_dma_action_t func, ide_drive_t *drive) +{ + switch (func) { + case ide_dma_check: + func = ide_dma_off_quietly; + default: + break; + } + return ide_dmaproc(func, drive); /* use standard DMA stuff */ +} +#endif /* CONFIG_BLK_DEV_IDEDMA */ + +unsigned int __init pci_init_pdcadma (struct pci_dev *dev, const char *name) +{ +#if defined(DISPLAY_PDCADMA_TIMINGS) && defined(CONFIG_PROC_FS) + if (!pdcadma_proc) { + pdcadma_proc = 1; + bmide_dev = dev; + pdcadma_display_info = &pdcadma_get_info; + } +#endif /* DISPLAY_PDCADMA_TIMINGS && CONFIG_PROC_FS */ + return 0; +} + +unsigned int __init ata66_pdcadma (ide_hwif_t *hwif) +{ + return 1; +} + +void __init ide_init_pdcadma (ide_hwif_t *hwif) +{ + hwif->autodma = 0; + hwif->dma_base = 0; + +// hwif->tuneproc = &pdcadma_tune_drive; +// hwif->speedproc = &pdcadma_tune_chipset; + +// if (hwif->dma_base) { +// hwif->dmaproc = &pdcadma_dmaproc; +// hwif->autodma = 1; +// } +} + +void __init ide_dmacapable_pdcadma (ide_hwif_t *hwif, unsigned long dmabase) +{ +// ide_setup_dma(hwif, dmabase, 8); +} + diff -urN linux-2.4.17-rc2-virgin/drivers/ide/piix.c linux-2.4.17-rc2-wli2/drivers/ide/piix.c --- linux-2.4.17-rc2-virgin/drivers/ide/piix.c Thu Oct 25 13:53:47 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/piix.c Thu Dec 20 19:49:13 2001 @@ -425,7 +425,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = piix_config_drive_for_dma(drive); if ((id->field_valid & 2) && diff -urN linux-2.4.17-rc2-virgin/drivers/ide/qd65xx.c linux-2.4.17-rc2-wli2/drivers/ide/qd65xx.c --- linux-2.4.17-rc2-virgin/drivers/ide/qd65xx.c Fri Sep 7 09:28:38 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/qd65xx.c Thu Dec 20 19:49:13 2001 @@ -1,14 +1,15 @@ /* - * linux/drivers/ide/qd65xx.c Version 0.06 Aug 3, 2000 + * linux/drivers/ide/qd65xx.c Version 0.07 Sep 30, 2001 * - * Copyright (C) 1996-2000 Linus Torvalds & author (see below) + * Copyright (C) 1996-2001 Linus Torvalds & 
author (see below) */ /* * Version 0.03 Cleaned auto-tune, added probe * Version 0.04 Added second channel tuning * Version 0.05 Enhanced tuning ; added qd6500 support - * Version 0.06 added dos driver's list + * Version 0.06 Added dos driver's list + * Version 0.07 Second channel bug fix * * QDI QD6500/QD6580 EIDE controller fast support * @@ -67,6 +68,7 @@ * qd6500: 1100 * qd6580: either 1010 or 0101 * + * * base+0x02: Timer2 (qd6580 only) * * @@ -137,12 +139,12 @@ { byte active_cycle,recovery_cycle; - if (system_bus_clock()<=33) { - active_cycle = 9 - IDE_IN(active_time * system_bus_clock() / 1000 + 1, 2, 9); - recovery_cycle = 15 - IDE_IN(recovery_time * system_bus_clock() / 1000 + 1, 0, 15); + if (ide_system_bus_speed()<=33) { + active_cycle = 9 - IDE_IN(active_time * ide_system_bus_speed() / 1000 + 1, 2, 9); + recovery_cycle = 15 - IDE_IN(recovery_time * ide_system_bus_speed() / 1000 + 1, 0, 15); } else { - active_cycle = 8 - IDE_IN(active_time * system_bus_clock() / 1000 + 1, 1, 8); - recovery_cycle = 18 - IDE_IN(recovery_time * system_bus_clock() / 1000 + 1, 3, 18); + active_cycle = 8 - IDE_IN(active_time * ide_system_bus_speed() / 1000 + 1, 1, 8); + recovery_cycle = 18 - IDE_IN(recovery_time * ide_system_bus_speed() / 1000 + 1, 3, 18); } return((recovery_cycle<<4) | 0x08 | active_cycle); @@ -156,8 +158,8 @@ static byte qd6580_compute_timing (int active_time, int recovery_time) { - byte active_cycle = 17-IDE_IN(active_time * system_bus_clock() / 1000 + 1, 2, 17); - byte recovery_cycle = 15-IDE_IN(recovery_time * system_bus_clock() / 1000 + 1, 2, 15); + byte active_cycle = 17-IDE_IN(active_time * ide_system_bus_speed() / 1000 + 1, 2, 17); + byte recovery_cycle = 15-IDE_IN(recovery_time * ide_system_bus_speed() / 1000 + 1, 2, 15); return((recovery_cycle<<4) | active_cycle); } @@ -427,7 +429,8 @@ ide_hwifs[i].tuneproc = &qd6580_tune_drive; for (j=0;j<2;j++) { - ide_hwifs[i].drives[j].drive_data = QD6580_DEF_DATA; + ide_hwifs[i].drives[j].drive_data = + i?QD6580_DEF_DATA2:QD6580_DEF_DATA; ide_hwifs[i].drives[j].io_32bit = 1; } } diff -urN linux-2.4.17-rc2-virgin/drivers/ide/qd65xx.h linux-2.4.17-rc2-wli2/drivers/ide/qd65xx.h --- linux-2.4.17-rc2-virgin/drivers/ide/qd65xx.h Fri Sep 7 09:28:38 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/qd65xx.h Thu Dec 20 19:49:13 2001 @@ -29,7 +29,7 @@ #define QD_CONTR_SEC_DISABLED 0x01 -#define QD_ID3 (config & QD_CONFIG_ID3) +#define QD_ID3 ((config & QD_CONFIG_ID3)!=0) #define QD_CONFIG(hwif) ((hwif)->config_data & 0x00ff) #define QD_CONTROL(hwif) (((hwif)->config_data & 0xff00) >> 8) @@ -39,6 +39,7 @@ #define QD6500_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08)) #define QD6580_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00)) +#define QD6580_DEF_DATA2 ((QD_TIM2_PORT<<8) | (QD_ID3 ? 0x0a : 0x00)) #define QD_DEF_CONTR (0x40 | ((control & 0x02) ? 0x9f : 0x1f)) #define QD_TESTVAL 0x19 /* safe value */ diff -urN linux-2.4.17-rc2-virgin/drivers/ide/serverworks.c linux-2.4.17-rc2-wli2/drivers/ide/serverworks.c --- linux-2.4.17-rc2-virgin/drivers/ide/serverworks.c Sun Sep 9 10:43:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/serverworks.c Thu Dec 20 19:49:13 2001 @@ -1,16 +1,26 @@ /* - * linux/drivers/ide/serverworks.c Version 0.2 17 Oct 2000 + * linux/drivers/ide/serverworks.c Version 0.3 26 Oct 2001 * - * Copyright (C) 2000 Cobalt Networks, Inc. 
- * May be copied or modified under the terms of the GNU General Public License + * May be copied or modified under the terms of the GNU General Public License * - * interface borrowed from alim15x3.c: - * Copyright (C) 1998-2000 Michel Aubry, Maintainer - * Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer + * Copyright (C) 1998-2000 Michel Aubry + * Copyright (C) 1998-2000 Andrzej Krzysztofowicz + * Copyright (C) 1998-2000 Andre Hedrick + * Portions copyright (c) 2001 Sun Microsystems * - * Copyright (C) 1998-2000 Andre Hedrick * - * IDE support for the ServerWorks OSB4 IDE chipset + * RCC/ServerWorks IDE driver for Linux + * + * OSB4: `Open South Bridge' IDE Interface (fn 1) + * supports UDMA mode 2 (33 MB/s) + * + * CSB5: `Champion South Bridge' IDE Interface (fn 1) + * all revisions support UDMA mode 4 (66 MB/s) + * revision A2.0 and up support UDMA mode 5 (100 MB/s) + * + * *** The CSB5 does not provide ANY register *** + * *** to detect 80-conductor cable presence. *** + * * * here's the default lspci: * @@ -83,15 +93,15 @@ #include "ide_modes.h" -#define SVWKS_DEBUG_DRIVE_INFO 0 - -#define DISPLAY_SVWKS_TIMINGS +#define DISPLAY_SVWKS_TIMINGS 1 +#undef SVWKS_DEBUG_DRIVE_INFO #if defined(DISPLAY_SVWKS_TIMINGS) && defined(CONFIG_PROC_FS) #include #include static struct pci_dev *bmide_dev; +static byte svwks_revision = 0; static int svwks_get_info(char *, char **, off_t, int); extern int (*svwks_display_info)(char *, char **, off_t, int); /* ide-proc.c */ @@ -103,7 +113,7 @@ u32 bibma = pci_resource_start(bmide_dev, 4); u32 reg40, reg44; u16 reg48, reg56; - u8 c0 = 0, c1 = 0, reg54; + u8 reg54, c0=0, c1=0; pci_read_config_dword(bmide_dev, 0x40, ®40); pci_read_config_dword(bmide_dev, 0x44, ®44); @@ -120,20 +130,23 @@ switch(bmide_dev->device) { case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE: - p += sprintf(p, "\n ServerWorks CSB5 Chipset.\n"); + p += sprintf(p, "\n " + "ServerWorks CSB5 Chipset (rev %02x)\n", + svwks_revision); break; - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - p += sprintf(p, "\n ServerWorks OSB4 Chipset.\n"); + case PCI_DEVICE_ID_SERVERWORKS_OSB4IDE: + p += sprintf(p, "\n " + "ServerWorks OSB4 Chipset (rev %02x)\n", + svwks_revision); break; default: - p += sprintf(p, "\n ServerWorks 0x%04x Chipset.\n", bmide_dev->device); + p += sprintf(p, "\n " + "ServerWorks %04x Chipset (rev %02x)\n", + bmide_dev->device, svwks_revision); break; } p += sprintf(p, "------------------------------- General Status ---------------------------------\n"); -#if 0 - p += sprintf(p, " : %s\n", "str"); -#endif p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); p += sprintf(p, " %sabled %sabled\n", (c0&0x80) ? 
"dis" : " en", @@ -191,11 +204,7 @@ ((reg44&0x00210000)==0x00210000)?"1": ((reg44&0x00770000)==0x00770000)?"0": ((reg44&0x00FF0000)==0x00FF0000)?"X":"?"); -#if 0 - if (bmide_dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) - p += sprintf(p, "PIO enabled: %s %s %s %s\n", - if (bmide_dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4) -#endif + p += sprintf(p, "PIO enabled: %s %s %s %s\n", ((reg40&0x00002000)==0x00002000)?"4": ((reg40&0x00002200)==0x00002200)?"3": @@ -221,7 +230,7 @@ } #endif /* defined(DISPLAY_SVWKS_TIMINGS) && defined(CONFIG_PROC_FS) */ -static byte svwks_revision = 0; +#define SVWKS_CSB5_REVISION_NEW 0x92 /* min PCI_REVISION_ID for UDMA5 (A2.0) */ byte svwks_proc = 0; @@ -292,6 +301,7 @@ pio_timing |= pio_modes[speed - XFER_PIO_0]; csb5_pio |= ((speed - XFER_PIO_0) << (4*drive->dn)); break; + #ifdef CONFIG_BLK_DEV_IDEDMA case XFER_MW_DMA_2: case XFER_MW_DMA_1: @@ -307,9 +317,9 @@ case XFER_UDMA_2: case XFER_UDMA_1: case XFER_UDMA_0: - pio_timing |= pio_modes[pio]; - csb5_pio |= (pio << (4*drive->dn)); - dma_timing |= dma_modes[2]; + pio_timing |= pio_modes[pio]; + csb5_pio |= (pio << (4*drive->dn)); + dma_timing |= dma_modes[2]; ultra_timing |= ((udma_modes[speed - XFER_UDMA_0]) << (4*unit)); ultra_enable |= (0x01 << drive->dn); #endif @@ -322,9 +332,9 @@ drive->name, ultra_timing, dma_timing, pio_timing); #endif -#if OSB4_DEBUG_DRIVE_INFO +#if SVWKS_DEBUG_DRIVE_INFO printk("%s: %s drive%d\n", drive->name, ide_xfer_verbose(speed), drive->dn); -#endif /* OSB4_DEBUG_DRIVE_INFO */ +#endif /* SVWKS_DEBUG_DRIVE_INFO */ if (!drive->init_speed) drive->init_speed = speed; @@ -338,11 +348,10 @@ pci_write_config_byte(dev, drive_pci3, ultra_timing); pci_write_config_byte(dev, 0x54, ultra_enable); - if (speed > XFER_PIO_4) { + if (speed > XFER_PIO_4) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); - } else { + else outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); - } #endif /* CONFIG_BLK_DEV_IDEDMA */ err = ide_config_drive_speed(drive, speed); @@ -354,25 +363,24 @@ { unsigned short eide_pio_timing[6] = {960, 480, 240, 180, 120, 90}; unsigned short xfer_pio = drive->id->eide_pio_modes; - byte timing, speed, pio; + byte timing, speed, pio; pio = ide_get_best_pio_mode(drive, 255, 5, NULL); if (xfer_pio> 4) xfer_pio = 0; - if (drive->id->eide_pio_iordy > 0) { + if (drive->id->eide_pio_iordy > 0) for (xfer_pio = 5; xfer_pio>0 && drive->id->eide_pio_iordy>eide_pio_timing[xfer_pio]; xfer_pio--); - } else { + else xfer_pio = (drive->id->eide_pio_modes & 4) ? 0x05 : (drive->id->eide_pio_modes & 2) ? 0x04 : (drive->id->eide_pio_modes & 1) ? 0x03 : (drive->id->tPIO & 2) ? 0x02 : (drive->id->tPIO & 1) ? 0x01 : xfer_pio; - } timing = (xfer_pio >= pio) ? xfer_pio : pio; @@ -407,12 +415,10 @@ { struct hd_driveid *id = drive->id; struct pci_dev *dev = HWIF(drive)->pci_dev; - byte udma_66 = eighty_ninty_three(drive); - byte speed; - - int ultra66 = (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ? 1 : 0; - /* need specs to figure out if osb4 is capable of ata/66/100 */ - int ultra100 = (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ? 1 : 0; + byte udma_66 = eighty_ninty_three(drive); + int ultra66 = (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ? 1 : 0; + int ultra100 = (ultra66 && svwks_revision >= SVWKS_CSB5_REVISION_NEW) ? 
1 : 0; + byte speed; if ((id->dma_ultra & 0x0020) && (udma_66) && (ultra100)) { speed = XFER_UDMA_5; @@ -458,7 +464,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive); if ((id->field_valid & 2) && @@ -499,7 +505,41 @@ switch (func) { case ide_dma_check: return config_drive_xfer_rate(drive); - default : + case ide_dma_end: + { + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + + if(inb(dma_base+0x02)&1) + { +#if 0 + int i; + printk(KERN_ERR "Curious - OSB4 thinks the DMA is still running.\n"); + for(i=0;i<10;i++) + { + if(!(inb(dma_base+0x02)&1)) + { + printk(KERN_ERR "OSB4 now finished.\n"); + break; + } + udelay(5); + } +#endif + printk(KERN_CRIT "Serverworks OSB4 in impossible state.\n"); + printk(KERN_CRIT "Disable UDMA or if you are using Seagate then try switching disk types\n"); + printk(KERN_CRIT "on this controller. Please report this event to osb4-bug@ide.cabal.tm\n"); +#if 0 + /* Panic might sys_sync -> death by corrupt disk */ + panic("OSB4: continuing might cause disk corruption.\n"); +#else + printk(KERN_CRIT "OSB4: continuing might cause disk corruption.\n"); + while(1) + cpu_relax(); +#endif + } + /* and drop through */ + } + default: break; } /* Other cases are done by generic IDE-DMA code. */ @@ -509,30 +549,43 @@ unsigned int __init pci_init_svwks (struct pci_dev *dev, const char *name) { - unsigned int reg64; + unsigned int reg; + byte btr; + /* save revision id to determine DMA capability */ pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision); - if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { - isa_dev = pci_find_device(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL); + /* force Master Latency Timer value to 64 PCICLKs */ + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x40); - pci_read_config_dword(isa_dev, 0x64, ®64); -#ifdef DEBUG - printk("%s: reg64 == 0x%08x\n", name, reg64); -#endif + /* OSB4 : South Bridge and IDE */ + if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { + isa_dev = pci_find_device(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL); + if (isa_dev) { + pci_read_config_dword(isa_dev, 0x64, ®); + reg &= ~0x00002000; /* disable 600ns interrupt mask */ + reg |= 0x00004000; /* enable UDMA/33 support */ + pci_write_config_dword(isa_dev, 0x64, reg); + } + } -// reg64 &= ~0x0000A000; -//#ifdef CONFIG_SMP -// reg64 |= 0x00008000; -//#endif - /* Assume the APIC was set up properly by the BIOS for now . If it - wasnt we need to do a fix up _way_ earlier. Bits 15,10,3 control - APIC enable, routing and decode */ - - reg64 &= ~0x00002000; - pci_write_config_dword(isa_dev, 0x64, reg64); + /* setup CSB5 : South Bridge and IDE */ + else if (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) { + /* setup the UDMA Control register + * + * 1. clear bit 6 to enable DMA + * 2. enable DMA modes with bits 0-1 + * 00 : legacy + * 01 : udma2 + * 10 : udma2/udma4 + * 11 : udma2/udma4/udma5 + */ + pci_read_config_byte(dev, 0x5A, &btr); + btr &= ~0x40; + btr |= (svwks_revision >= SVWKS_CSB5_REVISION_NEW) ? 0x3 : 0x2; + pci_write_config_byte(dev, 0x5A, btr); } - pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x40); #if defined(DISPLAY_SVWKS_TIMINGS) && defined(CONFIG_PROC_FS) if (!svwks_proc) { @@ -551,26 +604,46 @@ * Bit 14 clear = primary IDE channel does not have 80-pin cable. * Bit 14 set = primary IDE channel has 80-pin cable. 
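The per-channel cable bits described above make the CSB5 check a single bit test; an illustration with made-up subsystem IDs (the shift is the one used by ata66_svwks_dell() and ata66_svwks_cobalt() below):

	/* hwif->channel == 0 (primary):   1 << (0 + 14) = 0x4000
	 * hwif->channel == 1 (secondary): 1 << (1 + 14) = 0x8000
	 *
	 * So a hypothetical dev->subsystem_device of 0xc000 reports an
	 * 80-conductor cable on both channels, 0x4000 on the primary only,
	 * and 0x0000 keeps both channels limited to Ultra33 timings. */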
*/ - static unsigned int __init ata66_svwks_dell (ide_hwif_t *hwif) { - struct pci_dev *dev = hwif->pci_dev; - if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL && - dev->vendor == PCI_VENDOR_ID_SERVERWORKS && - dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) + struct pci_dev *dev = hwif->pci_dev; + if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL && + dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) return ((1 << (hwif->channel + 14)) & dev->subsystem_device) ? 1 : 0; - return 0; +} +/* Sun Cobalt Alpine hardware avoids the 80-pin cable + * detect issue by attaching the drives directly to the board. + * This check follows the Dell precedent (how scary is that?!) + * + * WARNING: this only works on Alpine hardware! + */ +static unsigned int __init ata66_svwks_cobalt (ide_hwif_t *hwif) +{ + struct pci_dev *dev = hwif->pci_dev; + if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN && + dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) + return ((1 << (hwif->channel + 14)) & + dev->subsystem_device) ? 1 : 0; + return 0; } unsigned int __init ata66_svwks (ide_hwif_t *hwif) { - struct pci_dev *dev = hwif->pci_dev; + struct pci_dev *dev = hwif->pci_dev; + + /* Dell PowerEdge */ if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL) return ata66_svwks_dell (hwif); - + + /* Cobalt Alpine */ + if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN) + return ata66_svwks_cobalt (hwif); + return 0; } @@ -586,9 +659,7 @@ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; hwif->autodma = 0; - return; #else /* CONFIG_BLK_DEV_IDEDMA */ - if (hwif->dma_base) { if (!noautodma) hwif->autodma = 1; diff -urN linux-2.4.17-rc2-virgin/drivers/ide/slc90e66.c linux-2.4.17-rc2-wli2/drivers/ide/slc90e66.c --- linux-2.4.17-rc2-virgin/drivers/ide/slc90e66.c Sun Jul 15 16:22:23 2001 +++ linux-2.4.17-rc2-wli2/drivers/ide/slc90e66.c Thu Dec 20 19:49:13 2001 @@ -86,8 +86,13 @@ * at that point bibma+0x2 et bibma+0xa are byte registers * to investigate: */ +#ifdef __mips__ /* only for mips? */ + c0 = inb_p(bibma + 0x02); + c1 = inb_p(bibma + 0x0a); +#else c0 = inb_p((unsigned short)bibma + 0x02); c1 = inb_p((unsigned short)bibma + 0x0a); +#endif p += sprintf(p, " SLC90E66 Chipset.\n"); p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); @@ -253,7 +258,9 @@ case XFER_MW_DMA_2: case XFER_MW_DMA_1: case XFER_SW_DMA_2: break; +#if 0 /* allow PIO modes */ default: return -1; +#endif } if (speed >= XFER_UDMA_0) { @@ -291,6 +298,13 @@ byte speed = 0; byte udma_66 = eighty_ninty_three(drive); +#if 1 /* allow PIO modes */ + if (!HWIF(drive)->autodma) { + speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL); + (void) slc90e66_tune_chipset(drive, speed); + return ((int) ide_dma_off_quietly); + } +#endif if ((id->dma_ultra & 0x0010) && (ultra)) { speed = (udma_66) ? 
XFER_UDMA_4 : XFER_UDMA_2; } else if ((id->dma_ultra & 0x0008) && (ultra)) { diff -urN linux-2.4.17-rc2-virgin/drivers/ieee1394/csr.c linux-2.4.17-rc2-wli2/drivers/ieee1394/csr.c --- linux-2.4.17-rc2-virgin/drivers/ieee1394/csr.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/drivers/ieee1394/csr.c Tue Dec 18 22:28:42 2001 @@ -10,6 +10,7 @@ */ #include +#include #include "ieee1394_types.h" #include "hosts.h" diff -urN linux-2.4.17-rc2-virgin/drivers/scsi/ide-scsi.c linux-2.4.17-rc2-wli2/drivers/scsi/ide-scsi.c --- linux-2.4.17-rc2-virgin/drivers/scsi/ide-scsi.c Tue Dec 18 21:21:44 2001 +++ linux-2.4.17-rc2-wli2/drivers/scsi/ide-scsi.c Thu Dec 20 19:49:13 2001 @@ -529,28 +529,35 @@ return 0; } +int idescsi_reinit(ide_drive_t *drive); + /* * IDE subdriver functions, registered with ide.c */ static ide_driver_t idescsi_driver = { - "ide-scsi", /* name */ - IDESCSI_VERSION, /* version */ - ide_scsi, /* media */ - 0, /* busy */ - 1, /* supports_dma */ - 0, /* supports_dsc_overlap */ - idescsi_cleanup, /* cleanup */ - idescsi_do_request, /* do_request */ - idescsi_end_request, /* end_request */ - NULL, /* ioctl */ - idescsi_open, /* open */ - idescsi_ide_release, /* release */ - NULL, /* media_change */ - NULL, /* revalidate */ - NULL, /* pre_reset */ - NULL, /* capacity */ - NULL, /* special */ - NULL /* proc */ + name: "ide-scsi", + version: IDESCSI_VERSION, + media: ide_scsi, + busy: 0, + supports_dma: 1, + supports_dsc_overlap: 0, + cleanup: idescsi_cleanup, + standby: NULL, + flushcache: NULL, + do_request: idescsi_do_request, + end_request: idescsi_end_request, + ioctl: NULL, + open: idescsi_open, + release: idescsi_ide_release, + media_change: NULL, + revalidate: NULL, + pre_reset: NULL, + capacity: NULL, + special: NULL, + proc: NULL, + reinit: idescsi_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, }; int idescsi_init (void); @@ -561,6 +568,43 @@ NULL }; +int idescsi_reinit (ide_drive_t *drive) +{ +#if 0 + idescsi_scsi_t *scsi; + byte media[] = {TYPE_DISK, TYPE_TAPE, TYPE_PROCESSOR, TYPE_WORM, TYPE_ROM, TYPE_SCANNER, TYPE_MOD, 255}; + int i, failed, id; + + if (!idescsi_initialized) + return 0; + for (i = 0; i < MAX_HWIFS * MAX_DRIVES; i++) + idescsi_drives[i] = NULL; + + MOD_INC_USE_COUNT; + for (i = 0; media[i] != 255; i++) { + failed = 0; + while ((drive = ide_scan_devices (media[i], idescsi_driver.name, NULL, failed++)) != NULL) { + + if ((scsi = (idescsi_scsi_t *) kmalloc (sizeof (idescsi_scsi_t), GFP_KERNEL)) == NULL) { + printk (KERN_ERR "ide-scsi: %s: Can't allocate a scsi structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &idescsi_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-scsi: %s: Failed to register the driver with ide.c\n", drive->name); + kfree (scsi); + continue; + } + for (id = 0; id < MAX_HWIFS * MAX_DRIVES && idescsi_drives[id]; id++); + idescsi_setup (drive, scsi, id); + failed--; + } + } + ide_register_module(&idescsi_module); + MOD_DEC_USE_COUNT; +#endif + return 0; +} + /* * idescsi_init will register the driver for each scsi. 
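 *
 * Aside on the driver table above: it now uses GNU C labeled
 * initializers, binding each field by name rather than by position.
 * A minimal sketch of the form (names hypothetical):
 *
 *	static ide_driver_t example_driver = {
 *		name:		"example",
 *		media:		ide_scsi,
 *		do_request:	example_do_request,
 *	};
 *
 * so adding or reordering ide_driver_t fields no longer silently
 * shifts the remaining positional entries.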
*/ @@ -591,7 +635,7 @@ continue; } for (id = 0; id < MAX_HWIFS * MAX_DRIVES && idescsi_drives[id]; id++); - idescsi_setup (drive, scsi, id); + idescsi_setup (drive, scsi, id); failed--; } } diff -urN linux-2.4.17-rc2-virgin/fs/adfs/map.c linux-2.4.17-rc2-wli2/fs/adfs/map.c --- linux-2.4.17-rc2-virgin/fs/adfs/map.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/fs/adfs/map.c Tue Dec 18 22:28:42 2001 @@ -12,6 +12,7 @@ #include #include #include +#include #include "adfs.h" diff -urN linux-2.4.17-rc2-virgin/fs/binfmt_elf.c linux-2.4.17-rc2-wli2/fs/binfmt_elf.c --- linux-2.4.17-rc2-virgin/fs/binfmt_elf.c Tue Dec 18 23:18:02 2001 +++ linux-2.4.17-rc2-wli2/fs/binfmt_elf.c Tue Dec 18 22:28:42 2001 @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -1032,25 +1032,6 @@ elf_fpregset_t fpu; /* NT_PRFPREG */ struct elf_prpsinfo psinfo; /* NT_PRPSINFO */ - /* first copy the parameters from user space */ - memset(&psinfo, 0, sizeof(psinfo)); - { - int i, len; - - len = current->mm->arg_end - current->mm->arg_start; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ-1; - copy_from_user(&psinfo.pr_psargs, - (const char *)current->mm->arg_start, len); - for(i = 0; i < len; i++) - if (psinfo.pr_psargs[i] == 0) - psinfo.pr_psargs[i] = ' '; - psinfo.pr_psargs[len] = 0; - - } - - /* now stop all vm operations */ - down_write(¤t->mm->mmap_sem); segs = current->mm->map_count; #ifdef DEBUG @@ -1092,6 +1073,7 @@ * Set up the notes in similar form to SVR4 core dumps made * with info from their /proc. */ + memset(&psinfo, 0, sizeof(psinfo)); memset(&prstatus, 0, sizeof(prstatus)); notes[0].name = "CORE"; @@ -1147,6 +1129,23 @@ psinfo.pr_flag = current->flags; psinfo.pr_uid = NEW_TO_OLD_UID(current->uid); psinfo.pr_gid = NEW_TO_OLD_GID(current->gid); + { + int i, len; + + set_fs(fs); + + len = current->mm->arg_end - current->mm->arg_start; + if (len >= ELF_PRARGSZ) + len = ELF_PRARGSZ-1; + copy_from_user(&psinfo.pr_psargs, + (const char *)current->mm->arg_start, len); + for(i = 0; i < len; i++) + if (psinfo.pr_psargs[i] == 0) + psinfo.pr_psargs[i] = ' '; + psinfo.pr_psargs[len] = 0; + + set_fs(KERNEL_DS); + } strncpy(psinfo.pr_fname, current->comm, sizeof(psinfo.pr_fname)); notes[2].name = "CORE"; @@ -1218,6 +1217,8 @@ if (!writenote(¬es[i], file)) goto end_coredump; + set_fs(fs); + DUMP_SEEK(dataoff); for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { @@ -1231,24 +1232,22 @@ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - struct page* page; - struct vm_area_struct *vma; - - if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(vma->vm_mm, addr); + if (pgd_none(*pgd)) + goto nextpage_coredump; + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) + goto nextpage_coredump; + pte = pte_offset(pmd, addr); + if (pte_none(*pte)) { +nextpage_coredump: DUMP_SEEK (file->f_pos + PAGE_SIZE); } else { - if (page == ZERO_PAGE(addr)) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); - } else { - void *kaddr; - flush_cache_page(vma, addr); - kaddr = kmap(page); - DUMP_WRITE(kaddr, PAGE_SIZE); - flush_page_to_ram(page); - kunmap(page); - } - put_page(page); + DUMP_WRITE((void*)addr, PAGE_SIZE); } } } @@ -1261,7 +1260,6 @@ end_coredump: set_fs(fs); - up_write(¤t->mm->mmap_sem); return has_dumped; } #endif /* USE_ELF_CORE_DUMP */ diff -urN linux-2.4.17-rc2-virgin/fs/buffer.c linux-2.4.17-rc2-wli2/fs/buffer.c --- linux-2.4.17-rc2-virgin/fs/buffer.c Tue Dec 18 23:18:03 2001 +++ 
linux-2.4.17-rc2-wli2/fs/buffer.c Thu Dec 20 18:43:51 2001 @@ -254,7 +254,6 @@ while (next && --nr >= 0) { struct buffer_head *bh = next; next = bh->b_next_free; - if (!buffer_locked(bh)) { if (refile) __refile_buffer(bh); @@ -262,7 +261,13 @@ } if (dev && bh->b_dev != dev) continue; - +#if 0 + if (conditional_schedule_needed()) { + debug_lock_break(1); + spin_unlock(&lru_list_lock); + return -EAGAIN; + } +#endif get_bh(bh); spin_unlock(&lru_list_lock); wait_on_buffer (bh); @@ -459,13 +464,25 @@ return err; } -/* After several hours of tedious analysis, the following hash - * function won. Do not mess with it... -DaveM +/* + * The shift/add buffer cache hash function from Chuck Lever's paper. + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * page 6 describes the behavior of various buffer cache hashes. + * + * The lack of an attempt to mix the bits of dev in this hash + * function appears disturbing to me, but I don't have the + * resources to investigate the value of attempting to do so. + * + * Changed to Lever's multiplicative hash function. + * -- wli */ -#define _hashfn(dev,block) \ - ((((dev)<<(bh_hash_shift - 6)) ^ ((dev)<<(bh_hash_shift - 9))) ^ \ - (((block)<<(bh_hash_shift - 6)) ^ ((block) >> 13) ^ \ - ((block) << (bh_hash_shift - 12)))) + +static inline unsigned long _hashfn(unsigned long dev, unsigned long block) +{ + return ((dev + block) * 2654435761UL) + >> (BITS_PER_LONG - bh_hash_shift); +} + #define hash(dev,block) hash_table[(_hashfn(HASHDEV(dev),block) & bh_hash_mask)] static inline void __insert_into_hash_list(struct buffer_head *bh) @@ -672,6 +689,13 @@ /* Not hashed? */ if (!bh->b_pprev) continue; + if (conditional_schedule_needed()) { + debug_lock_break(2); /* bkl is held too */ + get_bh(bh); + break_spin_lock_and_resched(&lru_list_lock); + put_bh(bh); + slept = 1; + } if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&lru_list_lock); @@ -719,11 +743,9 @@ static void free_more_memory(void) { - zone_t * zone = contig_page_data.node_zonelists[GFP_NOFS & GFP_ZONEMASK].zones[0]; - balance_dirty(); wakeup_bdflush(); - try_to_free_pages(zone, GFP_NOFS, 0); + try_to_free_pages(GFP_NOFS); run_task_queue(&tq_disk); current->policy |= SCHED_YIELD; __set_current_state(TASK_RUNNING); @@ -823,6 +845,8 @@ struct buffer_head *bh; struct inode tmp; int err = 0, err2; + + DEFINE_LOCK_COUNT(); INIT_LIST_HEAD(&tmp.i_dirty_buffers); @@ -844,6 +868,12 @@ spin_lock(&lru_list_lock); } } + /* haven't hit this code path ... */ + debug_lock_break(551); + if (TEST_LOCK_COUNT(32)) { + RESET_LOCK_COUNT(); + break_spin_lock(&lru_list_lock); + } } while (!list_empty(&tmp.i_dirty_buffers)) { @@ -873,6 +903,7 @@ struct inode tmp; int err = 0, err2; + DEFINE_LOCK_COUNT(); INIT_LIST_HEAD(&tmp.i_dirty_data_buffers); spin_lock(&lru_list_lock); @@ -904,9 +935,14 @@ if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); + debug_lock_break(1); + if (TEST_LOCK_COUNT(32)) { + RESET_LOCK_COUNT(); + conditional_schedule(); + } spin_lock(&lru_list_lock); } - + spin_unlock(&lru_list_lock); err2 = osync_inode_data_buffers(inode); @@ -933,6 +969,8 @@ struct list_head *list; int err = 0; + DEFINE_LOCK_COUNT(); + spin_lock(&lru_list_lock); repeat: @@ -940,6 +978,17 @@ for (list = inode->i_dirty_buffers.prev; bh = BH_ENTRY(list), list != &inode->i_dirty_buffers; list = bh->b_inode_buffers.prev) { + /* untested code path ... 
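+ *
+ * The lock-break pattern used by these loops, as a minimal sketch
+ * (the count of 32 matches this patch; the lock name is illustrative):
+ *
+ *	DEFINE_LOCK_COUNT();
+ *	spin_lock(&some_lock);
+ *	while (entries remain) {
+ *		if (TEST_LOCK_COUNT(32)) {
+ *			RESET_LOCK_COUNT();
+ *			break_spin_lock(&some_lock);
+ *			... restart the scan, since the list may have
+ *			    changed while the lock was dropped ...
+ *		}
+ *		... process one entry ...
+ *	}
+ *	spin_unlock(&some_lock);
+ *
+ * break_spin_lock() releases the lock, offers a reschedule, and
+ * retakes it, bounding how long lru_list_lock is held continuously.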
*/ + debug_lock_break(551); + + if (TEST_LOCK_COUNT(32)) { + RESET_LOCK_COUNT(); + if (conditional_schedule_needed()) { + break_spin_lock(&lru_list_lock); + goto repeat; + } + } + if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&lru_list_lock); diff -urN linux-2.4.17-rc2-virgin/fs/dcache.c linux-2.4.17-rc2-wli2/fs/dcache.c --- linux-2.4.17-rc2-virgin/fs/dcache.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/dcache.c Tue Dec 18 22:28:42 2001 @@ -320,11 +320,24 @@ void prune_dcache(int count) { + DEFINE_LOCK_COUNT(); + spin_lock(&dcache_lock); + +redo: for (;;) { struct dentry *dentry; struct list_head *tmp; + if (TEST_LOCK_COUNT(100)) { + RESET_LOCK_COUNT(); + debug_lock_break(1); + if (conditional_schedule_needed()) { + break_spin_lock(&dcache_lock); + goto redo; + } + } + tmp = dentry_unused.prev; if (tmp == &dentry_unused) @@ -480,6 +493,8 @@ struct list_head *next; int found = 0; + DEFINE_LOCK_COUNT(); + spin_lock(&dcache_lock); repeat: next = this_parent->d_subdirs.next; @@ -493,6 +508,12 @@ list_add(&dentry->d_lru, dentry_unused.prev); found++; } + if (TEST_LOCK_COUNT(500) && found > 10) { + debug_lock_break(1); + if (conditional_schedule_needed()) + goto out; + RESET_LOCK_COUNT(); + } /* * Descend a level if the d_subdirs list is non-empty. */ @@ -517,6 +538,7 @@ #endif goto resume; } +out: spin_unlock(&dcache_lock); return found; } @@ -546,6 +568,11 @@ * 0 - very urgent: shrink everything * ... * 6 - base-level: try to shrink a bit. + * + * Chuck Lever's dcache hash function relies on the aggressive + * shrinking where dentry_stat.nr_used is divided by priority. + * I added in a check for a priority of 0 to avoid division by 0. + * -- wli */ int shrink_dcache_memory(int priority, unsigned int gfp_mask) { @@ -565,6 +592,9 @@ if (!(gfp_mask & __GFP_FS)) return 0; + if(!priority) + BUG(); + count = dentry_stat.nr_unused / priority; prune_dcache(count); @@ -683,10 +713,45 @@ return res; } +/* + * The mult + shift 11 hash function from Chuck Lever's paper + * This apparently requires help from shrink_dcache_memory() + * and so that is added. + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * page 8 describes the hash function. + */ static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash) { - hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash ^ (hash >> D_HASHBITS); + hash += (unsigned long) parent; + + /* + * The integer multiply Lever hash function appears to be too + * expensive even with hardware multiply support. Here we + * enter the realm of voodoo. + * + * The multiplicative hash function was this: + * hash *= 2654435761UL; + * hash >>= 11; + * The hard constant 11 is disturbing, and perhaps + * has some bearing on why this did not work well. + * + * The hash function used here is the Mersenne prime + * multiplicative hash function described in Lever's + * paper, which uses a shift/add implementation afforded + * by bit pattern properties of Mersenne primes. + * -- wli + * + * Added in more special sauce to use the upper D_HASHBITS + * of the computed hash key (which is voodoo). + * -- wli + * + * Reverted to the Lever hash function. 
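+ *
+ * For reference, the leading terms of the shift/add line preserved
+ * in the comment below, (hash << 7) - hash, compute hash * 127
+ * exactly, i.e. multiplication by the Mersenne prime 2^7 - 1 with
+ * no multiply instruction; the >> 10 and >> 18 terms fold high bits
+ * back into the low end. The Lever function used instead multiplies
+ * by 2654435761, a prime close to 2^32 divided by the golden ratio
+ * (Fibonacci hashing), and keeps the top D_HASHBITS bits of the
+ * product, which is where a multiplicative hash mixes best.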
+ * -- wli + */ + + /* hash = (hash << 7) - hash + (hash >> 10) + (hash >> 18); */ + hash *= 2654435761UL; + hash >>= BITS_PER_LONG - D_HASHBITS; return dentry_hashtable + (hash & D_HASHMASK); } diff -urN linux-2.4.17-rc2-virgin/fs/exec.c linux-2.4.17-rc2-wli2/fs/exec.c --- linux-2.4.17-rc2-virgin/fs/exec.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/exec.c Tue Dec 18 22:28:42 2001 @@ -35,6 +35,7 @@ #include #include #include +#include #define __NO_VERSION__ #include @@ -279,6 +280,7 @@ flush_dcache_page(page); flush_page_to_ram(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY)))); + page_add_rmap(page, pte); tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); @@ -420,8 +422,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current); mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -urN linux-2.4.17-rc2-virgin/fs/ext3/inode.c linux-2.4.17-rc2-wli2/fs/ext3/inode.c --- linux-2.4.17-rc2-virgin/fs/ext3/inode.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/ext3/inode.c Tue Dec 18 22:28:42 2001 @@ -1654,6 +1654,8 @@ } for (p = first; p < last; p++) { + debug_lock_break(1); /* bkl is held */ + conditional_schedule(); nr = le32_to_cpu(*p); if (nr) { /* accumulate blocks to free if they're contiguous */ @@ -1718,6 +1720,8 @@ /* Go read the buffer for the next level down */ bh = bread(inode->i_dev, nr, inode->i_sb->s_blocksize); + debug_lock_break(1); + conditional_schedule(); /* * A read failure? Report error and clear slot diff -urN linux-2.4.17-rc2-virgin/fs/ext3/namei.c linux-2.4.17-rc2-wli2/fs/ext3/namei.c --- linux-2.4.17-rc2-virgin/fs/ext3/namei.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/ext3/namei.c Tue Dec 18 22:28:42 2001 @@ -157,6 +157,8 @@ if ((bh = bh_use[ra_ptr++]) == NULL) goto next; wait_on_buffer(bh); + debug_lock_break(1); + conditional_schedule(); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ brelse(bh); diff -urN linux-2.4.17-rc2-virgin/fs/fat/cache.c linux-2.4.17-rc2-wli2/fs/fat/cache.c --- linux-2.4.17-rc2-virgin/fs/fat/cache.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/fat/cache.c Tue Dec 18 22:28:42 2001 @@ -14,6 +14,7 @@ #include #include #include +#include #if 0 # define PRINTK(x) printk x diff -urN linux-2.4.17-rc2-virgin/fs/inode.c linux-2.4.17-rc2-wli2/fs/inode.c --- linux-2.4.17-rc2-virgin/fs/inode.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/inode.c Thu Dec 20 17:28:53 2001 @@ -567,6 +567,12 @@ if (tmp == head) break; inode = list_entry(tmp, struct inode, i_list); + + debug_lock_break(2); /* bkl is also held */ + atomic_inc(&inode->i_count); + break_spin_lock_and_resched(&inode_lock); + atomic_dec(&inode->i_count); + if (inode->i_sb != sb) continue; invalidate_inode_buffers(inode); @@ -668,8 +674,11 @@ int count; struct inode * inode; + DEFINE_LOCK_COUNT(); + spin_lock(&inode_lock); +free_unused: count = 0; entry = inode_unused.prev; while (entry != &inode_unused) @@ -692,6 +701,14 @@ count++; if (!--goal) break; + if (TEST_LOCK_COUNT(32)) { + RESET_LOCK_COUNT(); + debug_lock_break(1); + if (conditional_schedule_needed()) { + break_spin_lock(&inode_lock); + goto free_unused; + } + } } inodes_stat.nr_unused -= count; spin_unlock(&inode_lock); @@ -899,14 +916,30 @@ return inode; } +/* + * The properties have changed from Lever's paper. This is + * the multiplicative page cache hash function from Chuck Lever's paper, + * adapted to the inode hash table. 
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * iput() appears to be showing up in profiles, so I put what appears to + * be a theoretically sounder hash function here. + * + * Heavy testing by Anton Blanchard and Rusty Russell has verified that + * this inode cache hash function distributes keys well under heavy stress. + * + * Anton, Rusty, please insert a comment here describing the nature of the + * results of the testing. + * + * -- wli + */ static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) { - unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES); - tmp = tmp + (tmp >> I_HASHBITS); - return tmp & I_HASHMASK; -} + unsigned long hashval = i_ino + (unsigned long) sb; + + hashval = (hashval * 2654435761UL) >> (BITS_PER_LONG - I_HASHBITS); -/* Yeah, I know about quadratic hash. Maybe, later. */ + return hashval & I_HASHMASK; +} /** * iunique - get a unique inode number diff -urN linux-2.4.17-rc2-virgin/fs/jbd/commit.c linux-2.4.17-rc2-wli2/fs/jbd/commit.c --- linux-2.4.17-rc2-virgin/fs/jbd/commit.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/jbd/commit.c Tue Dec 18 22:28:42 2001 @@ -212,6 +212,9 @@ __journal_remove_journal_head(bh); refile_buffer(bh); __brelse(bh); + debug_lock_break(2); + if (conditional_schedule_needed()) + break; } } if (bufs == ARRAY_SIZE(wbuf)) { @@ -235,8 +238,7 @@ journal_brelse_array(wbuf, bufs); lock_journal(journal); spin_lock(&journal_datalist_lock); - if (bufs) - goto write_out_data_locked; + goto write_out_data_locked; } /* @@ -272,6 +274,14 @@ */ while ((jh = commit_transaction->t_async_datalist)) { struct buffer_head *bh = jh2bh(jh); + if (conditional_schedule_needed()) { + debug_lock_break(551); + spin_unlock(&journal_datalist_lock); + unlock_journal(journal); + lock_journal(journal); + spin_lock(&journal_datalist_lock); + continue; + } if (buffer_locked(bh)) { spin_unlock(&journal_datalist_lock); unlock_journal(journal); diff -urN linux-2.4.17-rc2-virgin/fs/partitions/Makefile linux-2.4.17-rc2-wli2/fs/partitions/Makefile --- linux-2.4.17-rc2-virgin/fs/partitions/Makefile Thu Jul 26 16:30:04 2001 +++ linux-2.4.17-rc2-wli2/fs/partitions/Makefile Thu Dec 20 19:49:13 2001 @@ -9,7 +9,7 @@ O_TARGET := partitions.o -export-objs := check.o ibm.o +export-objs := check.o ibm.o msdos.o obj-y := check.o diff -urN linux-2.4.17-rc2-virgin/fs/partitions/msdos.c linux-2.4.17-rc2-wli2/fs/partitions/msdos.c --- linux-2.4.17-rc2-virgin/fs/partitions/msdos.c Thu Oct 11 08:07:07 2001 +++ linux-2.4.17-rc2-wli2/fs/partitions/msdos.c Thu Dec 20 19:49:13 2001 @@ -29,7 +29,13 @@ #ifdef CONFIG_BLK_DEV_IDE #include /* IDE xlate */ -#endif /* CONFIG_BLK_DEV_IDE */ +#elif defined(CONFIG_BLK_DEV_IDE_MODULE) +#include + +int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *); +EXPORT_SYMBOL(ide_xlate_1024_hook); +#define ide_xlate_1024 ide_xlate_1024_hook +#endif #include @@ -468,7 +474,7 @@ */ static int handle_ide_mess(struct block_device *bdev) { -#ifdef CONFIG_BLK_DEV_IDE +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) Sector sect; unsigned char *data; kdev_t dev = to_kdev_t(bdev->bd_dev); @@ -476,6 +482,10 @@ int heads = 0; struct partition *p; int i; +#ifdef CONFIG_BLK_DEV_IDE_MODULE + if (!ide_xlate_1024) + return 1; +#endif /* * The i386 partition handling programs very often * make partitions end on cylinder boundaries.
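 *
 * (On the CONFIG_BLK_DEV_IDE_MODULE hook above: since msdos.c cannot
 * link directly against a modular IDE driver, ide_xlate_1024 becomes
 * a function pointer the module fills in at load time, and callers
 * bail out while it is still NULL. A minimal sketch of the pattern,
 * with hypothetical names:
 *
 *	int (*xlate_hook)(kdev_t, int, int, const char *);
 *	EXPORT_SYMBOL(xlate_hook);
 *
 *	in the module's init_module():  xlate_hook = real_xlate;
 *	in its cleanup_module():        xlate_hook = NULL;
 * )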
@@ -537,7 +547,7 @@ /* Flush the cache */ invalidate_bdev(bdev, 1); truncate_inode_pages(bdev->bd_inode->i_mapping, 0); -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ return 1; } diff -urN linux-2.4.17-rc2-virgin/fs/proc/array.c linux-2.4.17-rc2-wli2/fs/proc/array.c --- linux-2.4.17-rc2-virgin/fs/proc/array.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/proc/array.c Tue Dec 18 22:28:42 2001 @@ -392,82 +392,11 @@ mmput(mm); return res; } - -static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, - int * pages, int * shared, int * dirty, int * total) -{ - pte_t * pte; - unsigned long end; - - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - pte = pte_offset(pmd, address); - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - pte_t page = *pte; - struct page *ptpage; - - address += PAGE_SIZE; - pte++; - if (pte_none(page)) - continue; - ++*total; - if (!pte_present(page)) - continue; - ptpage = pte_page(page); - if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage)) - continue; - ++*pages; - if (pte_dirty(page)) - ++*dirty; - if (page_count(pte_page(page)) > 1) - ++*shared; - } while (address < end); -} - -static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size, - int * pages, int * shared, int * dirty, int * total) -{ - pmd_t * pmd; - unsigned long end; - - if (pgd_none(*pgd)) - return; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - return; - } - pmd = pmd_offset(pgd, address); - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - statm_pte_range(pmd, address, end - address, pages, shared, dirty, total); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); -} - -static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long end, - int * pages, int * shared, int * dirty, int * total) -{ - while (address < end) { - statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgd++; - } -} +/* + * This thing is slow so I've ripped out the page table scanning. + * The VMA scanning is slow enough. 
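+ *
+ * A sketch of what the loop below now computes (addresses are
+ * illustrative): a VMA from 0x08048000 to 0x0804c000 spans 0x4000
+ * bytes, so pages = 0x4000 >> PAGE_SHIFT = 4 on x86. Executable
+ * mappings above TASK_UNMAPPED_BASE count as library, executable
+ * mappings below it as text, and everything else as data; shared
+ * mappings also feed the share count, while resident and size are
+ * taken directly from mm->rss and mm->total_vm.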
+ */ int proc_pid_statm(struct task_struct *task, char * buffer) { struct mm_struct *mm; @@ -482,23 +411,24 @@ struct vm_area_struct * vma; down_read(&mm->mmap_sem); vma = mm->mmap; + resident = mm->rss; + size = mm->total_vm; while (vma) { - pgd_t *pgd = pgd_offset(mm, vma->vm_start); - int pages = 0, shared = 0, dirty = 0, total = 0; + int pages, total; + + total = vma->vm_end - vma->vm_start; + pages = total >> PAGE_SHIFT; + + if (vma->vm_flags & VM_SHARED) + share += pages; - statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total); - resident += pages; - share += shared; - dt += dirty; - size += total; - if (vma->vm_flags & VM_EXECUTABLE) - trs += pages; /* text */ - else if (vma->vm_flags & VM_GROWSDOWN) - drs += pages; /* stack */ - else if (vma->vm_end > 0x60000000) - lrs += pages; /* library */ - else - drs += pages; + if (vma->vm_flags & VM_EXECUTABLE) { + if(vma->vm_end > TASK_UNMAPPED_BASE) + lrs += pages; /* library */ + else + trs += pages; /* text */ + } else + drs += pages; /* stack and data */ vma = vma->vm_next; } up_read(&mm->mmap_sem); diff -urN linux-2.4.17-rc2-virgin/fs/proc/proc_misc.c linux-2.4.17-rc2-wli2/fs/proc/proc_misc.c --- linux-2.4.17-rc2-virgin/fs/proc/proc_misc.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/proc/proc_misc.c Tue Dec 18 22:28:42 2001 @@ -164,7 +164,8 @@ "Cached: %8lu kB\n" "SwapCached: %8lu kB\n" "Active: %8u kB\n" - "Inactive: %8u kB\n" + "Inact_dirty: %8u kB\n" + "Inact_clean: %8u kB\n" "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" "LowTotal: %8lu kB\n" @@ -178,7 +179,8 @@ K(pg_size - swapper_space.nrpages), K(swapper_space.nrpages), K(nr_active_pages), - K(nr_inactive_pages), + K(nr_inactive_dirty_pages), + K(nr_inactive_clean_pages), K(i.totalhigh), K(i.freehigh), K(i.totalram-i.totalhigh), diff -urN linux-2.4.17-rc2-virgin/fs/reiserfs/bitmap.c linux-2.4.17-rc2-wli2/fs/reiserfs/bitmap.c --- linux-2.4.17-rc2-virgin/fs/reiserfs/bitmap.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/reiserfs/bitmap.c Tue Dec 18 22:28:42 2001 @@ -410,19 +410,23 @@ amount_needed++ ; continue ; } - - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; + RFALSE( is_reusable (s, search_start, 0) == 0, + "vs-4140: bad block number found"); - RFALSE( buffer_locked (SB_AP_BITMAP (s)[i]) || - is_reusable (s, search_start, 0) == 0, - "vs-4140: bitmap block is locked or bad block number found"); + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; /* if this bit was already set, we've scheduled, and someone else ** has allocated it. loop around and try again */ if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) { reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ; + /* if this block has been allocated while we slept, it is + ** impossible to find any more contiguous blocks for ourselves. + ** If we are doing preallocation, give up now and return. 
+ */ + if (for_prealloc) + goto free_and_return; amount_needed++ ; continue ; } diff -urN linux-2.4.17-rc2-virgin/fs/reiserfs/buffer2.c linux-2.4.17-rc2-wli2/fs/reiserfs/buffer2.c --- linux-2.4.17-rc2-virgin/fs/reiserfs/buffer2.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/reiserfs/buffer2.c Tue Dec 18 22:28:42 2001 @@ -55,6 +55,8 @@ PROC_EXP( unsigned int ctx_switches = kstat.context_swtch ); result = bread (super -> s_dev, n_block, n_size); + debug_lock_break(1); + conditional_schedule(); PROC_INFO_INC( super, breads ); PROC_EXP( if( kstat.context_swtch != ctx_switches ) PROC_INFO_INC( super, bread_miss ) ); diff -urN linux-2.4.17-rc2-virgin/fs/reiserfs/journal.c linux-2.4.17-rc2-wli2/fs/reiserfs/journal.c --- linux-2.4.17-rc2-virgin/fs/reiserfs/journal.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/reiserfs/journal.c Tue Dec 18 22:28:42 2001 @@ -574,6 +574,8 @@ /* lock the current transaction */ inline static void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC( p_s_sb, journal.lock_journal ); + debug_lock_break(1); + conditional_schedule(); while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { PROC_INFO_INC( p_s_sb, journal.lock_journal_wait ); sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; @@ -704,6 +706,8 @@ mark_buffer_dirty(tbh) ; } ll_rw_block(WRITE, 1, &tbh) ; + debug_lock_break(1); + conditional_schedule(); count++ ; put_bh(tbh) ; /* once for our get_hash */ } @@ -833,6 +837,8 @@ set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ; ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + debug_lock_break(1); + conditional_schedule(); if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { printk( "reiserfs: journal-837: IO error during journal replay\n" ); return -EIO ; @@ -2092,6 +2098,8 @@ } int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { + debug_lock_break(1); + conditional_schedule(); return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; } @@ -2232,6 +2240,8 @@ } int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + debug_lock_break(1); + conditional_schedule(); return do_journal_end(th, p_s_sb, nblocks, 0) ; } @@ -2683,6 +2693,8 @@ RFALSE( buffer_locked(bh) && cur_tb != NULL, "waiting while do_balance was running\n") ; wait_on_buffer(bh) ; + debug_lock_break(1); + conditional_schedule(); } PROC_INFO_INC( p_s_sb, journal.prepare_retry ); retry_count++ ; @@ -2856,6 +2868,8 @@ /* copy all the real blocks into log area. dirty log blocks */ if (test_bit(BH_JDirty, &cn->bh->b_state)) { struct buffer_head *tmp_bh ; + debug_lock_break(1); + conditional_schedule(); /* getblk can sleep, so... 
*/ tmp_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT), p_s_sb->s_blocksize) ; diff -urN linux-2.4.17-rc2-virgin/fs/reiserfs/stree.c linux-2.4.17-rc2-wli2/fs/reiserfs/stree.c --- linux-2.4.17-rc2-virgin/fs/reiserfs/stree.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/fs/reiserfs/stree.c Tue Dec 18 22:28:42 2001 @@ -648,9 +648,8 @@ stop at leaf level - set to DISK_LEAF_NODE_LEVEL */ ) { - int n_block_number = SB_ROOT_BLOCK (p_s_sb), - expected_level = SB_TREE_HEIGHT (p_s_sb), - n_block_size = p_s_sb->s_blocksize; + int n_block_number, expected_level; + int n_block_size = p_s_sb->s_blocksize; struct buffer_head * p_s_bh; struct path_element * p_s_last_element; int n_node_level, n_retval; @@ -662,7 +661,10 @@ #endif PROC_INFO_INC( p_s_sb, search_by_key ); - + + debug_lock_break(1); + conditional_schedule(); + /* As we add each node to a path we increase its count. This means that we must be careful to release all nodes in a path before we either discard the path struct or re-use the path struct, as we do here. */ @@ -674,6 +676,8 @@ /* With each iteration of this loop we search through the items in the current node, and calculate the next current node(next path element) for the next iteration of this loop.. */ + n_block_number = SB_ROOT_BLOCK (p_s_sb); + expected_level = SB_TREE_HEIGHT (p_s_sb); while ( 1 ) { #ifdef CONFIG_REISERFS_CHECK @@ -1099,6 +1103,9 @@ for (n_counter = *p_n_removed; n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { + + debug_lock_break(1); + conditional_schedule(); if (item_moved (&s_ih, p_s_path)) { need_research = 1 ; diff -urN linux-2.4.17-rc2-virgin/include/asm-alpha/bootmem.h linux-2.4.17-rc2-wli2/include/asm-alpha/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-alpha/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-alpha/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,12 @@ +/* + * include/asm-alpha/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * Alpha has some NUMA systems, but it's uncertain to me what + * an appropriate value of NR_SEGMENTS should be. + * + * For the moment, the generic single-page definition is here, + * but those who run on Alpha may need to increase the value + * at least until the page stealing is in place. + */ +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-alpha/ide.h linux-2.4.17-rc2-wli2/include/asm-alpha/ide.h --- linux-2.4.17-rc2-virgin/include/asm-alpha/ide.h Wed May 24 08:40:41 2000 +++ linux-2.4.17-rc2-wli2/include/asm-alpha/ide.h Thu Dec 20 19:49:13 2001 @@ -91,7 +91,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-alpha/kdb.h linux-2.4.17-rc2-wli2/include/asm-alpha/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-alpha/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-alpha/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for alpha. 
+ */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H +#define KDB_ENTER() +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; +typedef unsigned long kdb_machreg_t; +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/bootmem.h linux-2.4.17-rc2-wli2/include/asm-arm/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-arm/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-arm/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,9 @@ +/* + * include/asm-arm/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * ARM appeared to have little trouble with a single-page-sized + * segment pool, so the generic NR_SEGMENTS is okay for now. + * This will go away once page stealing is in place. + */ +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/dma.h linux-2.4.17-rc2-wli2/include/asm-arm/dma.h --- linux-2.4.17-rc2-virgin/include/asm-arm/dma.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/dma.h Tue Dec 18 22:28:42 2001 @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/hardirq.h linux-2.4.17-rc2-wli2/include/asm-arm/hardirq.h --- linux-2.4.17-rc2-virgin/include/asm-arm/hardirq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/hardirq.h Tue Dec 18 22:28:42 2001 @@ -34,6 +34,7 @@ #define irq_exit(cpu,irq) (local_irq_count(cpu)--) #define synchronize_irq() do { } while (0) +#define release_irqlock(cpu) do { } while (0) #else #error SMP not supported diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/ide.h linux-2.4.17-rc2-wli2/include/asm-arm/ide.h --- linux-2.4.17-rc2-virgin/include/asm-arm/ide.h Thu Jun 17 01:11:35 1999 +++ linux-2.4.17-rc2-wli2/include/asm-arm/ide.h Thu Dec 20 19:49:13 2001 @@ -30,7 +30,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/kdb.h linux-2.4.17-rc2-wli2/include/asm-arm/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-arm/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-arm/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for arm. 
+ */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H +#define KDB_ENTER() +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; +typedef unsigned long kdb_machreg_t; +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/mmu_context.h linux-2.4.17-rc2-wli2/include/asm-arm/mmu_context.h --- linux-2.4.17-rc2-virgin/include/asm-arm/mmu_context.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/mmu_context.h Tue Dec 18 22:28:42 2001 @@ -42,6 +42,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned int cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() == 0) + BUG(); +#endif if (prev != next) { cpu_switch_mm(next->pgd, tsk); clear_bit(cpu, &prev->cpu_vm_mask); diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/pgalloc.h linux-2.4.17-rc2-wli2/include/asm-arm/pgalloc.h --- linux-2.4.17-rc2-virgin/include/asm-arm/pgalloc.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/pgalloc.h Tue Dec 18 22:28:42 2001 @@ -57,40 +57,48 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)__pgd_next(ret); ret[1] = ret[2]; clean_dcache_entry(ret + 1); pgtable_cache_size--; } + preempt_enable(); return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); __pgd_next(pgd) = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) { unsigned long *ret; + preempt_disable(); if((ret = pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)__pte_next(ret); ret[0] = 0; clean_dcache_entry(ret); pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static inline void free_pte_fast(pte_t *pte) { + preempt_disable(); __pte_next(pte) = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } #else /* CONFIG_NO_PGT_CACHE */ diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/smplock.h linux-2.4.17-rc2-wli2/include/asm-arm/smplock.h --- linux-2.4.17-rc2-virgin/include/asm-arm/smplock.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/smplock.h Tue Dec 18 22:28:42 2001 @@ -3,12 +3,17 @@ * * Default SMP lock implementation */ +#include #include #include extern spinlock_t kernel_flag; +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_is_disable() +#else #define kernel_locked() spin_is_locked(&kernel_flag) +#endif /* * Release global kernel lock and global interrupt lock @@ -40,8 +45,14 @@ */ static inline void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } static inline void unlock_kernel(void) diff -urN linux-2.4.17-rc2-virgin/include/asm-arm/softirq.h linux-2.4.17-rc2-wli2/include/asm-arm/softirq.h --- linux-2.4.17-rc2-virgin/include/asm-arm/softirq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-arm/softirq.h Tue Dec 18 22:28:42 2001 @@ -5,20 +5,22 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define 
__local_bh_enable() __cpu_bh_enable(smp_processor_id()) #define in_softirq() (local_bh_count(smp_processor_id()) != 0) -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ if (!--*ptr && ptr[-2]) \ __asm__("bl%? __do_softirq": : : "lr");/* out of line */\ } while (0) + +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-cris/bootmem.h linux-2.4.17-rc2-wli2/include/asm-cris/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-cris/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-cris/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,11 @@ +/* + * include/asm-cris/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * Cris hasn't been tested with this yet, so + * port maintainers may want to increase the value + * of NR_SEGMENTS if this becomes a problem. + * This will go away once page stealing is in place. + */ + +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-cris/ide.h linux-2.4.17-rc2-wli2/include/asm-cris/ide.h --- linux-2.4.17-rc2-virgin/include/asm-cris/ide.h Tue Feb 13 14:13:44 2001 +++ linux-2.4.17-rc2-wli2/include/asm-cris/ide.h Thu Dec 20 19:49:13 2001 @@ -97,7 +97,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; /* some configuration options we don't need */ diff -urN linux-2.4.17-rc2-virgin/include/asm-generic/bootmem.h linux-2.4.17-rc2-wli2/include/asm-generic/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-generic/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-generic/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,25 @@ +#ifndef _ASM_BOOTMEM_H +#define _ASM_BOOTMEM_H + +/* + * include/asm-generic/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * NR_SEGMENTS is the number of line segment tree nodes held + * in the per-node segment pools. + * + * For the moment, this is a fixed size, because dynamically + * determining the number of segments per node would require + * a change of interface. On 32-bit machines with 4KB pages + * this is 170 distinct fragments of memory per page. + * + * So long as the arena for the tree nodes is statically + * allocated, this must be an arch-specific #define + * This can be eliminated entirely only by a change of + * interface. Page stealing is simple, but unsafe until + * after the absolutely necessary reservations are done. + */ + +#define NR_SEGMENTS (PAGE_SIZE/sizeof(segment_buf_t)) + +#endif /* _ASM_BOOTMEM_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-generic/kdb.h linux-2.4.17-rc2-wli2/include/asm-generic/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-generic/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-generic/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for generic. 
+ */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H +#define KDB_ENTER() +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; +typedef unsigned long kdb_machreg_t; +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-generic/rmap.h linux-2.4.17-rc2-wli2/include/asm-generic/rmap.h --- linux-2.4.17-rc2-virgin/include/asm-generic/rmap.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-generic/rmap.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,51 @@ +#ifndef _GENERIC_RMAP_H +#define _GENERIC_RMAP_H +/* + * linux/include/asm-generic/rmap.h + * + * Architecture dependent parts of the reverse mapping code, + * this version should work for most architectures with a + * 'normal' page table layout. + * + * We use the struct page of the page table page to find out + * the process and full address of a page table entry: + * - page->mapping points to the process' mm_struct + * - page->index has the high bits of the address + * - the lower bits of the address are calculated from the + * offset of the page table entry within the page table page + */ +#include + +static inline void pgtable_add_rmap(pte_t * ptep, struct mm_struct * mm, unsigned long address) +{ + struct page * page = virt_to_page(ptep); + + page->mapping = (void *)mm; + page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); +} + +static inline void pgtable_remove_rmap(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + + page->mapping = NULL; + page->index = 0; +} + +static inline struct mm_struct * ptep_to_mm(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + + return (struct mm_struct *) page->mapping; +} + +static inline unsigned long ptep_to_address(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + unsigned long low_bits; + + low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE; + return page->index + low_bits; +} + +#endif /* _GENERIC_RMAP_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/bootmem.h linux-2.4.17-rc2-wli2/include/asm-i386/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-i386/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-i386/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,9 @@ +/* + * include/asm-i386/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * i386 has been well-tested with this value of NR_SEGMENTS. + * There are some i386 architectures with highly-fragmented + * memory that may need to alter it.
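+ *
+ * Worked numbers for the generic definition (the segment_buf_t size
+ * is inferred from the 170-per-page figure in asm-generic/bootmem.h,
+ * so treat it as illustrative): with 4KB pages and a 24-byte
+ * segment_buf_t, NR_SEGMENTS = PAGE_SIZE / sizeof(segment_buf_t)
+ * = 4096 / 24 = 170 tree nodes in the pool. Altering it here means
+ * overriding that quotient with a larger constant.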
+ */ +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/hardirq.h linux-2.4.17-rc2-wli2/include/asm-i386/hardirq.h --- linux-2.4.17-rc2-virgin/include/asm-i386/hardirq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/hardirq.h Thu Dec 20 18:55:14 2001 @@ -36,6 +36,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/highmem.h linux-2.4.17-rc2-wli2/include/asm-i386/highmem.h --- linux-2.4.17-rc2-virgin/include/asm-i386/highmem.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/highmem.h Thu Dec 20 18:55:30 2001 @@ -88,6 +88,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -109,8 +110,10 @@ unsigned long vaddr = (unsigned long) kvaddr; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) BUG(); @@ -122,6 +125,8 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/hw_irq.h linux-2.4.17-rc2-wli2/include/asm-i386/hw_irq.h --- linux-2.4.17-rc2-virgin/include/asm-i386/hw_irq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/hw_irq.h Thu Dec 20 18:55:14 2001 @@ -23,6 +23,7 @@ #define FIRST_EXTERNAL_VECTOR 0x20 #define SYSCALL_VECTOR 0x80 +#define KDBENTER_VECTOR 0x81 /* * Vectors 0x20-0x2f are used for ISA interrupts. @@ -42,6 +43,7 @@ #define INVALIDATE_TLB_VECTOR 0xfd #define RESCHEDULE_VECTOR 0xfc #define CALL_FUNCTION_VECTOR 0xfb +#define KDB_VECTOR 0xfa /* * Local APIC timer IRQ vector is on a different priority level, @@ -95,6 +97,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_LOCK_COUNT \ + GET_CURRENT \ + "incl 4(%ebx)\n\t" +#else +#define BUMP_LOCK_COUNT +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -108,14 +122,11 @@ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_LOCK_COUNT #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" /* * SMP has a few special interrupts for IPI messages diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/i387.h linux-2.4.17-rc2-wli2/include/asm-i386/i387.h --- linux-2.4.17-rc2-virgin/include/asm-i386/i387.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/i387.h Thu Dec 20 19:05:30 2001 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/ide.h linux-2.4.17-rc2-wli2/include/asm-i386/ide.h --- linux-2.4.17-rc2-virgin/include/asm-i386/ide.h Thu Nov 22 11:46:58 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/ide.h Thu Dec 20 19:49:13 2001 @@ -95,7 +95,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef 
union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/kdb.h linux-2.4.17-rc2-wli2/include/asm-i386/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-i386/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-i386/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,62 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H + + /* + * KDB_ENTER() is a macro which causes entry into the kernel + * debugger from any point in the kernel code stream. If it + * is intended to be used from interrupt level, it must use + * a non-maskable entry method. + */ +#define KDB_ENTER() asm("\tint $129\n") + + /* + * Define the exception frame for this architecture + */ +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; + + /* + * Needed for exported symbols. + */ +typedef unsigned long kdb_machreg_t; + +#define kdb_machreg_fmt "0x%lx" +#define kdb_machreg_fmt0 "0x%08lx" +#define kdb_bfd_vma_fmt "0x%lx" +#define kdb_bfd_vma_fmt0 "0x%08lx" +#define kdb_elfw_addr_fmt "0x%x" +#define kdb_elfw_addr_fmt0 "0x%08x" + + /* + * Per cpu arch specific kdb state. Must be in range 0xff000000. + */ +#define KDB_STATE_A_IF 0x01000000 /* Saved IF flag */ + + /* + * Interface from kernel trap handling code to kernel debugger. + */ +extern int kdba_callback_die(struct pt_regs *, int, long, void*); +extern int kdba_callback_bp(struct pt_regs *, int, long, void*); +extern int kdba_callback_debug(struct pt_regs *, int, long, void *); + +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/kdbprivate.h linux-2.4.17-rc2-wli2/include/asm-i386/kdbprivate.h --- linux-2.4.17-rc2-virgin/include/asm-i386/kdbprivate.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-i386/kdbprivate.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,178 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring.
+ * Keith Owens 2000/05/23 + * KDB v1.2 + */ +#if !defined(_ASM_KDBPRIVATE_H) +#define _ASM_KDBPRIVATE_H + +typedef unsigned char kdb_machinst_t; + + /* + * KDB_MAXBPT describes the total number of breakpoints + * supported by this architecture. + */ +#define KDB_MAXBPT 16 + /* + * KDB_MAXHARDBPT describes the total number of hardware + * breakpoint registers that exist. + */ +#define KDB_MAXHARDBPT 4 + /* + * Provide space for KDB_MAX_COMMANDS commands. + */ +#define KDB_MAX_COMMANDS 125 + + /* + * Platform specific environment entries + */ +#define KDB_PLATFORM_ENV "IDMODE=x86", "BYTESPERWORD=4", "IDCOUNT=16" + + /* + * Define the direction that the stack grows + */ +#define KDB_STACK_DIRECTION (-1) /* Stack grows down */ + + /* + * Support for ia32 debug registers + */ +typedef struct _kdbhard_bp { + kdb_machreg_t bph_reg; /* Register this breakpoint uses */ + + unsigned int bph_free:1; /* Register available for use */ + unsigned int bph_data:1; /* Data Access breakpoint */ + + unsigned int bph_write:1; /* Write Data breakpoint */ + unsigned int bph_mode:2; /* 0=inst, 1=write, 2=io, 3=read */ + unsigned int bph_length:2; /* 0=1, 1=2, 2=BAD, 3=4 (bytes) */ +} kdbhard_bp_t; + +extern kdbhard_bp_t kdb_hardbreaks[/* KDB_MAXHARDBPT */]; + +#define IA32_BREAKPOINT_INSTRUCTION 0xcc + +#define DR6_BT 0x00008000 +#define DR6_BS 0x00004000 +#define DR6_BD 0x00002000 + +#define DR6_B3 0x00000008 +#define DR6_B2 0x00000004 +#define DR6_B1 0x00000002 +#define DR6_B0 0x00000001 + +#define DR7_RW_VAL(dr, drnum) \ + (((dr) >> (16 + (4 * (drnum)))) & 0x3) + +#define DR7_RW_SET(dr, drnum, rw) \ + do { \ + (dr) &= ~(0x3 << (16 + (4 * (drnum)))); \ + (dr) |= (((rw) & 0x3) << (16 + (4 * (drnum)))); \ + } while (0) + +#define DR7_RW0(dr) DR7_RW_VAL(dr, 0) +#define DR7_RW0SET(dr,rw) DR7_RW_SET(dr, 0, rw) +#define DR7_RW1(dr) DR7_RW_VAL(dr, 1) +#define DR7_RW1SET(dr,rw) DR7_RW_SET(dr, 1, rw) +#define DR7_RW2(dr) DR7_RW_VAL(dr, 2) +#define DR7_RW2SET(dr,rw) DR7_RW_SET(dr, 2, rw) +#define DR7_RW3(dr) DR7_RW_VAL(dr, 3) +#define DR7_RW3SET(dr,rw) DR7_RW_SET(dr, 3, rw) + + +#define DR7_LEN_VAL(dr, drnum) \ + (((dr) >> (18 + (4 * (drnum)))) & 0x3) + +#define DR7_LEN_SET(dr, drnum, rw) \ + do { \ + (dr) &= ~(0x3 << (18 + (4 * (drnum)))); \ + (dr) |= (((rw) & 0x3) << (18 + (4 * (drnum)))); \ + } while (0) +#define DR7_LEN0(dr) DR7_LEN_VAL(dr, 0) +#define DR7_LEN0SET(dr,len) DR7_LEN_SET(dr, 0, len) +#define DR7_LEN1(dr) DR7_LEN_VAL(dr, 1) +#define DR7_LEN1SET(dr,len) DR7_LEN_SET(dr, 1, len) +#define DR7_LEN2(dr) DR7_LEN_VAL(dr, 2) +#define DR7_LEN2SET(dr,len) DR7_LEN_SET(dr, 2, len) +#define DR7_LEN3(dr) DR7_LEN_VAL(dr, 3) +#define DR7_LEN3SET(dr,len) DR7_LEN_SET(dr, 3, len) + +#define DR7_G0(dr) (((dr)>>1)&0x1) +#define DR7_G0SET(dr) ((dr) |= 0x2) +#define DR7_G0CLR(dr) ((dr) &= ~0x2) +#define DR7_G1(dr) (((dr)>>3)&0x1) +#define DR7_G1SET(dr) ((dr) |= 0x8) +#define DR7_G1CLR(dr) ((dr) &= ~0x8) +#define DR7_G2(dr) (((dr)>>5)&0x1) +#define DR7_G2SET(dr) ((dr) |= 0x20) +#define DR7_G2CLR(dr) ((dr) &= ~0x20) +#define DR7_G3(dr) (((dr)>>7)&0x1) +#define DR7_G3SET(dr) ((dr) |= 0x80) +#define DR7_G3CLR(dr) ((dr) &= ~0x80) + +#define DR7_L0(dr) (((dr))&0x1) +#define DR7_L0SET(dr) ((dr) |= 0x1) +#define DR7_L0CLR(dr) ((dr) &= ~0x1) +#define DR7_L1(dr) (((dr)>>2)&0x1) +#define DR7_L1SET(dr) ((dr) |= 0x4) +#define DR7_L1CLR(dr) ((dr) &= ~0x4) +#define DR7_L2(dr) (((dr)>>4)&0x1) +#define DR7_L2SET(dr) ((dr) |= 0x10) +#define DR7_L2CLR(dr) ((dr) &= ~0x10) +#define DR7_L3(dr) (((dr)>>6)&0x1) +#define DR7_L3SET(dr) ((dr) |= 0x40)
+#define DR7_L3CLR(dr) ((dr) &= ~0x40) + +#define DR7_GD 0x00002000 /* General Detect Enable */ +#define DR7_GE 0x00000200 /* Global exact */ +#define DR7_LE 0x00000100 /* Local exact */ + +extern kdb_machreg_t kdba_getdr6(void); +extern void kdba_putdr6(kdb_machreg_t); + +extern kdb_machreg_t kdba_getdr7(void); + +extern kdb_machreg_t kdba_getdr(int); +extern void kdba_putdr(int, kdb_machreg_t); + +extern kdb_machreg_t kdb_getcr(int); + +#define KDB_HAVE_LONGJMP +#ifdef KDB_HAVE_LONGJMP +/* + * Support for setjmp/longjmp + */ +#define JB_BX 0 +#define JB_SI 1 +#define JB_DI 2 +#define JB_BP 3 +#define JB_SP 4 +#define JB_PC 5 + +typedef struct __kdb_jmp_buf { + unsigned long regs[6]; /* kdba_setjmp assumes fixed offsets here */ +} kdb_jmp_buf; + +extern int kdba_setjmp(kdb_jmp_buf *); +extern void kdba_longjmp(kdb_jmp_buf *, int); + +extern kdb_jmp_buf kdbjmpbuf[]; +#endif /* KDB_HAVE_LONGJMP */ + +#endif /* !_ASM_KDBPRIVATE_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/keyboard.h linux-2.4.17-rc2-wli2/include/asm-i386/keyboard.h --- linux-2.4.17-rc2-virgin/include/asm-i386/keyboard.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/keyboard.h Thu Dec 20 18:56:36 2001 @@ -42,6 +42,7 @@ #define kbd_sysrq_xlate pckbd_sysrq_xlate #define SYSRQ_KEY 0x54 +#define E1_PAUSE 119 /* PAUSE key */ /* resource allocation */ #define kbd_request_region() diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/mmu_context.h linux-2.4.17-rc2-wli2/include/asm-i386/mmu_context.h --- linux-2.4.17-rc2-virgin/include/asm-i386/mmu_context.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/mmu_context.h Thu Dec 20 18:55:14 2001 @@ -27,6 +27,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() == 0) + BUG(); +#endif if (prev != next) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/param.h linux-2.4.17-rc2-wli2/include/asm-i386/param.h --- linux-2.4.17-rc2-virgin/include/asm-i386/param.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/param.h Thu Dec 20 19:39:04 2001 @@ -2,7 +2,8 @@ #define _ASMi386_PARAM_H #ifndef HZ -#define HZ 100 +/* #define HZ 100 */ +#define HZ 256 #endif #define EXEC_PAGESIZE 4096 @@ -18,7 +19,7 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ #ifdef __KERNEL__ -# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ +# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ #endif #endif diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/pgalloc.h linux-2.4.17-rc2-wli2/include/asm-i386/pgalloc.h --- linux-2.4.17-rc2-virgin/include/asm-i386/pgalloc.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/pgalloc.h Thu Dec 20 18:55:14 2001 @@ -75,20 +75,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline void free_pgd_slow(pgd_t *pgd) @@ -119,19 +125,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long 
*)pte_quicklist) != NULL) {
 		pte_quicklist = (unsigned long *)(*ret);
 		ret[0] = ret[1];
 		pgtable_cache_size--;
 	}
+	preempt_enable();
 	return (pte_t *)ret;
 }
 
 static inline void pte_free_fast(pte_t *pte)
 {
+	preempt_disable();
 	*(unsigned long *)pte = (unsigned long) pte_quicklist;
 	pte_quicklist = (unsigned long *) pte;
 	pgtable_cache_size++;
+	preempt_enable();
 }
 
 static __inline__ void pte_free_slow(pte_t *pte)
diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/pgtable.h linux-2.4.17-rc2-wli2/include/asm-i386/pgtable.h
--- linux-2.4.17-rc2-virgin/include/asm-i386/pgtable.h	Thu Nov 22 11:46:19 2001
+++ linux-2.4.17-rc2-wli2/include/asm-i386/pgtable.h	Thu Dec 20 18:55:14 2001
@@ -267,7 +267,18 @@
  * Permanent address of a page. Obviously must never be
  * called on a highmem page.
  */
+#ifndef CONFIG_HIGHMEM
+
+#define page_address(page) \
+	__va( (((page) - PageZone(page)->zone_mem_map) << PAGE_SHIFT) \
+			+ PageZone(page)->zone_start_paddr)
+
+#else /* CONFIG_HIGHMEM */
+
 #define page_address(page) ((page)->virtual)
+
+#endif /* CONFIG_HIGHMEM */
+
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
 /*
diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/processor.h linux-2.4.17-rc2-wli2/include/asm-i386/processor.h
--- linux-2.4.17-rc2-virgin/include/asm-i386/processor.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-i386/processor.h	Thu Dec 20 18:55:13 2001
@@ -502,7 +502,10 @@
 {
 	__asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
 }
-#define spin_lock_prefetch(x)	prefetchw(x)
+#define spin_lock_prefetch(x) do {			\
+	prefetchw(x);					\
+	preempt_prefetch(&current->preempt_count);	\
+} while(0)
 
 #endif
 
diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/ptrace.h linux-2.4.17-rc2-wli2/include/asm-i386/ptrace.h
--- linux-2.4.17-rc2-virgin/include/asm-i386/ptrace.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-i386/ptrace.h	Tue Dec 18 22:21:49 2001
@@ -54,6 +54,29 @@
 /* options set using PTRACE_SETOPTIONS */
 #define PTRACE_O_TRACESYSGOOD	0x00000001
 
+enum EFLAGS {
+	EF_CF		= 0x00000001,
+	EF_PF		= 0x00000004,
+	EF_AF		= 0x00000010,
+	EF_ZF		= 0x00000040,
+	EF_SF		= 0x00000080,
+	EF_TF		= 0x00000100,
+	EF_IE		= 0x00000200,
+	EF_DF		= 0x00000400,
+	EF_OF		= 0x00000800,
+	EF_IOPL		= 0x00003000,
+	EF_IOPL_RING0	= 0x00000000,
+	EF_IOPL_RING1	= 0x00001000,
+	EF_IOPL_RING2	= 0x00002000,
+	EF_NT		= 0x00004000,	/* nested task */
+	EF_RF		= 0x00010000,	/* resume */
+	EF_VM		= 0x00020000,	/* virtual mode */
+	EF_AC		= 0x00040000,	/* alignment */
+	EF_VIF		= 0x00080000,	/* virtual interrupt */
+	EF_VIP		= 0x00100000,	/* virtual interrupt pending */
+	EF_ID		= 0x00200000,	/* id */
+};
+
 #ifdef __KERNEL__
 #define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs))
 #define instruction_pointer(regs) ((regs)->eip)
diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/rmap.h linux-2.4.17-rc2-wli2/include/asm-i386/rmap.h
--- linux-2.4.17-rc2-virgin/include/asm-i386/rmap.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-i386/rmap.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,7 @@
+#ifndef _I386_RMAP_H
+#define _I386_RMAP_H
+
+/* nothing to see, move along */
+#include
+
+#endif
diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/smplock.h linux-2.4.17-rc2-wli2/include/asm-i386/smplock.h
--- linux-2.4.17-rc2-virgin/include/asm-i386/smplock.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-i386/smplock.h	Thu Dec 20 18:55:14 2001
@@ -10,7 +10,15 @@
 
 extern spinlock_t kernel_flag;
 
+#ifdef CONFIG_SMP
 #define kernel_locked()		spin_is_locked(&kernel_flag)
+#else
+#ifdef CONFIG_PREEMPT
+#define
kernel_locked() preempt_is_disabled() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -42,6 +50,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -53,6 +66,7 @@ "\n9:" :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); +#endif #endif } diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/softirq.h linux-2.4.17-rc2-wli2/include/asm-i386/softirq.h --- linux-2.4.17-rc2-virgin/include/asm-i386/softirq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/softirq.h Thu Dec 20 18:55:14 2001 @@ -5,9 +5,9 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) @@ -22,7 +22,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. */ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -44,5 +44,7 @@ : "r" (ptr), "i" (do_softirq) \ /* no registers clobbered */ ); \ } while (0) + +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-i386/spinlock.h linux-2.4.17-rc2-wli2/include/asm-i386/spinlock.h --- linux-2.4.17-rc2-virgin/include/asm-i386/spinlock.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-i386/spinlock.h Thu Dec 20 18:55:13 2001 @@ -77,7 +77,7 @@ :"=m" (lock->lock) : : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -97,7 +97,7 @@ :"=q" (oldval), "=m" (lock->lock) \ :"0" (oldval) : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { char oldval = 1; #if SPINLOCK_DEBUG @@ -113,7 +113,7 @@ #endif -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -123,7 +123,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -179,7 +179,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -188,7 +188,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -197,10 +197,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) 
asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN linux-2.4.17-rc2-virgin/include/asm-ia64/bootmem.h linux-2.4.17-rc2-wli2/include/asm-ia64/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-ia64/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-ia64/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,19 @@ +#ifndef _ASM_BOOTMEM_H +#define _ASM_BOOTMEM_H + +/* + * include/asm-ia64/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * ACPI on IA64 is one of the heaviest memory-reserving subsystems + * of any architecture. This leads to enough fragmentation to exhaust + * the segment pool with the default NR_SEGMENTS several times over. + * This value has been tested on Intel Lion systems, but the author + * is well-aware of systems requiring still higher values. + * + * This will go away entirely once page stealing is in place. + */ + +#define NR_SEGMENTS ((8*PAGE_SIZE)/sizeof(segment_buf_t)) + +#endif /* _ASM_BOOTMEM_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-ia64/ide.h linux-2.4.17-rc2-wli2/include/asm-ia64/ide.h --- linux-2.4.17-rc2-virgin/include/asm-ia64/ide.h Wed May 24 08:40:41 2000 +++ linux-2.4.17-rc2-wli2/include/asm-ia64/ide.h Thu Dec 20 19:49:13 2001 @@ -101,7 +101,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-ia64/kdb.h linux-2.4.17-rc2-wli2/include/asm-ia64/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-ia64/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-ia64/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,54 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H + + /* + * KDB_ENTER() is a macro which causes entry into the kernel + * debugger from any point in the kernel code stream. If it + * is intended to be used from interrupt level, it must use + * a non-maskable entry method. 
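+ *
+ * Editorial sketch, not part of the original header: a hypothetical
+ * caller (the function name is illustrative only) drops into the
+ * debugger from C code like so:
+ *
+ *	static void my_fatal_error(void)
+ *	{
+ *		printk("my_driver: entering kdb\n");
+ *		KDB_ENTER();	-- expands to asm("\tbreak 0x80101\n")
+ *	}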
+ */
+#define KDB_BREAK_BREAK	0x80100		/* kdb breakpoint in kernel */
+#define KDB_BREAK_ENTER	0x80101		/* KDB_ENTER() */
+#define KDB_ENTER2(b)	asm("\tbreak "#b"\n")
+#define KDB_ENTER1(b)	KDB_ENTER2(b)
+#define KDB_ENTER()	KDB_ENTER1(KDB_BREAK_ENTER)
+
+	/*
+	 * Define the exception frame for this architecture
+	 */
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+
+	/*
+	 * Needed for exported symbols.
+	 */
+typedef unsigned long kdb_machreg_t;
+
+#define kdb_machreg_fmt		"0x%lx"
+#define kdb_machreg_fmt0	"0x%016lx"
+#define kdb_bfd_vma_fmt		"0x%lx"
+#define kdb_bfd_vma_fmt0	"0x%016lx"
+#define kdb_elfw_addr_fmt	"0x%lx"
+#define kdb_elfw_addr_fmt0	"0x%016lx"
+
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-ia64/kdbprivate.h linux-2.4.17-rc2-wli2/include/asm-ia64/kdbprivate.h
--- linux-2.4.17-rc2-virgin/include/asm-ia64/kdbprivate.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-ia64/kdbprivate.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,122 @@
+/*
+ * Minimalist Kernel Debugger
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com)
+ * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com)
+ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+ *
+ * See the file LIA-COPYRIGHT for additional information.
+ *
+ * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+ *
+ * Modifications from:
+ *	Richard Bass			1999/07/20
+ *		Many bug fixes and enhancements.
+ *	Scott Foehner
+ *		Port to ia64
+ *	Scott Lurndal			1999/12/12
+ *		v1.0 restructuring.
+ */
+#if !defined(_ASM_KDBPRIVATE_H)
+#define _ASM_KDBPRIVATE_H
+
+/* Definition of a machine instruction.
+ * Takes care of VLIW processors like Itanium
+ */
+
+typedef struct {
+	unsigned long inst[2];
+} kdb_machinst_t;
+
+	/*
+	 * KDB_MAXBPT describes the total number of breakpoints
+	 * supported by this architecture.
+	 */
+#define KDB_MAXBPT	16
+	/*
+	 * KDB_MAXHARDBPT describes the total number of hardware
+	 * breakpoint registers that exist.
+	 */
+#define KDB_MAXHARDBPT	4
+	/*
+	 * Provide space for KDB_MAX_COMMANDS commands.
+	 */
+#define KDB_MAX_COMMANDS	125
+
+	/*
+	 * Platform specific environment entries
+	 */
+#define KDB_PLATFORM_ENV	"IDMODE=ia64", "BYTESPERWORD=4", "IDCOUNT=8"
+
+	/*
+	 * Define the direction that the stack grows
+	 */
+#define KDB_STACK_DIRECTION	(-1)	/* Stack grows down */
+
+	/*
+	 * Support for IA64 debug registers
+	 */
+typedef struct _kdbhard_bp {
+	kdb_machreg_t	bph_reg;	/* Register this breakpoint uses */
+
+	unsigned int	bph_free:1;	/* Register available for use */
+	unsigned int	bph_data:1;	/* Data Access breakpoint */
+
+	unsigned int	bph_write:1;	/* Write Data breakpoint */
+	unsigned int	bph_mode:2;	/* 0=inst, 1=write, 2=io, 3=read */
+	unsigned int	bph_length:2;	/* 0=1, 1=2, 2=BAD, 3=4 (bytes) */
+} kdbhard_bp_t;
+
+extern kdbhard_bp_t	kdb_hardbreaks[/* KDB_MAXHARDBPT */];
+
+#define getprsregs(regs)	((struct switch_stack *)regs -1)
+
+extern struct switch_stack *kdb_sw[ /*NR_CPUS*/ ];
+
+/* bkpt support using break inst instead of IBP reg */
+
+/*
+ * Define certain specific instructions
+ */
+#define BREAK_INSTR		(long)(KDB_BREAK_BREAK << (5+6))
+#define INST_SLOT0_MASK		(0x1ffffffffffL << 5)
+
+#define BKPTMODE_DATAR	3
+#define BKPTMODE_IO	2
+#define BKPTMODE_DATAW	1
+#define BKPTMODE_INST	0
+
+/* Some of the fault registers needed by kdb but not passed with
+ * regs or switch stack.
+ */ +typedef struct fault_regs { + unsigned long isr ; + unsigned long ifa ; + unsigned long iim ; + unsigned long itir ; +} fault_regs_t ; + +#define KDB_HAVE_LONGJMP +#ifdef KDB_HAVE_LONGJMP +/* + * Support for setjmp/longjmp + */ + +/* __jmp_buf definition copied from libc/sysdeps/unix/sysv/linux/ia64/bits/setjmp.h */ + +#define _JBLEN 70 + +typedef struct __kdb_jmp_buf { + unsigned long __jmp_buf[_JBLEN]; +} kdb_jmp_buf __attribute__ ((aligned (16))); + +extern int kdba_setjmp(kdb_jmp_buf *); +extern int kdba_setjmp_asm(kdb_jmp_buf *); +extern void kdba_longjmp(kdb_jmp_buf *, int); +extern void kdba_longjmp_asm(kdb_jmp_buf *, int); + +extern kdb_jmp_buf kdbjmpbuf[]; +#endif /* KDB_HAVE_LONGJMP */ + +#endif /* !_ASM_KDBPRIVATE_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-m68k/bootmem.h linux-2.4.17-rc2-wli2/include/asm-m68k/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-m68k/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-m68k/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,11 @@ +/* + * include/asm-m68k/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * m68k should in all likelihood be happy with this value of + * NR_SEGMENTS, though testing has been obstructed + * by issues unrelated to bootmem. + * NR_SEGMENTS will go away entirely once page stealing is in place. + */ + +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-m68k/ide.h linux-2.4.17-rc2-wli2/include/asm-m68k/ide.h --- linux-2.4.17-rc2-virgin/include/asm-m68k/ide.h Mon Jun 11 19:15:27 2001 +++ linux-2.4.17-rc2-wli2/include/asm-m68k/ide.h Thu Dec 20 19:49:13 2001 @@ -89,7 +89,19 @@ unsigned unit : 1; /* drive select number, 0 or 1 */ unsigned head : 4; /* always zeros here */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned HOB : 1; /* 48-bit address ordering */ + unsigned reserved456 : 3; + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned bit0 : 1; + } b; +} control_t; static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long flags, const char *device, void *dev_id) diff -urN linux-2.4.17-rc2-virgin/include/asm-m68k/kdb.h linux-2.4.17-rc2-wli2/include/asm-m68k/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-m68k/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-m68k/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for m68k. + */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H +#define KDB_ENTER() +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; +typedef unsigned long kdb_machreg_t; +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-mips/bootmem.h linux-2.4.17-rc2-wli2/include/asm-mips/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-mips/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-mips/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,11 @@ +/* + * include/asm-mips/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * This value of NR_SEGMENTS has been tested on a DecStation 5000/200 + * and it was happy with it. That does not rule out a possible need to + * increase the value on systems I've not tested. + * NR_SEGMENTS will go away once page stealing is in place. 
+ */
+
+#include
diff -urN linux-2.4.17-rc2-virgin/include/asm-mips/ide.h linux-2.4.17-rc2-wli2/include/asm-mips/ide.h
--- linux-2.4.17-rc2-virgin/include/asm-mips/ide.h	Sun Sep  9 10:43:01 2001
+++ linux-2.4.17-rc2-wli2/include/asm-mips/ide.h	Thu Dec 20 19:49:13 2001
@@ -92,6 +92,27 @@
 	} b;
 } select_t;
 
+typedef union {
+	unsigned all : 8;	/* all of the bits together */
+	struct {
+#ifdef __MIPSEB__
+		unsigned HOB		: 1;	/* 48-bit address ordering */
+		unsigned reserved456	: 3;
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned SRST		: 1;	/* host soft reset bit */
+		unsigned nIEN		: 1;	/* device INTRQ to host */
+		unsigned bit0		: 1;
+#else
+		unsigned bit0		: 1;
+		unsigned nIEN		: 1;	/* device INTRQ to host */
+		unsigned SRST		: 1;	/* host soft reset bit */
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned reserved456	: 3;
+		unsigned HOB		: 1;	/* 48-bit address ordering */
+#endif
+	} b;
+} control_t;
+
 static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int,void *, struct pt_regs *),
 			unsigned long flags, const char *device, void *dev_id)
 {
diff -urN linux-2.4.17-rc2-virgin/include/asm-mips/kdb.h linux-2.4.17-rc2-wli2/include/asm-mips/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-mips/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-mips/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for mips.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-mips64/bootmem.h linux-2.4.17-rc2-wli2/include/asm-mips64/bootmem.h
--- linux-2.4.17-rc2-virgin/include/asm-mips64/bootmem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-mips64/bootmem.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,11 @@
+/*
+ * include/asm-mips64/bootmem.h
+ * (C) Nov 2001 William Irwin, IBM
+ *
+ * mips64 includes some very large memory machines with very fragmented
+ * memory. There are also likely to be patch conflicts as the discontig
+ * patch touches bootmem. This value is almost certainly wrong.
+ * Fortunately, NR_SEGMENTS will go away soon.
+ */
+
+#include
diff -urN linux-2.4.17-rc2-virgin/include/asm-mips64/ide.h linux-2.4.17-rc2-wli2/include/asm-mips64/ide.h
--- linux-2.4.17-rc2-virgin/include/asm-mips64/ide.h	Sun Sep  9 10:43:02 2001
+++ linux-2.4.17-rc2-wli2/include/asm-mips64/ide.h	Thu Dec 20 19:49:13 2001
@@ -85,7 +85,19 @@
 		unsigned lba		: 1;	/* using LBA instead of CHS */
 		unsigned bit7		: 1;	/* always 1 */
 	} b;
-	} select_t;
+} select_t;
+
+typedef union {
+	unsigned all : 8;	/* all of the bits together */
+	struct {
+		unsigned bit0		: 1;
+		unsigned nIEN		: 1;	/* device INTRQ to host */
+		unsigned SRST		: 1;	/* host soft reset bit */
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned reserved456	: 3;
+		unsigned HOB		: 1;	/* 48-bit address ordering */
+	} b;
+} control_t;
 
 static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int,void *, struct pt_regs *),
 			unsigned long flags, const char *device, void *dev_id)
diff -urN linux-2.4.17-rc2-virgin/include/asm-mips64/kdb.h linux-2.4.17-rc2-wli2/include/asm-mips64/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-mips64/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-mips64/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for mips64.
+ */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H +#define KDB_ENTER() +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; +typedef unsigned long kdb_machreg_t; +#endif /* ASM_KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/asm-parisc/bootmem.h linux-2.4.17-rc2-wli2/include/asm-parisc/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-parisc/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-parisc/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,10 @@ +/* + * include/asm-parisc/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * PA-RISC memory maps have relatively few contiguous + * ranges of available memory, and so the generic NR_SEGMENTS + * will suffice until NR_SEGMENTS is eliminated. + */ + +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-parisc/ide.h linux-2.4.17-rc2-wli2/include/asm-parisc/ide.h --- linux-2.4.17-rc2-virgin/include/asm-parisc/ide.h Tue Dec 5 12:29:39 2000 +++ linux-2.4.17-rc2-wli2/include/asm-parisc/ide.h Thu Dec 20 19:49:13 2001 @@ -90,7 +90,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-ppc/bootmem.h linux-2.4.17-rc2-wli2/include/asm-ppc/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-ppc/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-ppc/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,10 @@ +/* + * include/asm-ppc/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * According to sources, 32-bit PPC has relatively few fragments + * of available memory, and so the generic NR_SEGMENTS should + * suffice until NR_SEGMENTS is eliminated. + */ + +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-ppc/ide.h linux-2.4.17-rc2-wli2/include/asm-ppc/ide.h --- linux-2.4.17-rc2-virgin/include/asm-ppc/ide.h Mon Oct 8 11:40:13 2001 +++ linux-2.4.17-rc2-wli2/include/asm-ppc/ide.h Thu Dec 20 19:49:13 2001 @@ -129,6 +129,18 @@ } b; } select_t; +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned HOB : 1; /* 48-bit address ordering */ + unsigned reserved456 : 3; + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned bit0 : 1; + } b; +} control_t; + #if !defined(ide_request_irq) #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #endif diff -urN linux-2.4.17-rc2-virgin/include/asm-ppc/kdb.h linux-2.4.17-rc2-wli2/include/asm-ppc/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-ppc/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-ppc/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for ppc. 
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-s390/bootmem.h linux-2.4.17-rc2-wli2/include/asm-s390/bootmem.h
--- linux-2.4.17-rc2-virgin/include/asm-s390/bootmem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-s390/bootmem.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,10 @@
+/*
+ * include/asm-s390/bootmem.h
+ * (C) Nov 2001 William Irwin, IBM
+ *
+ * S390 will probably not need to change NR_SEGMENTS,
+ * as setup.c tracks memory fragments on its own and
+ * insists on less than 16.
+ * NR_SEGMENTS will go away once page stealing is in place.
+ */
+#include
diff -urN linux-2.4.17-rc2-virgin/include/asm-s390/ide.h linux-2.4.17-rc2-wli2/include/asm-s390/ide.h
--- linux-2.4.17-rc2-virgin/include/asm-s390/ide.h	Fri May 12 11:41:44 2000
+++ linux-2.4.17-rc2-wli2/include/asm-s390/ide.h	Thu Dec 20 19:49:13 2001
@@ -26,7 +26,19 @@
 		unsigned lba		: 1;	/* using LBA instead of CHS */
 		unsigned bit7		: 1;	/* always 1 */
 	} b;
-	} select_t;
+} select_t;
+
+typedef union {
+	unsigned all : 8;	/* all of the bits together */
+	struct {
+		unsigned bit0		: 1;
+		unsigned nIEN		: 1;	/* device INTRQ to host */
+		unsigned SRST		: 1;	/* host soft reset bit */
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned reserved456	: 3;
+		unsigned HOB		: 1;	/* 48-bit address ordering */
+	} b;
+} control_t;
 
 #define ide_request_irq(irq,hand,flg,dev,id)	do {} while (0)
 #define ide_free_irq(irq,dev_id)	do {} while (0)
diff -urN linux-2.4.17-rc2-virgin/include/asm-s390/kdb.h linux-2.4.17-rc2-wli2/include/asm-s390/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-s390/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-s390/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for s390.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-s390x/bootmem.h linux-2.4.17-rc2-wli2/include/asm-s390x/bootmem.h
--- linux-2.4.17-rc2-virgin/include/asm-s390x/bootmem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-s390x/bootmem.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,10 @@
+/*
+ * include/asm-s390x/bootmem.h
+ * (C) Nov 2001 William Irwin, IBM
+ *
+ * S390x is unlikely to need to change NR_SEGMENTS, as it tracks ranges
+ * itself in setup.c and uses less than 16.
+ * NR_SEGMENTS will go away once page stealing is in place in the
+ * bootmem allocator.
+ */ +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-s390x/ide.h linux-2.4.17-rc2-wli2/include/asm-s390x/ide.h --- linux-2.4.17-rc2-virgin/include/asm-s390x/ide.h Tue Feb 13 14:13:44 2001 +++ linux-2.4.17-rc2-wli2/include/asm-s390x/ide.h Thu Dec 20 19:49:13 2001 @@ -26,7 +26,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) do {} while (0) #define ide_free_irq(irq,dev_id) do {} while (0) diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/bootmem.h linux-2.4.17-rc2-wli2/include/asm-sh/bootmem.h --- linux-2.4.17-rc2-virgin/include/asm-sh/bootmem.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-sh/bootmem.h Tue Dec 18 22:28:42 2001 @@ -0,0 +1,8 @@ +/* + * include/asm-sh/bootmem.h + * (C) Nov 2001 William Irwin, IBM + * + * Super-H has not been tested, so NR_SEGMENTS may need to change. + * NR_SEGMENTS will be eliminated once page stealing is in place. + */ +#include diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/hardirq.h linux-2.4.17-rc2-wli2/include/asm-sh/hardirq.h --- linux-2.4.17-rc2-virgin/include/asm-sh/hardirq.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/asm-sh/hardirq.h Tue Dec 18 22:28:42 2001 @@ -34,6 +34,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #error Super-H SMP is not available diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/ide.h linux-2.4.17-rc2-wli2/include/asm-sh/ide.h --- linux-2.4.17-rc2-virgin/include/asm-sh/ide.h Sat Sep 8 12:29:09 2001 +++ linux-2.4.17-rc2-wli2/include/asm-sh/ide.h Thu Dec 20 19:49:13 2001 @@ -116,7 +116,19 @@ unsigned lba : 1; /* using LBA instead of CHS */ unsigned bit7 : 1; /* always 1 */ } b; - } select_t; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/kdb.h linux-2.4.17-rc2-wli2/include/asm-sh/kdb.h --- linux-2.4.17-rc2-virgin/include/asm-sh/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/asm-sh/kdb.h Tue Dec 18 22:21:49 2001 @@ -0,0 +1,10 @@ +/* + * Dummy include/asm/kdb.h for sh. 
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/mmu_context.h linux-2.4.17-rc2-wli2/include/asm-sh/mmu_context.h
--- linux-2.4.17-rc2-virgin/include/asm-sh/mmu_context.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-sh/mmu_context.h	Tue Dec 18 22:28:42 2001
@@ -166,6 +166,10 @@
 				 struct mm_struct *next,
 				 struct task_struct *tsk, unsigned int cpu)
 {
+#ifdef CONFIG_PREEMPT
+	if (preempt_is_disabled() == 0)
+		BUG();
+#endif
 	if (prev != next) {
 		unsigned long __pgdir = (unsigned long)next->pgd;
 
diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/smplock.h linux-2.4.17-rc2-wli2/include/asm-sh/smplock.h
--- linux-2.4.17-rc2-virgin/include/asm-sh/smplock.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-sh/smplock.h	Tue Dec 18 22:28:42 2001
@@ -9,15 +9,88 @@
 
 #include
 
-#ifndef CONFIG_SMP
-
+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
+/*
+ * Should never happen, since linux/smp_lock.h catches this case;
+ * but in case this file is included directly with neither SMP nor
+ * PREEMPT configuration, provide the same dummies as linux/smp_lock.h
+ */
 #define lock_kernel()				do { } while(0)
 #define unlock_kernel()				do { } while(0)
-#define release_kernel_lock(task, cpu, depth)	((depth) = 1)
-#define reacquire_kernel_lock(task, cpu, depth)	do { } while(0)
+#define release_kernel_lock(task, cpu)		do { } while(0)
+#define reacquire_kernel_lock(task)		do { } while(0)
+#define kernel_locked()				1
+
+#else /* CONFIG_SMP || CONFIG_PREEMPT */
+
+#if CONFIG_SMP
+#error "We do not support SMP on SH yet"
+#endif
+/*
+ * Default SMP lock implementation (i.e. the i386 version)
+ */
+
+#include
+#include
+
+extern spinlock_t kernel_flag;
+#define lock_bkl()	spin_lock(&kernel_flag)
+#define unlock_bkl()	spin_unlock(&kernel_flag)
+#ifdef CONFIG_SMP
+#define kernel_locked()		spin_is_locked(&kernel_flag)
+#elif CONFIG_PREEMPT
+#define kernel_locked()		preempt_is_disabled()
+#else /* neither */
+#define kernel_locked()		1
+#endif
+
+/*
+ * Release global kernel lock and global interrupt lock
+ */
+#define release_kernel_lock(task, cpu) \
+do { \
+	if (task->lock_depth >= 0) \
+		spin_unlock(&kernel_flag); \
+	release_irqlock(cpu); \
+	__sti(); \
+} while (0)
+
+/*
+ * Re-acquire the kernel lock
+ */
+#define reacquire_kernel_lock(task) \
+do { \
+	if (task->lock_depth >= 0) \
+		spin_lock(&kernel_flag); \
+} while (0)
+
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously,
+ * so we only need to worry about other
+ * CPUs.
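+ *
+ * (Editorial sketch of the recursion lock_depth permits; the calls
+ * below are a hypothetical illustration, not part of this patch:
+ *
+ *	lock_kernel();		lock_depth -1 -> 0, BKL spinlock taken
+ *	lock_kernel();		lock_depth  0 -> 1, no spinlock work
+ *	unlock_kernel();	lock_depth  1 -> 0, BKL still held
+ *	unlock_kernel();	lock_depth  0 -> -1, BKL released
+ *
+ * Only the outermost lock/unlock pair touches kernel_flag.)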
+ */
+static __inline__ void lock_kernel(void)
+{
+#ifdef CONFIG_PREEMPT
+	if (current->lock_depth == -1)
+		spin_lock(&kernel_flag);
+	++current->lock_depth;
+#else
+	if (!++current->lock_depth)
+		spin_lock(&kernel_flag);
+#endif
+}
+
+static __inline__ void unlock_kernel(void)
+{
+	if (current->lock_depth < 0)
+		BUG();
+	if (--current->lock_depth < 0)
+		spin_unlock(&kernel_flag);
+}
+#endif /* CONFIG_SMP || CONFIG_PREEMPT */
 
 #endif /* __ASM_SH_SMPLOCK_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-sh/softirq.h linux-2.4.17-rc2-wli2/include/asm-sh/softirq.h
--- linux-2.4.17-rc2-virgin/include/asm-sh/softirq.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/asm-sh/softirq.h	Tue Dec 18 22:28:42 2001
@@ -6,6 +6,7 @@
 
 #define local_bh_disable() \
 do { \
+	preempt_disable(); \
 	local_bh_count(smp_processor_id())++; \
 	barrier(); \
 } while (0)
@@ -14,6 +15,7 @@
 do { \
 	barrier(); \
 	local_bh_count(smp_processor_id())--; \
+	preempt_enable(); \
 } while (0)
 
 #define local_bh_enable() \
@@ -22,6 +24,7 @@
 	if (!--local_bh_count(smp_processor_id()) \
 	    && softirq_pending(smp_processor_id())) { \
 		do_softirq(); \
 	} \
+	preempt_enable(); \
 } while (0)
 
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc/bootmem.h linux-2.4.17-rc2-wli2/include/asm-sparc/bootmem.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc/bootmem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-sparc/bootmem.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,11 @@
+/*
+ * include/asm-sparc/bootmem.h
+ * (C) Nov 2001 William Irwin, IBM
+ *
+ * 32-bit SPARC generally doesn't feature discontiguous
+ * memory, so this value of NR_SEGMENTS is likely to be good.
+ * NR_SEGMENTS will be eliminated once page stealing in
+ * the bootmem allocator is in place.
+ */
+
+#include
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc/ide.h linux-2.4.17-rc2-wli2/include/asm-sparc/ide.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc/ide.h	Mon Jun 19 17:59:39 2000
+++ linux-2.4.17-rc2-wli2/include/asm-sparc/ide.h	Thu Dec 20 19:49:13 2001
@@ -84,6 +84,18 @@
 	} b;
 } select_t;
 
+typedef union {
+	unsigned int all	: 8;	/* all of the bits together */
+	struct {
+		unsigned int HOB	: 1;	/* 48-bit address ordering */
+		unsigned int reserved456: 3;
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned int SRST	: 1;	/* host soft reset bit */
+		unsigned int nIEN	: 1;	/* device INTRQ to host */
+		unsigned int bit0	: 1;
+	} b;
+} control_t;
+
 static __inline__ int ide_request_irq(unsigned int irq,
 				      void (*handler)(int, void *, struct pt_regs *),
 				      unsigned long flags, const char *name, void *devid)
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc/kdb.h linux-2.4.17-rc2-wli2/include/asm-sparc/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-sparc/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for sparc.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc64/bootmem.h linux-2.4.17-rc2-wli2/include/asm-sparc64/bootmem.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc64/bootmem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-sparc64/bootmem.h	Tue Dec 18 22:28:42 2001
@@ -0,0 +1,10 @@
+/*
+ * include/asm-sparc64/bootmem.h
+ * (C) Nov 2001 William Irwin, IBM
+ *
+ * 64-bit SPARC may need a larger NR_SEGMENTS than this
+ * but it's not clear what a better value would be.
+ * NR_SEGMENTS will be eliminated once page stealing
+ * in the bootmem allocator is in place.
+ */
+#include
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc64/ide.h linux-2.4.17-rc2-wli2/include/asm-sparc64/ide.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc64/ide.h	Mon Oct  1 09:19:56 2001
+++ linux-2.4.17-rc2-wli2/include/asm-sparc64/ide.h	Thu Dec 20 19:49:13 2001
@@ -80,6 +80,18 @@
 	} b;
 } select_t;
 
+typedef union {
+	unsigned int all	: 8;	/* all of the bits together */
+	struct {
+		unsigned int HOB	: 1;	/* 48-bit address ordering */
+		unsigned int reserved456: 3;
+		unsigned bit3		: 1;	/* ATA-2 thingy */
+		unsigned int SRST	: 1;	/* host soft reset bit */
+		unsigned int nIEN	: 1;	/* device INTRQ to host */
+		unsigned int bit0	: 1;
+	} b;
+} control_t;
+
 static __inline__ int ide_request_irq(unsigned int irq,
 				      void (*handler)(int, void *, struct pt_regs *),
 				      unsigned long flags, const char *name, void *devid)
diff -urN linux-2.4.17-rc2-virgin/include/asm-sparc64/kdb.h linux-2.4.17-rc2-wli2/include/asm-sparc64/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-sparc64/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-sparc64/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for sparc64.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/asm-um/kdb.h linux-2.4.17-rc2-wli2/include/asm-um/kdb.h
--- linux-2.4.17-rc2-virgin/include/asm-um/kdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/asm-um/kdb.h	Tue Dec 18 22:21:49 2001
@@ -0,0 +1,10 @@
+/*
+ * Dummy include/asm/kdb.h for uml.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+#define KDB_ENTER()
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+typedef unsigned long kdb_machreg_t;
+#endif	/* ASM_KDB_H */
diff -urN linux-2.4.17-rc2-virgin/include/linux/blkcdb.h linux-2.4.17-rc2-wli2/include/linux/blkcdb.h
--- linux-2.4.17-rc2-virgin/include/linux/blkcdb.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/linux/blkcdb.h	Thu Dec 20 19:49:13 2001
@@ -0,0 +1,101 @@
+/*
+ * 2.5 Command Descriptor Block (CDB) Block Pre-Handler.
+ *
+ * Copyright (C) 2001 Andre Hedrick
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_BLKCDB_H
+#define _LINUX_BLKCDB_H
+
+typedef struct cdb_list {
+#if 0
+	unsigned char cdb_0;
+	unsigned char cdb_1;
+	unsigned char cdb_2;
+	unsigned char cdb_3;
+	unsigned char cdb_4;
+	unsigned char cdb_5;
+	unsigned char cdb_6;
+	unsigned char cdb_7;
+	unsigned char cdb_8;
+	unsigned char cdb_9;
+	unsigned char cdb_10;
+	unsigned char cdb_11;
+	unsigned char cdb_12;
+	unsigned char cdb_13;
+	unsigned char cdb_14;
+	unsigned char cdb_15;
+#else
+	unsigned char cdb_regs[16];
+#endif
+} cdb_list_t;
+
+#if 0
+
+typedef cdb_list_t * (queue_proc) (kdev_t dev);
+
+request_queue_t *ide_get_queue (kdev_t dev)
+{
+	ide_hwif_t *hwif = (ide_hwif_t *)blk_dev[MAJOR(dev)].data;
+
+	return &hwif->drives[DEVICE_NR(dev) & 1].queue;
+}
+
+static request_queue_t *sd_find_queue(kdev_t dev)
+{
+	Scsi_Disk *dpnt;
+	int target;
+	target = DEVICE_NR(dev);
+
+	dpnt = &rscsi_disks[target];
+	if (!dpnt)
+		return NULL;	/* No such device */
+	return &dpnt->device->request_queue;
+}
+
+prebuilder: NULL,
+block_device_operations
+struct block_device {
+
+void do_ide_request(request_queue_t *q)
+
+ide_do_request
+
+typedef cdb_list_t (request_cdb_proc) (request_queue_t *q);
+
+typedef cdb_list_t (request_cdb_proc) (request_queue_t *q);
+typedef void (request_fn_proc) (request_queue_t *q);
+
+srb
+
+switch (SCpnt->request.cmd)
+SCpnt->cmnd[0] = WRITE_6/READ_6;
+SCpnt->cmnd[1] = (SCpnt->device->scsi_level <= SCSI_2) ?
+		 ((SCpnt->lun << 5) & 0xe0) : 0;
+SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
+SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
+SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
+SCpnt->cmnd[5] = (unsigned char) block & 0xff;
+SCpnt->cmnd[6] = 0;
+SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
+SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
+SCpnt->cmnd[9] = 0;
+
+#endif
+
+#endif /* _LINUX_BLKCDB_H */
+
diff -urN linux-2.4.17-rc2-virgin/include/linux/blkdev.h linux-2.4.17-rc2-wli2/include/linux/blkdev.h
--- linux-2.4.17-rc2-virgin/include/linux/blkdev.h	Mon Nov 26 05:29:17 2001
+++ linux-2.4.17-rc2-wli2/include/linux/blkdev.h	Thu Dec 20 19:49:13 2001
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+/* #include */
 
 struct request_queue;
 typedef struct request_queue request_queue_t;
@@ -37,6 +38,7 @@
 	unsigned int nr_hw_segments;
 	unsigned long current_nr_sectors;
 	void * special;
+/*	void * cdb; */
 	char * buffer;
 	struct completion * waiting;
 	struct buffer_head * bh;
diff -urN linux-2.4.17-rc2-virgin/include/linux/bootmem.h linux-2.4.17-rc2-wli2/include/linux/bootmem.h
--- linux-2.4.17-rc2-virgin/include/linux/bootmem.h	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/include/linux/bootmem.h	Thu Dec 20 18:56:26 2001
@@ -1,5 +1,6 @@
 /*
  * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ * Segment tree-based memory reservation system, William Irwin, IBM, Oct 2001
  */
 #ifndef _LINUX_BOOTMEM_H
 #define _LINUX_BOOTMEM_H
@@ -9,6 +10,8 @@
 #include
 #include
 #include
+#include
+#include
 
 /*
  * simple boot-time physical memory area allocator.
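[Editorial note: the hunk below swaps bootmem's last_offset/last_pos
allocation state for a segment tree of reserved ranges, whose node pool
is sized by the per-arch NR_SEGMENTS values in the asm-*/bootmem.h stubs
above; ia64 uses (8*PAGE_SIZE)/sizeof(segment_buf_t), e.g. 4096 entries
for 16 KB pages and a hypothetical 32-byte segment_buf_t. The real
segment_tree_root_t and segment_buf_t are defined outside this section,
so the C below is only a hedged sketch of the reservation idea, using a
free list where the patch uses a tree:

	/* Hypothetical segment record; field names are illustrative. */
	typedef struct segment_buf {
		unsigned long start, end;	/* reserved range of pfns */
		struct segment_buf *next;
	} segment_buf_t;

	#define NR_SEGMENTS 512			/* stand-in pool size */
	static segment_buf_t pool[NR_SEGMENTS];
	static segment_buf_t *free_segments;	/* cf. bootmem_data_t below */
	static segment_buf_t *reserved;

	static void segment_pool_init(void)
	{
		int i;
		/* thread the static pool onto the free list */
		for (i = 0; i < NR_SEGMENTS - 1; i++)
			pool[i].next = &pool[i + 1];
		pool[NR_SEGMENTS - 1].next = 0;
		free_segments = pool;
		reserved = 0;
	}

	/* Record [start, end) as reserved; failure when the pool is spent
	 * is exactly the exhaustion the ia64 comment above worries about. */
	static int reserve_segment(unsigned long start, unsigned long end)
	{
		segment_buf_t *seg = free_segments;
		if (!seg)
			return -1;
		free_segments = seg->next;
		seg->start = start;
		seg->end = end;
		seg->next = reserved;
		reserved = seg;
		return 0;
	}
]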
@@ -25,8 +28,8 @@ unsigned long node_boot_start; unsigned long node_low_pfn; void *node_bootmem_map; - unsigned long last_offset; - unsigned long last_pos; + segment_tree_root_t segment_tree; + segment_buf_t *free_segments; } bootmem_data_t; extern unsigned long __init bootmem_bootmap_pages (unsigned long); diff -urN linux-2.4.17-rc2-virgin/include/linux/brlock.h linux-2.4.17-rc2-wli2/include/linux/brlock.h --- linux-2.4.17-rc2-virgin/include/linux/brlock.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/brlock.h Thu Dec 20 18:55:45 2001 @@ -171,11 +171,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) -#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -urN linux-2.4.17-rc2-virgin/include/linux/dcache.h linux-2.4.17-rc2-wli2/include/linux/dcache.h --- linux-2.4.17-rc2-virgin/include/linux/dcache.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/dcache.h Thu Dec 20 18:55:13 2001 @@ -36,17 +36,58 @@ }; extern struct dentry_stat_t dentry_stat; -/* Name hashing routines. Initial hash value */ -/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ -#define init_name_hash() 0 +/* + * Fowler, Noll, & Vo hash function + * -- wli + */ + +/* + * Initial hash value for Fowler, Noll, & Vo hash function. + * FreeBSD appears to use 33554467UL decimal / 0x2000023UL hex. + * Sources I see elsewhere (Noll's webpage) describe using an offset + * basis of 2166136261UL decimal / 0x811C9DC5UL hex. + * -- wli + */ +#define init_name_hash() 0x811C9DC5UL -/* partial hash update function. Assume roughly 4 bits per character */ -static __inline__ unsigned long partial_name_hash(unsigned long c, unsigned long prevhash) +/* + * This is a multiplicative hash function using the prime 16777619 + * The Fowler, Noll, and Vo hash function is rated the best in + * string hashing benchmarks published on gcc-patches and NetBSD + * mailing lists. + * -- wli + */ +static __inline__ unsigned long partial_name_hash(unsigned long c, + unsigned long prevhash) { - return (prevhash + (c << 4) + (c >> 4)) * 11; + /* + * A multiplicative definition would be: + * --wli + */ + return (prevhash * 0x01000193UL) ^ c; + + /* + * If I were to get overcomplicated, I would decode things + * for each bit of 0x01000193UL and then expand to the shift + * and add operations explicitly in order to avoid reliance on + * the compiler for this. + * The register pressure generated by this may not be a win + * on i386 vs. actual multiplication, but results remain + * to be seen. 
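+ *
+ * (Editorial aside, a worked example of the multiplicative form on
+ * the two-character name "ab":
+ *
+ *	h = 0x811C9DC5UL;               init_name_hash()
+ *	h = (h * 0x01000193UL) ^ 'a';   partial_name_hash('a', h)
+ *	h = (h * 0x01000193UL) ^ 'b';   partial_name_hash('b', h)
+ *	h = (unsigned int) h;           end_name_hash(h)
+ *
+ * Each step multiplies by the FNV prime 16777619 and folds in one
+ * character; the explicit shift-and-add expansion of that multiply
+ * follows.)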
+ *
+ *	prevhash += (prevhash << 24)
+ *			+ (prevhash << 8)
+ *			+ (prevhash << 7)
+ *			+ (prevhash << 4)
+ *			+ (prevhash << 1);
+ *	return prevhash ^ c;
+ */
 }
 
-/* Finally: cut down the number of bits to a int value (and try to avoid losing bits) */
+/*
+ * Finally: cut down the number of bits to an int value (and try to
+ * avoid losing bits)
+ */
 static __inline__ unsigned long end_name_hash(unsigned long hash)
 {
 	return (unsigned int) hash;
@@ -126,31 +167,6 @@
 
 extern spinlock_t dcache_lock;
 
-/**
- * d_drop - drop a dentry
- * @dentry: dentry to drop
- *
- * d_drop() unhashes the entry from the parent
- * dentry hashes, so that it won't be found through
- * a VFS lookup any more. Note that this is different
- * from deleting the dentry - d_delete will try to
- * mark the dentry negative if possible, giving a
- * successful _negative_ lookup, while d_drop will
- * just make the cache lookup fail.
- *
- * d_drop() is used mainly for stuff that wants
- * to invalidate a dentry for some reason (NFS
- * timeouts or autofs deletes).
- */
-
-static __inline__ void d_drop(struct dentry * dentry)
-{
-	spin_lock(&dcache_lock);
-	list_del(&dentry->d_hash);
-	INIT_LIST_HEAD(&dentry->d_hash);
-	spin_unlock(&dcache_lock);
-}
-
 static __inline__ int dname_external(struct dentry *d)
 {
 	return d->d_name.name != d->d_iname;
@@ -275,3 +291,34 @@
 
 #endif /* __KERNEL__ */
 #endif /* __LINUX_DCACHE_H */
+
+#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
+#define __LINUX_DCACHE_H_INLINES
+
+#ifdef __KERNEL__
+/**
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+ * d_drop() unhashes the entry from the parent
+ * dentry hashes, so that it won't be found through
+ * a VFS lookup any more. Note that this is different
+ * from deleting the dentry - d_delete will try to
+ * mark the dentry negative if possible, giving a
+ * successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+ * d_drop() is used mainly for stuff that wants
+ * to invalidate a dentry for some reason (NFS
+ * timeouts or autofs deletes).
+ */
+
+static __inline__ void d_drop(struct dentry * dentry)
+{
+	spin_lock(&dcache_lock);
+	list_del(&dentry->d_hash);
+	INIT_LIST_HEAD(&dentry->d_hash);
+	spin_unlock(&dcache_lock);
+}
+#endif
+#endif
diff -urN linux-2.4.17-rc2-virgin/include/linux/dis-asm.h linux-2.4.17-rc2-wli2/include/linux/dis-asm.h
--- linux-2.4.17-rc2-virgin/include/linux/dis-asm.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/include/linux/dis-asm.h	Thu Dec 20 19:05:19 2001
@@ -0,0 +1,305 @@
+/* Interface between the opcode library and its callers.
+   Written by Cygnus Support, 1993.
+
+   The opcode library (libopcodes.a) provides instruction decoders for
+   a large variety of instruction sets, callable with an identical
+   interface, for making instruction-processing programs more independent
+   of the instruction set being processed.
*/ + +/* Hacked by Scott Lurndal at SGI (02/1999) for linux kernel debugger */ +/* Upgraded to cygnus CVS Keith Owens 30 Oct 2000 */ + +#ifndef DIS_ASM_H +#define DIS_ASM_H + +#ifdef __cplusplus +extern "C" { +#endif + + /* + * Misc definitions + */ +#define PARAMS(x) x +#define PTR void * +#define FILE int +#if !defined(NULL) +#define NULL 0 +#endif + +#define abort() dis_abort(__LINE__) + +static inline void +dis_abort(int line) +{ + panic("Aborting disassembler @ line %d\n", line); +} + +#include +#include +#define xstrdup(string) ({ char *res = kdb_strdup(string, GFP_ATOMIC); if (!res) BUG(); res; }) +#define xmalloc(size) ({ void *res = kmalloc(size, GFP_ATOMIC); if (!res) BUG(); res; }) +#define free(address) kfree(address) + +#include + +typedef int (*fprintf_ftype) PARAMS((PTR, const char*, ...)); + +enum dis_insn_type { + dis_noninsn, /* Not a valid instruction */ + dis_nonbranch, /* Not a branch instruction */ + dis_branch, /* Unconditional branch */ + dis_condbranch, /* Conditional branch */ + dis_jsr, /* Jump to subroutine */ + dis_condjsr, /* Conditional jump to subroutine */ + dis_dref, /* Data reference instruction */ + dis_dref2 /* Two data references in instruction */ +}; + +/* This struct is passed into the instruction decoding routine, + and is passed back out into each callback. The various fields are used + for conveying information from your main routine into your callbacks, + for passing information into the instruction decoders (such as the + addresses of the callback functions), or for passing information + back from the instruction decoders to their callers. + + It must be initialized before it is first passed; this can be done + by hand, or using one of the initialization macros below. */ + +typedef struct disassemble_info { + fprintf_ftype fprintf_func; + fprintf_ftype fprintf_dummy; + PTR stream; + PTR application_data; + + /* Target description. We could replace this with a pointer to the bfd, + but that would require one. There currently isn't any such requirement + so to avoid introducing one we record these explicitly. */ + /* The bfd_flavour. This can be bfd_target_unknown_flavour. */ + enum bfd_flavour flavour; + /* The bfd_arch value. */ + enum bfd_architecture arch; + /* The bfd_mach value. */ + unsigned long mach; + /* Endianness (for bi-endian cpus). Mono-endian cpus can ignore this. */ + enum bfd_endian endian; + + /* An array of pointers to symbols either at the location being disassembled + or at the start of the function being disassembled. The array is sorted + so that the first symbol is intended to be the one used. The others are + present for any misc. purposes. This is not set reliably, but if it is + not NULL, it is correct. */ + asymbol **symbols; + /* Number of symbols in array. */ + int num_symbols; + + /* For use by the disassembler. + The top 16 bits are reserved for public use (and are documented here). + The bottom 16 bits are for the internal use of the disassembler. */ + unsigned long flags; +#define INSN_HAS_RELOC 0x80000000 + PTR private_data; + + /* Function used to get bytes to disassemble. MEMADDR is the + address of the stuff to be disassembled, MYADDR is the address to + put the bytes in, and LENGTH is the number of bytes to read. + INFO is a pointer to this struct. + Returns an errno value or 0 for success. */ + int (*read_memory_func) + PARAMS ((bfd_vma memaddr, bfd_byte *myaddr, unsigned int length, + struct disassemble_info *info)); + + /* Function which should be called if we get an error that we can't + recover from. 
STATUS is the errno value from read_memory_func and + MEMADDR is the address that we were trying to read. INFO is a + pointer to this struct. */ + void (*memory_error_func) + PARAMS ((int status, bfd_vma memaddr, struct disassemble_info *info)); + + /* Function called to print ADDR. */ + void (*print_address_func) + PARAMS ((bfd_vma addr, struct disassemble_info *info)); + + /* Function called to determine if there is a symbol at the given ADDR. + If there is, the function returns 1, otherwise it returns 0. + This is used by ports which support an overlay manager where + the overlay number is held in the top part of an address. In + some circumstances we want to include the overlay number in the + address, (normally because there is a symbol associated with + that address), but sometimes we want to mask out the overlay bits. */ + int (* symbol_at_address_func) + PARAMS ((bfd_vma addr, struct disassemble_info * info)); + + /* These are for buffer_read_memory. */ + bfd_byte *buffer; + bfd_vma buffer_vma; + unsigned int buffer_length; + + /* This variable may be set by the instruction decoder. It suggests + the number of bytes objdump should display on a single line. If + the instruction decoder sets this, it should always set it to + the same value in order to get reasonable looking output. */ + int bytes_per_line; + + /* the next two variables control the way objdump displays the raw data */ + /* For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the */ + /* output will look like this: + 00: 00000000 00000000 + with the chunks displayed according to "display_endian". */ + int bytes_per_chunk; + enum bfd_endian display_endian; + + /* Number of octets per incremented target address + Normally one, but some DSPs have byte sizes of 16 or 32 bits + */ + unsigned int octets_per_byte; + + /* Results from instruction decoders. Not all decoders yet support + this information. This info is set each time an instruction is + decoded, and is only valid for the last such instruction. + + To determine whether this decoder supports this information, set + insn_info_valid to 0, decode an instruction, then check it. */ + + char insn_info_valid; /* Branch info has been set. */ + char branch_delay_insns; /* How many sequential insn's will run before + a branch takes effect. (0 = normal) */ + char data_size; /* Size of data reference in insn, in bytes */ + enum dis_insn_type insn_type; /* Type of instruction */ + bfd_vma target; /* Target address of branch or dref, if known; + zero if unknown. */ + bfd_vma target2; /* Second target address for dref2 */ + + /* Command line options specific to the target disassembler. */ + char * disassembler_options; + +} disassemble_info; + + +/* Standard disassemblers. Disassemble one instruction at the given + target address. Return number of bytes processed. 
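+
+   Editorial usage sketch (hedged, not from the original header), where
+   my_fprintf is a hypothetical callback with an fprintf-style
+   (stream, format, ...) signature:
+
+	disassemble_info info;
+	bfd_byte code[] = { 0xc3 };		-- "ret" on i386
+	INIT_DISASSEMBLE_INFO(info, 0, my_fprintf);
+	info.buffer = code;
+	info.buffer_vma = 0;
+	info.buffer_length = sizeof(code);
+	print_insn_i386_att(0, &info);		-- decode one instruction
+
+   INIT_DISASSEMBLE_INFO (defined near the end of this file) wires up
+   buffer_read_memory, so the three buffer fields are all the decoder
+   needs in order to fetch instruction bytes.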
*/ +typedef int (*disassembler_ftype) + PARAMS((bfd_vma, disassemble_info *)); + +extern int print_insn_big_mips PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_little_mips PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_i386_att PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_i386_intel PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_ia64 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_i370 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_m68hc11 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_m68hc12 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_m68k PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_z8001 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_z8002 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_h8300 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_h8300h PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_h8300s PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_h8500 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_alpha PARAMS ((bfd_vma, disassemble_info*)); +extern disassembler_ftype arc_get_disassembler PARAMS ((int, int)); +extern int print_insn_big_arm PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_little_arm PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_sparc PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_big_a29k PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_little_a29k PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_i860 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_i960 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_sh PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_shl PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_hppa PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_fr30 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_m32r PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_m88k PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_mcore PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_mn10200 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_mn10300 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_ns32k PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_big_powerpc PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_little_powerpc PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_rs6000 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_w65 PARAMS ((bfd_vma, disassemble_info*)); +extern disassembler_ftype cris_get_disassembler PARAMS ((bfd *)); +extern int print_insn_d10v PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_d30v PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_v850 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_tic30 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_vax PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_tic54x PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_tic80 PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_pj PARAMS ((bfd_vma, disassemble_info*)); +extern int print_insn_avr PARAMS ((bfd_vma, disassemble_info*)); + +extern void print_arm_disassembler_options PARAMS ((FILE *)); +extern void parse_arm_disassembler_option PARAMS ((char *)); +extern int get_arm_regname_num_options PARAMS 
((void)); +extern int set_arm_regname_option PARAMS ((int)); +extern int get_arm_regnames PARAMS ((int, const char **, const char **, const char ***)); + +/* Fetch the disassembler for a given BFD, if that support is available. */ +extern disassembler_ftype disassembler PARAMS ((bfd *)); + +/* Document any target specific options available from the disassembler. */ +extern void disassembler_usage PARAMS ((FILE *)); + + +/* This block of definitions is for particular callers who read instructions + into a buffer before calling the instruction decoder. */ + +/* Here is a function which callers may wish to use for read_memory_func. + It gets bytes from a buffer. */ +extern int buffer_read_memory + PARAMS ((bfd_vma, bfd_byte *, unsigned int, struct disassemble_info *)); + +/* This function goes with buffer_read_memory. + It prints a message using info->fprintf_func and info->stream. */ +extern void perror_memory PARAMS ((int, bfd_vma, struct disassemble_info *)); + + +/* Just print the address in hex. This is included for completeness even + though both GDB and objdump provide their own (to print symbolic + addresses). */ +extern void generic_print_address + PARAMS ((bfd_vma, struct disassemble_info *)); + +/* Always true. */ +extern int generic_symbol_at_address + PARAMS ((bfd_vma, struct disassemble_info *)); + +/* Macro to initialize a disassemble_info struct. This should be called + by all applications creating such a struct. */ +#define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC) \ + (INFO).flavour = bfd_target_unknown_flavour, \ + (INFO).arch = bfd_arch_unknown, \ + (INFO).mach = 0, \ + (INFO).endian = BFD_ENDIAN_UNKNOWN, \ + (INFO).octets_per_byte = 1, \ + INIT_DISASSEMBLE_INFO_NO_ARCH(INFO, STREAM, FPRINTF_FUNC) + +/* Call this macro to initialize only the internal variables for the + disassembler. Architecture dependent things such as byte order, or machine + variant are not touched by this macro. This makes things much easier for + GDB which must initialize these things separately. */ + +#define INIT_DISASSEMBLE_INFO_NO_ARCH(INFO, STREAM, FPRINTF_FUNC) \ + (INFO).fprintf_func = (fprintf_ftype)(FPRINTF_FUNC), \ + (INFO).stream = (PTR)(STREAM), \ + (INFO).symbols = NULL, \ + (INFO).num_symbols = 0, \ + (INFO).buffer = NULL, \ + (INFO).buffer_vma = 0, \ + (INFO).buffer_length = 0, \ + (INFO).read_memory_func = buffer_read_memory, \ + (INFO).memory_error_func = perror_memory, \ + (INFO).print_address_func = generic_print_address, \ + (INFO).symbol_at_address_func = generic_symbol_at_address, \ + (INFO).flags = 0, \ + (INFO).bytes_per_line = 0, \ + (INFO).bytes_per_chunk = 0, \ + (INFO).display_endian = BFD_ENDIAN_UNKNOWN, \ + (INFO).insn_info_valid = 0 + +#ifdef __cplusplus +}; +#endif + +#endif /* ! 
defined (DIS_ASM_H) */ diff -urN linux-2.4.17-rc2-virgin/include/linux/elevator.h linux-2.4.17-rc2-wli2/include/linux/elevator.h --- linux-2.4.17-rc2-virgin/include/linux/elevator.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/elevator.h Thu Dec 20 17:58:07 2001 @@ -5,8 +5,9 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn)(request_queue_t *, struct request **, + struct list_head *, struct buffer_head *bh, + int rw, int max_sectors, int max_bomb_segments); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); @@ -16,6 +17,7 @@ { int read_latency; int write_latency; + int max_bomb_segments; elevator_merge_fn *elevator_merge_fn; elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; @@ -24,13 +26,13 @@ unsigned int queue_ID; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); -void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); -void elevator_noop_merge_req(struct request *, struct request *); - -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); -void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); -void elevator_linus_merge_req(struct request *, struct request *); +elevator_merge_fn elevator_noop_merge; +elevator_merge_cleanup_fn elevator_noop_merge_cleanup; +elevator_merge_req_fn elevator_noop_merge_req; + +elevator_merge_fn elevator_linus_merge; +elevator_merge_cleanup_fn elevator_linus_merge_cleanup; +elevator_merge_req_fn elevator_linus_merge_req; typedef struct blkelv_ioctl_arg_s { int queue_ID; @@ -54,22 +56,6 @@ #define ELEVATOR_FRONT_MERGE 1 #define ELEVATOR_BACK_MERGE 2 -/* - * This is used in the elevator algorithm. We don't prioritise reads - * over writes any more --- although reads are more time-critical than - * writes, by treating them equally we increase filesystem throughput. - * This turns out to give better overall performance. 
-- sct - */ -#define IN_ORDER(s1,s2) \ - ((((s1)->rq_dev == (s2)->rq_dev && \ - (s1)->sector < (s2)->sector)) || \ - (s1)->rq_dev < (s2)->rq_dev) - -#define BHRQ_IN_ORDER(bh, rq) \ - ((((bh)->b_rdev == (rq)->rq_dev && \ - (bh)->b_rsector < (rq)->sector)) || \ - (bh)->b_rdev < (rq)->rq_dev) - static inline int elevator_request_latency(elevator_t * elevator, int rw) { int latency; @@ -85,7 +71,7 @@ ((elevator_t) { \ 0, /* read_latency */ \ 0, /* write_latency */ \ - \ + 0, /* max_bomb_segments */ \ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ @@ -95,7 +81,7 @@ ((elevator_t) { \ 8192, /* read passovers */ \ 16384, /* write passovers */ \ - \ + 6, /* max_bomb_segments */ \ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ diff -urN linux-2.4.17-rc2-virgin/include/linux/fs.h linux-2.4.17-rc2-wli2/include/linux/fs.h --- linux-2.4.17-rc2-virgin/include/linux/fs.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/fs.h Thu Dec 20 18:55:13 2001 @@ -283,7 +283,7 @@ extern void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); -#define touch_buffer(bh) mark_page_accessed(bh->b_page) +#define touch_buffer(bh) touch_page(bh->b_page) #include diff -urN linux-2.4.17-rc2-virgin/include/linux/fs_struct.h linux-2.4.17-rc2-wli2/include/linux/fs_struct.h --- linux-2.4.17-rc2-virgin/include/linux/fs_struct.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/fs_struct.h Tue Dec 18 22:28:42 2001 @@ -20,6 +20,15 @@ extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); +struct fs_struct *copy_fs_struct(struct fs_struct *old); +void put_fs_struct(struct fs_struct *fs); + +#endif +#endif + +#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_FS_STRUCT_H_INLINES +#ifdef __KERNEL__ /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. @@ -65,9 +74,5 @@ mntput(old_pwdmnt); } } - -struct fs_struct *copy_fs_struct(struct fs_struct *old); -void put_fs_struct(struct fs_struct *fs); - #endif #endif diff -urN linux-2.4.17-rc2-virgin/include/linux/hdreg.h linux-2.4.17-rc2-wli2/include/linux/hdreg.h --- linux-2.4.17-rc2-virgin/include/linux/hdreg.h Thu Nov 22 11:46:18 2001 +++ linux-2.4.17-rc2-wli2/include/linux/hdreg.h Thu Dec 20 19:49:13 2001 @@ -6,104 +6,269 @@ * Various sources. */ -#define HD_IRQ 14 /* the standard disk interrupt */ +#define HD_IRQ 14 /* the standard disk interrupt */ /* ide.c has its own port definitions in "ide.h" */ /* Hd controller regs. 
Ref: IBM AT Bios-listing */ -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ +#define HD_DATA 0x1f0 /* _CTL when writing */ +#define HD_ERROR 0x1f1 /* see err-bits */ +#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ +#define HD_SECTOR 0x1f3 /* starting sector */ +#define HD_LCYL 0x1f4 /* starting cylinder */ +#define HD_HCYL 0x1f5 /* high byte of starting cyl */ +#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ +#define HD_STATUS 0x1f7 /* see status-bits */ +#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ +#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ +#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ +#define HD_CMD 0x3f6 /* used for resets */ +#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ /* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */ /* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Values for HD_COMMAND */ -#define WIN_RESTORE 0x10 -#define WIN_READ 0x20 -#define WIN_WRITE 0x30 -#define WIN_WRITE_VERIFY 0x3C -#define WIN_VERIFY 0x40 -#define WIN_FORMAT 0x50 -#define WIN_INIT 0x60 -#define WIN_SEEK 0x70 -#define WIN_DIAGNOSE 0x90 -#define WIN_SPECIFY 0x91 /* set drive geometry translation */ -#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */ -#define WIN_SETIDLE1 0xE3 -#define WIN_SETIDLE2 0x97 - -#define WIN_STANDBYNOW1 0xE0 -#define WIN_STANDBYNOW2 0x94 -#define WIN_SLEEPNOW1 0xE6 -#define WIN_SLEEPNOW2 0x99 -#define WIN_CHECKPOWERMODE1 0xE5 -#define WIN_CHECKPOWERMODE2 0x98 - -#define WIN_DOORLOCK 0xDE /* lock door on removable drives */ -#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */ - -#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode */ -#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */ -#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */ -#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */ -#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */ -#define WIN_SETFEATURES 0xEF /* set special drive features */ -#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */ -#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */ - -#define WIN_QUEUED_SERVICE 0xA2 /* */ -#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */ -#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */ - -#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */ -#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */ - -#define WIN_SMART 0xB0 /* self-monitoring and reporting */ - -/* Additional 
drive command codes used by ATAPI devices. */ -#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */ -#define WIN_SRST 0x08 /* ATAPI soft reset command */ -#define WIN_PACKETCMD 0xA0 /* Send a packet command. */ +#define ERR_STAT 0x01 +#define INDEX_STAT 0x02 +#define ECC_STAT 0x04 /* Corrected error */ +#define DRQ_STAT 0x08 +#define SEEK_STAT 0x10 +#define WRERR_STAT 0x20 +#define READY_STAT 0x40 +#define BUSY_STAT 0x80 -#define DISABLE_SEAGATE 0xFB -#define EXABYTE_ENABLE_NEST 0xF0 +/* Bits for HD_ERROR */ +#define MARK_ERR 0x01 /* Bad address mark */ +#define TRK0_ERR 0x02 /* couldn't find track 0 */ +#define ABRT_ERR 0x04 /* Command aborted */ +#define MCR_ERR 0x08 /* media change request */ +#define ID_ERR 0x10 /* ID field not found */ +#define MC_ERR 0x20 /* media changed */ +#define ECC_ERR 0x40 /* Uncorrectable ECC error */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ + +/* + * Command Header sizes for IOCTL commands + * HDIO_DRIVE_CMD, HDIO_DRIVE_TASK, and HDIO_DRIVE_TASKFILE + */ + +#if 0 +#include +typedef ide_ioreg_t task_ioreg_t; +#else +typedef unsigned char task_ioreg_t; +#endif + +#define HDIO_DRIVE_CMD_HDR_SIZE 4*sizeof(task_ioreg_t) +#define HDIO_DRIVE_TASK_HDR_SIZE 8*sizeof(task_ioreg_t) +#define HDIO_DRIVE_HOB_HDR_SIZE 8*sizeof(task_ioreg_t) + +#define IDE_DRIVE_TASK_INVALID -1 +#define IDE_DRIVE_TASK_NO_DATA 0 +#define IDE_DRIVE_TASK_SET_XFER 1 + +#define IDE_DRIVE_TASK_IN 2 + +#define IDE_DRIVE_TASK_OUT 3 +#define IDE_DRIVE_TASK_RAW_WRITE 4 + +struct hd_drive_cmd_hdr { + task_ioreg_t command; + task_ioreg_t sector_number; + task_ioreg_t feature; + task_ioreg_t sector_count; +}; -/* WIN_SMART sub-commands */ +typedef struct hd_drive_task_hdr { + task_ioreg_t data; + task_ioreg_t feature; + task_ioreg_t sector_count; + task_ioreg_t sector_number; + task_ioreg_t low_cylinder; + task_ioreg_t high_cylinder; + task_ioreg_t device_head; + task_ioreg_t command; +} task_struct_t; + +typedef struct hd_drive_hob_hdr { + task_ioreg_t data; + task_ioreg_t feature; + task_ioreg_t sector_count; + task_ioreg_t sector_number; + task_ioreg_t low_cylinder; + task_ioreg_t high_cylinder; + task_ioreg_t device_head; + task_ioreg_t control; +} hob_struct_t; + +typedef union ide_reg_valid_s { + unsigned all : 16; + struct { + unsigned data : 1; + unsigned error_feature : 1; + unsigned sector : 1; + unsigned nsector : 1; + unsigned lcyl : 1; + unsigned hcyl : 1; + unsigned select : 1; + unsigned status_command : 1; + + unsigned data_hob : 1; + unsigned error_feature_hob : 1; + unsigned sector_hob : 1; + unsigned nsector_hob : 1; + unsigned lcyl_hob : 1; + unsigned hcyl_hob : 1; + unsigned select_hob : 1; + unsigned control_hob : 1; + } b; +} ide_reg_valid_t; + +/* + * Define standard taskfile in/out register + */ +#define IDE_TASKFILE_STD_OUT_FLAGS 0xFE +#define IDE_TASKFILE_STD_IN_FLAGS 0xFE +#define IDE_HOB_STD_OUT_FLAGS 0xC0 +#define IDE_HOB_STD_IN_FLAGS 0xC0 + +typedef struct ide_task_request_s { + task_ioreg_t io_ports[8]; + task_ioreg_t hob_ports[8]; + ide_reg_valid_t out_flags; + ide_reg_valid_t in_flags; + int data_phase; + int req_cmd; + unsigned long out_size; + unsigned long in_size; +} ide_task_request_t; + +typedef struct ide_ioctl_request_s { + ide_task_request_t *task_request; + unsigned char *out_buffer; + unsigned char *in_buffer; +} ide_ioctl_request_t; + +#define TASKFILE_INVALID 0x7fff +#define TASKFILE_48 0x8000 + +#define TASKFILE_NO_DATA 0x0000 + +#define TASKFILE_IN 0x0001 +#define 
TASKFILE_MULTI_IN 0x0002 + +#define TASKFILE_OUT 0x0004 +#define TASKFILE_MULTI_OUT 0x0008 +#define TASKFILE_IN_OUT 0x0010 + +#define TASKFILE_IN_DMA 0x0020 +#define TASKFILE_OUT_DMA 0x0040 +#define TASKFILE_IN_DMAQ 0x0080 +#define TASKFILE_OUT_DMAQ 0x0100 + +#define TASKFILE_P_IN 0x0200 +#define TASKFILE_P_OUT 0x0400 +#define TASKFILE_P_IN_DMA 0x0800 +#define TASKFILE_P_OUT_DMA 0x1000 +#define TASKFILE_P_IN_DMAQ 0x2000 +#define TASKFILE_P_OUT_DMAQ 0x4000 + +/* ATA/ATAPI Commands pre T13 Spec */ +#define WIN_NOP 0x00 +#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */ +#define WIN_SRST 0x08 /* ATAPI soft reset command */ +#define WIN_DEVICE_RESET 0x08 +#define WIN_RESTORE 0x10 +#define WIN_READ 0x20 /* 28-Bit */ +#define WIN_READ_EXT 0x24 /* 48-Bit */ +#define WIN_READDMA_EXT 0x25 /* 48-Bit */ +#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */ +#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */ +#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */ +#define WIN_WRITE 0x30 /* 28-Bit */ +#define WIN_WRITE_EXT 0x34 /* 48-Bit */ +#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */ +#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */ +#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */ +#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */ +#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */ +#define WIN_WRITE_VERIFY 0x3C /* 28-Bit */ +#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */ +#define WIN_VERIFY_EXT 0x42 /* 48-Bit */ +#define WIN_FORMAT 0x50 +#define WIN_INIT 0x60 +#define WIN_SEEK 0x70 +#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */ +#define WIN_DIAGNOSE 0x90 +#define WIN_SPECIFY 0x91 /* set drive geometry translation */ +#define WIN_DOWNLOAD_MICROCODE 0x92 +#define WIN_STANDBYNOW2 0x94 +#define WIN_SETIDLE2 0x97 +#define WIN_CHECKPOWERMODE2 0x98 +#define WIN_SLEEPNOW2 0x99 +#define WIN_PACKETCMD 0xA0 /* Send a packet command. 
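The ide_task_request_t/ide_ioctl_request_t pair above is the user-visible half of the raw taskfile interface (the HDIO_DRIVE_TASKFILE ioctl number appears further down). A hedged user-space sketch for a command with no data phase, assuming a bare request structure is sufficient in that case; send_nop and fd are illustrative:

    /* Hypothetical user-space caller: issue an ATA NOP through the raw
     * taskfile ioctl.  io_ports[] follows the hd_drive_task_hdr layout,
     * so index 7 is the command register. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/hdreg.h>

    int send_nop(int fd)
    {
            ide_task_request_t req;

            memset(&req, 0, sizeof(req));
            req.io_ports[7] = WIN_NOP;          /* command register */
            req.out_flags.b.status_command = 1; /* only the command is valid */
            req.data_phase = TASKFILE_NO_DATA;
            req.req_cmd = IDE_DRIVE_TASK_NO_DATA;
            return ioctl(fd, HDIO_DRIVE_TASKFILE, &req);
    }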
*/ +#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */ +#define WIN_QUEUED_SERVICE 0xA2 +#define WIN_SMART 0xB0 /* self-monitoring and reporting */ +#define CFA_ERASE_SECTORS 0xC0 +#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/ +#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */ +#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */ +#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */ +#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */ +#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */ +#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */ +#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */ +#define WIN_GETMEDIASTATUS 0xDA +#define WIN_DOORLOCK 0xDE /* lock door on removable drives */ +#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */ +#define WIN_STANDBYNOW1 0xE0 +#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */ +#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */ +#define WIN_SETIDLE1 0xE3 +#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */ +#define WIN_CHECKPOWERMODE1 0xE5 +#define WIN_SLEEPNOW1 0xE6 +#define WIN_FLUSH_CACHE 0xE7 +#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */ +#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */ +#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */ +#define WIN_MEDIAEJECT 0xED +#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */ +#define WIN_SETFEATURES 0xEF /* set special drive features */ +#define EXABYTE_ENABLE_NEST 0xF0 +#define WIN_SECURITY_SET_PASS 0xF1 +#define WIN_SECURITY_UNLOCK 0xF2 +#define WIN_SECURITY_ERASE_PREPARE 0xF3 +#define WIN_SECURITY_ERASE_UNIT 0xF4 +#define WIN_SECURITY_FREEZE_LOCK 0xF5 +#define WIN_SECURITY_DISABLE 0xF6 +#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */ +#define WIN_SET_MAX 0xF9 +#define DISABLE_SEAGATE 0xFB -#define SMART_READ_VALUES 0xd0 -#define SMART_READ_THRESHOLDS 0xd1 -#define SMART_AUTOSAVE 0xd2 -#define SMART_SAVE 0xd3 -#define SMART_IMMEDIATE_OFFLINE 0xd4 -#define SMART_READ_LOG_SECTOR 0xd5 -#define SMART_WRITE_LOG_SECTOR 0xd6 -#define SMART_WRITE_THRESHOLDS 0xd7 -#define SMART_ENABLE 0xd8 -#define SMART_DISABLE 0xd9 -#define SMART_STATUS 0xda -#define SMART_AUTO_OFFLINE 0xdb +/* WIN_SMART sub-commands */ +#define SMART_READ_VALUES 0xD0 +#define SMART_READ_THRESHOLDS 0xD1 +#define SMART_AUTOSAVE 0xD2 +#define SMART_SAVE 0xD3 +#define SMART_IMMEDIATE_OFFLINE 0xD4 +#define SMART_READ_LOG_SECTOR 0xD5 +#define SMART_WRITE_LOG_SECTOR 0xD6 +#define SMART_WRITE_THRESHOLDS 0xD7 +#define SMART_ENABLE 0xD8 +#define SMART_DISABLE 0xD9 +#define SMART_STATUS 0xDA +#define SMART_AUTO_OFFLINE 0xDB + +/* Password used in TF4 & TF5 executing SMART commands */ + +#define SMART_LCYL_PASS 0x4F +#define SMART_HCYL_PASS 0xC2 + /* WIN_SETFEATURES sub-commands */ #define SETFEATURES_EN_WCACHE 0x02 /* Enable write cache */ @@ -131,6 +296,7 @@ #define SETFEATURES_DIS_DEFECT 0x04 /* Disable Defect Management */ #define SETFEATURES_EN_APM 0x05 /* Enable advanced power management */ #define SETFEATURES_DIS_MSN 0x31 /* Disable Media Status Notification */ +#define SETFEATURES_EN_AAM 0x42 /* Enable Automatic Acoustic Management */ #define SETFEATURES_DIS_RLA 0x55 /* Disable read look-ahead feature */ #define SETFEATURES_EN_RI 0x5D /* Enable release interrupt */ #define SETFEATURES_EN_SI 0x5E /* Enable SERVICE interrupt */ @@ -141,29 +307,19 @@ #define SETFEATURES_EN_MSN 0x95 /* Enable 
Media Status Notification */ #define SETFEATURES_EN_RLA 0xAA /* Enable read look-ahead feature */ #define SETFEATURES_PREFETCH 0xAB /* Sets drive prefetch value */ +#define SETFEATURES_DIS_AAM 0xC2 /* Disable Automatic Acoustic Management */ #define SETFEATURES_EN_RPOD 0xCC /* Enable reverting to power on defaults */ #define SETFEATURES_DIS_RI 0xDD /* Disable release interrupt */ #define SETFEATURES_DIS_SI 0xDE /* Disable SERVICE interrupt */ /* WIN_SECURITY sub-commands */ -#define SECURITY_SET_PASSWORD 0xBA /* 0xF1 */ -#define SECURITY_UNLOCK 0xBB /* 0xF2 */ -#define SECURITY_ERASE_PREPARE 0xBC /* 0xF3 */ -#define SECURITY_ERASE_UNIT 0xBD /* 0xF4 */ -#define SECURITY_FREEZE_LOCK 0xBE /* 0xF5 */ -#define SECURITY_DISABLE_PASSWORD 0xBF /* 0xF6 */ - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ +#define SECURITY_SET_PASSWORD 0xBA +#define SECURITY_UNLOCK 0xBB +#define SECURITY_ERASE_PREPARE 0xBC +#define SECURITY_ERASE_UNIT 0xBD +#define SECURITY_FREEZE_LOCK 0xBE +#define SECURITY_DISABLE_PASSWORD 0xBF struct hd_geometry { unsigned char heads; @@ -172,6 +328,14 @@ unsigned long start; }; +/* BIG GEOMETRY */ +struct hd_big_geometry { + unsigned char heads; + unsigned char sectors; + unsigned int cylinders; + unsigned long start; +}; + /* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x030n/0x031n */ #define HDIO_GETGEO 0x0301 /* get device geometry */ #define HDIO_GET_UNMASKINTR 0x0302 /* get current unmask setting */ @@ -186,9 +350,10 @@ #define HDIO_GET_IDENTITY 0x030d /* get IDE identification info */ #define HDIO_GET_WCACHE 0x030e /* get write cache mode on|off */ #define HDIO_GET_ACOUSTIC 0x030f /* get acoustic value */ +#define HDIO_GET_ADDRESS 0x0310 /* */ #define HDIO_GET_BUSSTATE 0x031a /* get the bus state of the hwif */ -#define HDIO_TRISTATE_HWIF 0x031b /* OBSOLETE - use SET_BUSSTATE */ +#define HDIO_TRISTATE_HWIF 0x031b /* execute a channel tristate */ #define HDIO_DRIVE_RESET 0x031c /* execute a device reset */ #define HDIO_DRIVE_TASKFILE 0x031d /* execute raw taskfile */ #define HDIO_DRIVE_TASK 0x031e /* execute task and special drive command */ @@ -211,6 +376,7 @@ #define HDIO_SET_ACOUSTIC 0x032c /* change acoustic behavior */ #define HDIO_SET_BUSSTATE 0x032d /* set the bus state of the hwif */ #define HDIO_SET_QDMA 0x032e /* change use-qdma flag */ +#define HDIO_SET_ADDRESS 0x032f /* change lba addressing modes */ /* bus states */ enum { @@ -219,34 +385,30 @@ BUSSTATE_TRISTATE }; -/* BIG GEOMETRY */ -struct hd_big_geometry { - unsigned char heads; - unsigned char sectors; - unsigned int cylinders; - unsigned long start; -}; - /* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x033n/0x033n */ #define HDIO_GETGEO_BIG 0x0330 /* */ #define HDIO_GETGEO_BIG_RAW 0x0331 /* */ #define __NEW_HD_DRIVE_ID -/* structure returned by HDIO_GET_IDENTITY, as per ANSI ATA2 rev.2f spec */ +/* structure returned by HDIO_GET_IDENTITY, + * as per ANSI NCITS ATA6 rev.1b spec + */ struct hd_driveid { unsigned short config; /* lots of obsolete bit flags */ - unsigned short cyls; /* "physical" cyls */ + unsigned short cyls; /* Obsolete, 
"physical" cyls */ unsigned short reserved2; /* reserved (word 2) */ - unsigned short heads; /* "physical" heads */ + unsigned short heads; /* Obsolete, "physical" heads */ unsigned short track_bytes; /* unformatted bytes per track */ unsigned short sector_bytes; /* unformatted bytes per sector */ - unsigned short sectors; /* "physical" sectors per track */ + unsigned short sectors; /* Obsolete, "physical" sectors per track */ unsigned short vendor0; /* vendor unique */ unsigned short vendor1; /* vendor unique */ - unsigned short vendor2; /* vendor unique */ + unsigned short vendor2; /* Retired vendor unique */ unsigned char serial_no[20]; /* 0 = not_specified */ - unsigned short buf_type; - unsigned short buf_size; /* 512 byte increments; 0 = not_specified */ + unsigned short buf_type; /* Retired */ + unsigned short buf_size; /* Retired, 512 byte increments + * 0 = not_specified + */ unsigned short ecc_bytes; /* for r/w long cmds; 0 = not_specified */ unsigned char fw_rev[8]; /* 0 = not_specified */ unsigned char model[40]; /* 0 = not_specified */ @@ -254,72 +416,223 @@ unsigned char vendor3; /* vendor unique */ unsigned short dword_io; /* 0=not_implemented; 1=implemented */ unsigned char vendor4; /* vendor unique */ - unsigned char capability; /* bits 0:DMA 1:LBA 2:IORDYsw 3:IORDYsup*/ + unsigned char capability; /* (upper byte of word 49) + * 3: IORDYsup + * 2: IORDYsw + * 1: LBA + * 0: DMA + */ unsigned short reserved50; /* reserved (word 50) */ - unsigned char vendor5; /* vendor unique */ - unsigned char tPIO; /* 0=slow, 1=medium, 2=fast */ - unsigned char vendor6; /* vendor unique */ - unsigned char tDMA; /* 0=slow, 1=medium, 2=fast */ - unsigned short field_valid; /* bits 0:cur_ok 1:eide_ok */ - unsigned short cur_cyls; /* logical cylinders */ - unsigned short cur_heads; /* logical heads */ - unsigned short cur_sectors; /* logical sectors per track */ - unsigned short cur_capacity0; /* logical total sectors on drive */ - unsigned short cur_capacity1; /* (2 words, misaligned int) */ + unsigned char vendor5; /* Obsolete, vendor unique */ + unsigned char tPIO; /* Obsolete, 0=slow, 1=medium, 2=fast */ + unsigned char vendor6; /* Obsolete, vendor unique */ + unsigned char tDMA; /* Obsolete, 0=slow, 1=medium, 2=fast */ + unsigned short field_valid; /* (word 53) + * 2: ultra_ok word 88 + * 1: eide_ok words 64-70 + * 0: cur_ok words 54-58 + */ + unsigned short cur_cyls; /* Obsolete, logical cylinders */ + unsigned short cur_heads; /* Obsolete, l heads */ + unsigned short cur_sectors; /* Obsolete, l sectors per track */ + unsigned short cur_capacity0; /* Obsolete, l total sectors on drive */ + unsigned short cur_capacity1; /* Obsolete, (2 words, misaligned int) */ unsigned char multsect; /* current multiple sector count */ unsigned char multsect_valid; /* when (bit0==1) multsect is ok */ - unsigned int lba_capacity; /* total number of sectors */ - unsigned short dma_1word; /* single-word dma info */ + unsigned int lba_capacity; /* Obsolete, total number of sectors */ + unsigned short dma_1word; /* Obsolete, single-word dma info */ unsigned short dma_mword; /* multiple-word dma info */ unsigned short eide_pio_modes; /* bits 0:mode3 1:mode4 */ unsigned short eide_dma_min; /* min mword dma cycle time (ns) */ unsigned short eide_dma_time; /* recommended mword dma cycle time (ns) */ unsigned short eide_pio; /* min cycle time (ns), no IORDY */ unsigned short eide_pio_iordy; /* min cycle time (ns), with IORDY */ - unsigned short words69_70[2]; /* reserved words 69-70 */ + unsigned short 
words69_70[2]; /* reserved words 69-70 + * future command overlap and queuing + */ /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */ - unsigned short words71_74[4]; /* reserved words 71-74 */ - unsigned short queue_depth; /* */ + unsigned short words71_74[4]; /* reserved words 71-74 + * for IDENTIFY PACKET DEVICE command + */ + unsigned short queue_depth; /* (word 75) + * 15:5 reserved + * 4:0 Maximum queue depth -1 + */ unsigned short words76_79[4]; /* reserved words 76-79 */ - unsigned short major_rev_num; /* */ - unsigned short minor_rev_num; /* */ - unsigned short command_set_1; /* bits 0:Smart 1:Security 2:Removable 3:PM */ - unsigned short command_set_2; /* bits 14:Smart Enabled 13:0 zero */ - unsigned short cfsse; /* command set-feature supported extensions */ - unsigned short cfs_enable_1; /* command set-feature enabled */ - unsigned short cfs_enable_2; /* command set-feature enabled */ - unsigned short csf_default; /* command set-feature default */ - unsigned short dma_ultra; /* */ + unsigned short major_rev_num; /* (word 80) */ + unsigned short minor_rev_num; /* (word 81) */ + unsigned short command_set_1; /* (word 82) supported + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + unsigned short command_set_2; /* (word 83) + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + unsigned short cfsse; /* (word 84) + * cmd set-feature supported extensions + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:3 reserved + * 2: Media Serial Number Valid + * 1: SMART self-test supported + * 0: SMART error logging + */ + unsigned short cfs_enable_1; /* (word 85) + * command set-feature enabled + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + unsigned short cfs_enable_2; /* (word 86) + * command set-feature enabled + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + unsigned short csf_default; /* (word 87) + * command set-feature default + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:3 reserved + * 2: Media Serial Number Valid + * 1: SMART self-test supported + * 0: SMART error logging + */ + unsigned short dma_ultra; /*
(word 88) */ unsigned short word89; /* reserved (word 89) */ unsigned short word90; /* reserved (word 90) */ unsigned short CurAPMvalues; /* current APM values */ unsigned short word92; /* reserved (word 92) */ - unsigned short hw_config; /* hardware config */ - unsigned short words94_125[32];/* reserved words 94-125 */ - unsigned short last_lun; /* reserved (word 126) */ - unsigned short word127; /* reserved (word 127) */ - unsigned short dlf; /* device lock function + unsigned short hw_config; /* hardware config (word 93) + * 15: + * 14: + * 13: + * 12: + * 11: + * 10: + * 9: + * 8: + * 7: + * 6: + * 5: + * 4: + * 3: + * 2: + * 1: + * 0: + */ + unsigned short acoustic; /* (word 94) + * 15:8 Vendor's recommended value + * 7:0 current value + */ + unsigned short words95_99[5]; /* reserved words 95-99 */ +#if 0 + unsigned short words100_103[4] ;/* reserved words 100-103 */ +#else + unsigned long long lba_capacity_2;/* 48-bit total number of sectors */ +#endif + unsigned short words104_125[22];/* reserved words 104-125 */ + unsigned short last_lun; /* (word 126) */ + unsigned short word127; /* (word 127) Feature Set + * Removable Media Notification + * 15:2 reserved + * 1:0 00 = not supported + * 01 = supported + * 10 = reserved + * 11 = reserved + */ + unsigned short dlf; /* (word 128) + * device lock function * 15:9 reserved - * 8 security level 1:max 0:high - * 7:6 reserved - * 5 enhanced erase - * 4 expire - * 3 frozen - * 2 locked - * 1 en/disabled - * 0 capability + * 8 security level 1:max 0:high + * 7:6 reserved + * 5 enhanced erase + * 4 expire + * 3 frozen + * 2 locked + * 1 en/disabled + * 0 capability */ - unsigned short csfo; /* current set features options + unsigned short csfo; /* (word 129) + * current set features options * 15:4 reserved - * 3 auto reassign - * 2 reverting - * 1 read-look-ahead - * 0 write cache + * 3: auto reassign + * 2: reverting + * 1: read-look-ahead + * 0: write cache */ unsigned short words130_155[26];/* reserved vendor words 130-155 */ - unsigned short word156; + unsigned short word156; /* reserved vendor word 156 */ unsigned short words157_159[3];/* reserved vendor words 157-159 */ - unsigned short words160_255[95];/* reserved words 160-255 */ + unsigned short cfa_power; /* (word 160) CFA Power Mode + * 15 word 160 supported + * 14 reserved + * 13 + * 12 + * 11:0 + */ + unsigned short words161_175[14];/* Reserved for CFA */ + unsigned short words176_205[31];/* Current Media Serial Number */ + unsigned short words206_254[48];/* reserved words 206-254 */ + unsigned short integrity_word; /* (word 255) + * 15:8 Checksum + * 7:0 Signature + */ }; /* diff -urN linux-2.4.17-rc2-virgin/include/linux/highmem.h linux-2.4.17-rc2-wli2/include/linux/highmem.h --- linux-2.4.17-rc2-virgin/include/linux/highmem.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/highmem.h Thu Dec 20 18:55:30 2001 @@ -93,4 +93,15 @@ kunmap_atomic(vto, KM_USER1); } +static inline void copy_highpage(struct page *to, struct page *from) +{ + char *vfrom, *vto; + + vfrom = kmap(from); + vto = kmap(to); + copy_page(vto, vfrom); + kunmap(from); + kunmap(to); +} + #endif /* _LINUX_HIGHMEM_H */ diff -urN linux-2.4.17-rc2-virgin/include/linux/ide.h linux-2.4.17-rc2-wli2/include/linux/ide.h --- linux-2.4.17-rc2-virgin/include/linux/ide.h Thu Nov 22 11:48:07 2001 +++ linux-2.4.17-rc2-wli2/include/linux/ide.h Thu Dec 20 19:49:13 2001 @@ -65,13 +65,14 @@ /* * IDE_DRIVE_CMD is used to implement many features of the hdparm utility */ -#define IDE_DRIVE_CMD 99 /* (magic) undef to 
reduce kernel size*/ +#define IDE_DRIVE_CMD 99 /* (magic) undef to reduce kernel size*/ + +#define IDE_DRIVE_TASK 98 /* - * IDE_DRIVE_TASK is used to implement many features needed for raw tasks + * IDE_DRIVE_TASKFILE is used to implement many features needed for raw tasks */ -#define IDE_DRIVE_TASK 98 -#define IDE_DRIVE_CMD_AEB 98 +#define IDE_DRIVE_TASKFILE 97 /* * "No user-serviceable parts" beyond this point :) @@ -120,6 +121,17 @@ #define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET #define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET +#define IDE_DATA_OFFSET_HOB (0) +#define IDE_ERROR_OFFSET_HOB (1) +#define IDE_NSECTOR_OFFSET_HOB (2) +#define IDE_SECTOR_OFFSET_HOB (3) +#define IDE_LCYL_OFFSET_HOB (4) +#define IDE_HCYL_OFFSET_HOB (5) +#define IDE_SELECT_OFFSET_HOB (6) +#define IDE_CONTROL_OFFSET_HOB (7) + +#define IDE_FEATURE_OFFSET_HOB IDE_ERROR_OFFSET_HOB + #define IDE_DATA_REG (HWIF(drive)->io_ports[IDE_DATA_OFFSET]) #define IDE_ERROR_REG (HWIF(drive)->io_ports[IDE_ERROR_OFFSET]) #define IDE_NSECTOR_REG (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET]) @@ -131,6 +143,16 @@ #define IDE_CONTROL_REG (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]) #define IDE_IRQ_REG (HWIF(drive)->io_ports[IDE_IRQ_OFFSET]) +#define IDE_DATA_REG_HOB (HWIF(drive)->io_ports[IDE_DATA_OFFSET]) +#define IDE_ERROR_REG_HOB (HWIF(drive)->io_ports[IDE_ERROR_OFFSET]) +#define IDE_NSECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET]) +#define IDE_SECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET]) +#define IDE_LCYL_REG_HOB (HWIF(drive)->io_ports[IDE_LCYL_OFFSET]) +#define IDE_HCYL_REG_HOB (HWIF(drive)->io_ports[IDE_HCYL_OFFSET]) +#define IDE_SELECT_REG_HOB (HWIF(drive)->io_ports[IDE_SELECT_OFFSET]) +#define IDE_STATUS_REG_HOB (HWIF(drive)->io_ports[IDE_STATUS_OFFSET]) +#define IDE_CONTROL_REG_HOB (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]) + #define IDE_FEATURE_REG IDE_ERROR_REG #define IDE_COMMAND_REG IDE_STATUS_REG #define IDE_ALTSTATUS_REG IDE_CONTROL_REG @@ -156,11 +178,21 @@ #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define PARTN_MASK ((1<<PARTN_BITS)-1) /* a useful bit mask for partition bits */ #define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) #define IDE_MIN(a,b) ((a)<(b) ? (a):(b)) #define IDE_MAX(a,b) ((a)>(b) ? (a):(b)) +#ifndef SPLIT_WORD +# define SPLIT_WORD(W,HB,LB) ((HB)=(W>>8), (LB)=(W-((W>>8)<<8))) +#endif +#ifndef MAKE_WORD +# define MAKE_WORD(W,HB,LB) ((W)=((HB<<8)+LB)) +#endif + + /* * Timeouts for various operations: */ @@ -170,8 +202,7 @@ #else #define WAIT_READY (3*HZ/100) /* 30msec - should be instantaneous */ #endif /* CONFIG_APM || CONFIG_APM_MODULE */ -#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?) - if all ATAPI CD is closed at boot */ +#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ #define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ #define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ #define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ @@ -209,6 +240,11 @@ (drive)->quirk_list = hwif->quirkproc(drive); \ } +#define HOST(hwif,chipset) \ +{ \ + return ((hwif)->chipset == chipset) ? 1 : 0; \ +} + #define IDE_DEBUG(lineno) \ printk("%s,%s,line=%d\n", __FILE__, __FUNCTION__, (lineno)) @@ -223,6 +259,18 @@ #endif /* + * hwif_chipset_t is used to keep track of the specific hardware + * chipset used by each IDE interface, if known.
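The *_OFFSET_HOB values and the SPLIT_WORD/MAKE_WORD helpers added above exist because 48-bit commands load each taskfile register twice, high-order byte (HOB) first, then low-order byte. As a sketch of how a 48-bit block number fans out across the two register images (split_lba48 is an illustrative name, not a kernel function; the structures are the ones from <linux/hdreg.h>):

    /* Illustrative only: map a 48-bit LBA onto taskfile + HOB shadow. */
    static void
    split_lba48(unsigned long long block,
                struct hd_drive_task_hdr *tf, struct hd_drive_hob_hdr *hob)
    {
            tf->sector_number  = (block >>  0) & 0xff;  /* LBA  7:0  */
            tf->low_cylinder   = (block >>  8) & 0xff;  /* LBA 15:8  */
            tf->high_cylinder  = (block >> 16) & 0xff;  /* LBA 23:16 */
            hob->sector_number = (block >> 24) & 0xff;  /* LBA 31:24 */
            hob->low_cylinder  = (block >> 32) & 0xff;  /* LBA 39:32 */
            hob->high_cylinder = (block >> 40) & 0xff;  /* LBA 47:40 */
    }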
+ */ +typedef enum { ide_unknown, ide_generic, ide_pci, + ide_cmd640, ide_dtc2278, ide_ali14xx, + ide_qd65xx, ide_umc8672, ide_ht6560b, + ide_pdc4030, ide_rz1000, ide_trm290, + ide_cmd646, ide_cy82c693, ide_4drives, + ide_pmac, ide_etrax100 +} hwif_chipset_t; + +/* * Structure to hold all information about the location of this port */ typedef struct hw_regs_s { @@ -231,6 +279,7 @@ int dma; /* our dma entry */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ void *priv; /* interface specific data */ + hwif_chipset_t chipset; } hw_regs_t; /* @@ -259,16 +308,20 @@ #ifndef HAVE_ARCH_OUT_BYTE #ifdef REALLY_FAST_IO #define OUT_BYTE(b,p) outb((b),(p)) +#define OUT_WORD(w,p) outw((w),(p)) #else #define OUT_BYTE(b,p) outb_p((b),(p)) +#define OUT_WORD(w,p) outw_p((w),(p)) #endif #endif #ifndef HAVE_ARCH_IN_BYTE #ifdef REALLY_FAST_IO -#define IN_BYTE(p) (byte)inb_p(p) -#else #define IN_BYTE(p) (byte)inb(p) +#define IN_WORD(p) (short)inw(p) +#else +#define IN_BYTE(p) (byte)inb_p(p) +#define IN_WORD(p) (short)inw_p(p) #endif #endif @@ -328,6 +381,7 @@ unsigned autotune : 2; /* 1=autotune, 2=noautotune, 0=default */ unsigned remap_0_to_1 : 2; /* 0=remap if ezdrive, 1=remap, 2=noremap */ unsigned ata_flash : 1; /* 1=present, 0=default */ + unsigned addressing; /* : 2; 0=28-bit, 1=48-bit, 2=64-bit */ byte scsi; /* 0=default, 1=skip current ide-subdriver for ide-scsi emulation */ byte media; /* disk, cdrom, tape, floppy, ... */ select_t select; /* basic drive/head select reg value */ @@ -340,7 +394,7 @@ byte bad_wstat; /* used for ignoring WRERR_STAT */ byte nowerr; /* used for ignoring WRERR_STAT */ byte sect0; /* offset of first sector for DM6:DDO */ - byte usage; /* current "open()" count for drive */ + unsigned int usage; /* current "open()" count for drive */ byte head; /* "real" number of heads */ byte sect; /* "real" sectors per track */ byte bios_head; /* BIOS/fdisk/LILO number of heads */ @@ -348,6 +402,7 @@ unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */ unsigned int cyl; /* "real" number of cyls */ unsigned long capacity; /* total number of sectors */ + unsigned long long capacity48; /* total number of sectors */ unsigned int drive_data; /* for use by tuneproc/selectproc as needed */ void *hwif; /* actually (ide_hwif_t *) */ wait_queue_head_t wqueue; /* used to wait for drive in open() */ @@ -369,6 +424,8 @@ byte init_speed; /* transfer rate set at boot */ byte current_speed; /* current transfer rate set */ byte dn; /* now wide spread use */ + byte wcache; /* status of write cache */ + byte acoustic; /* acoustic management */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ } ide_drive_t; @@ -438,19 +495,7 @@ /* * ide soft-power support */ -typedef int (ide_busproc_t) (struct hwif_s *, int); - -/* - * hwif_chipset_t is used to keep track of the specific hardware - * chipset used by each IDE interface, if known. 
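With the new addressing flag and the 64-bit capacity48 field in ide_drive_t, a caller can pick the sector count appropriate to the drive's mode. A minimal sketch, assuming nonzero addressing means the extended capacity is valid (drive_sectors is an illustrative name, not driver code):

    /* Sketch only: report the usable capacity of a drive. */
    static unsigned long long
    drive_sectors(ide_drive_t *drive)
    {
            if (drive->addressing)  /* 1=48-bit (2 reserved for 64-bit) */
                    return drive->capacity48;
            return (unsigned long long) drive->capacity;
    }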
- */ -typedef enum { ide_unknown, ide_generic, ide_pci, - ide_cmd640, ide_dtc2278, ide_ali14xx, - ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_pdc4030, ide_rz1000, ide_trm290, - ide_cmd646, ide_cy82c693, ide_4drives, - ide_pmac, ide_etrax100 -} hwif_chipset_t; +typedef int (ide_busproc_t) (ide_drive_t *, int); #define IDE_CHIPSET_PCI_MASK \ ((1<driver)) @@ -727,6 +785,8 @@ void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); +int drive_is_ready (ide_drive_t *drive); + /* * This is used on exit from the driver, to designate the next irq handler * and also to start the safety timer. @@ -748,7 +808,7 @@ * Issue a simple drive command * The drive must be selected beforehand. */ -void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler); +void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler); /* * ide_fixstring() cleans up and (optionally) byte-swaps a text string, @@ -843,24 +903,92 @@ /* * Clean up after success/failure of an explicit drive cmd. * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_CMD). + * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASK_MASK). */ void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err); /* - * Issue ATA command and wait for completion. + * Issue ATA command and wait for completion. use for implementing commands in kernel */ int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf); + int ide_wait_cmd_task (ide_drive_t *drive, byte *buf); + +typedef struct ide_task_s { + task_ioreg_t tfRegister[8]; + task_ioreg_t hobRegister[8]; + ide_reg_valid_t tf_out_flags; + ide_reg_valid_t tf_in_flags; + int data_phase; + int command_type; + ide_pre_handler_t *prehandler; + ide_handler_t *handler; + ide_post_handler_t *posthandler; + void *special; /* valid_t generally */ + struct request *rq; /* copy of request */ + unsigned long block; /* copy of block */ +} ide_task_t; + +typedef struct pkt_task_s { + task_ioreg_t tfRegister[8]; + int data_phase; + int command_type; + ide_handler_t *handler; + void *special; + struct request *rq; /* copy of request */ + unsigned long block; /* copy of block */ +} pkt_task_t; + +/* + * taskfile io for disks for now... 
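The ide_task_t defined here is the unit of work for the taskfile engine; it is consumed by do_rw_taskfile() and ide_raw_taskfile(), declared just below. A hedged in-kernel sketch for a no-data command; example_flush_cache is illustrative, and a real caller would normally let ide_cmd_type_parser() classify the command:

    /* Sketch: build an ide_task_t for FLUSH CACHE and submit it.
     * Assumes <linux/string.h> for memset. */
    static int
    example_flush_cache(ide_drive_t *drive)
    {
            ide_task_t args;

            memset(&args, 0, sizeof(args));
            args.tfRegister[IDE_COMMAND_OFFSET] = WIN_FLUSH_CACHE;
            args.command_type = IDE_DRIVE_TASK_NO_DATA;
            args.handler = task_no_data_intr;   /* declared below */
            return ide_raw_taskfile(drive, &args, NULL);
    }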
+ */ +ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task); + +/* + * Builds request from ide_ioctl + */ +void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler); + +/* + * Special Flagged Register Validation Caller + */ +// ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task); + +ide_startstop_t set_multmode_intr (ide_drive_t *drive); +ide_startstop_t set_geometry_intr (ide_drive_t *drive); +ide_startstop_t recal_intr (ide_drive_t *drive); +ide_startstop_t task_no_data_intr (ide_drive_t *drive); +ide_startstop_t task_in_intr (ide_drive_t *drive); +ide_startstop_t task_mulin_intr (ide_drive_t *drive); +ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq); +ide_startstop_t task_out_intr (ide_drive_t *drive); +ide_startstop_t task_mulout_intr (ide_drive_t *drive); +void ide_init_drive_taskfile (struct request *rq); + +int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf); + +int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *cmd, byte *buf); + +ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile); +ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile); +/* Expects args is a full set of TF registers and parses the command type */ +int ide_cmd_type_parser (ide_task_t *args); + +int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); + +#ifdef CONFIG_PKT_TASK_IOCTL +int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +#endif /* CONFIG_PKT_TASK_IOCTL */ void ide_delay_50ms (void); int system_bus_clock(void); byte ide_auto_reduce_xfer (ide_drive_t *drive); int ide_driveid_update (ide_drive_t *drive); -int ide_ata66_check (ide_drive_t *drive, byte cmd, byte nsect, byte feature); +int ide_ata66_check (ide_drive_t *drive, ide_task_t *args); int ide_config_drive_speed (ide_drive_t *drive, byte speed); byte eighty_ninty_three (ide_drive_t *drive); -int set_transfer (ide_drive_t *drive, byte cmd, byte nsect, byte feature); +int set_transfer (ide_drive_t *drive, ide_task_t *args); /* * ide_system_bus_speed() returns what we think is the system VESA/PCI @@ -905,23 +1033,30 @@ extern ide_proc_entry_t generic_subdriver_entries[]; #endif +int ide_reinit_drive (ide_drive_t *drive); + #ifdef _IDE_C #ifdef CONFIG_BLK_DEV_IDE int ideprobe_init (void); #endif /* CONFIG_BLK_DEV_IDE */ #ifdef CONFIG_BLK_DEV_IDEDISK +int idedisk_reinit (ide_drive_t *drive); int idedisk_init (void); #endif /* CONFIG_BLK_DEV_IDEDISK */ #ifdef CONFIG_BLK_DEV_IDECD +int ide_cdrom_reinit (ide_drive_t *drive); int ide_cdrom_init (void); #endif /* CONFIG_BLK_DEV_IDECD */ #ifdef CONFIG_BLK_DEV_IDETAPE +int idetape_reinit (ide_drive_t *drive); int idetape_init (void); #endif /* CONFIG_BLK_DEV_IDETAPE */ #ifdef CONFIG_BLK_DEV_IDEFLOPPY +int idefloppy_reinit (ide_drive_t *drive); int idefloppy_init (void); #endif /* CONFIG_BLK_DEV_IDEFLOPPY */ #ifdef CONFIG_BLK_DEV_IDESCSI +int idescsi_reinit (ide_drive_t *drive); int idescsi_init (void); #endif /* CONFIG_BLK_DEV_IDESCSI */ #endif /* _IDE_C */ @@ -960,5 +1095,8 @@ #endif void hwif_unregister (ide_hwif_t *hwif); + +void export_ide_init_queue (ide_drive_t *drive); +byte export_probe_for_drive (ide_drive_t *drive); #endif /* _IDE_H */ diff -urN 
linux-2.4.17-rc2-virgin/include/linux/kallsyms.h linux-2.4.17-rc2-wli2/include/linux/kallsyms.h --- linux-2.4.17-rc2-virgin/include/linux/kallsyms.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/kallsyms.h Thu Dec 20 18:55:45 2001 @@ -0,0 +1,141 @@ +/* kallsyms headers + Copyright 2000 Keith Owens + + This file is part of the Linux modutils. It is exported to kernel + space so debuggers can access the kallsyms data. + + The kallsyms data contains all the non-stack symbols from a kernel + or a module. The kernel symbols are held between __start___kallsyms + and __stop___kallsyms. The symbols for a module are accessed via + the struct module chain which is based at module_list. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ident "$Id$" + +#ifndef MODUTILS_KALLSYMS_H +#define MODUTILS_KALLSYMS_H 1 + +/* Have to (re)define these ElfW entries here because external kallsyms + * code does not have access to modutils/include/obj.h. This code is + * included from user-space tools (modutils) and the kernel; they need + * different includes. + */ + +#ifndef ELFCLASS32 +#ifdef __KERNEL__ +#include <linux/elf.h> +#else /* __KERNEL__ */ +#include <elf.h> +#endif /* __KERNEL__ */ +#endif /* ELFCLASS32 */ + +#ifndef ELFCLASSM +#define ELFCLASSM ELF_CLASS +#endif + +#ifndef ElfW +# if ELFCLASSM == ELFCLASS32 +# define ElfW(x) Elf32_ ## x +# define ELFW(x) ELF32_ ## x +# else +# define ElfW(x) Elf64_ ## x +# define ELFW(x) ELF64_ ## x +# endif +#endif + +/* Format of data in the kallsyms section. + * Most of the fields are small numbers but the total size and all + * offsets can be large so use the 32/64 bit types for these fields. + * + * Do not use sizeof() on these structures, modutils may be using extra + * fields. Instead use the size fields in the header to access the + * other bits of data.
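Debuggers normally reach this data through the lookup routines declared below rather than walking the tables by hand. A short sketch, assuming a nonzero return means the lookup succeeded (print_caller is an illustrative name):

    /* Resolve a kernel text address, e.g. to pretty-print a backtrace. */
    static void
    print_caller(unsigned long addr)
    {
            const char *mod, *sec, *sym;
            unsigned long mod_s, mod_e, sec_s, sec_e, sym_s, sym_e;

            if (kallsyms_address_to_symbol(addr, &mod, &mod_s, &mod_e,
                                           &sec, &sec_s, &sec_e,
                                           &sym, &sym_s, &sym_e))
                    printk("0x%lx: %s+0x%lx [%s]\n",
                           addr, sym, addr - sym_s, mod);
            else
                    printk("0x%lx: (no symbol)\n", addr);
    }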
+ */ + +struct kallsyms_header { + int size; /* Size of this header */ + ElfW(Word) total_size; /* Total size of kallsyms data */ + int sections; /* Number of section entries */ + ElfW(Off) section_off; /* Offset to first section entry */ + int section_size; /* Size of one section entry */ + int symbols; /* Number of symbol entries */ + ElfW(Off) symbol_off; /* Offset to first symbol entry */ + int symbol_size; /* Size of one symbol entry */ + ElfW(Off) string_off; /* Offset to first string */ + ElfW(Addr) start; /* Start address of first section */ + ElfW(Addr) end; /* End address of last section */ +}; + +struct kallsyms_section { + ElfW(Addr) start; /* Start address of section */ + ElfW(Word) size; /* Size of this section */ + ElfW(Off) name_off; /* Offset to section name */ + ElfW(Word) flags; /* Flags from section */ +}; + +struct kallsyms_symbol { + ElfW(Off) section_off; /* Offset to section that owns this symbol */ + ElfW(Addr) symbol_addr; /* Address of symbol */ + ElfW(Off) name_off; /* Offset to symbol name */ +}; + +#define KALLSYMS_SEC_NAME "__kallsyms" +#define KALLSYMS_IDX 2 /* obj_kallsyms creates kallsyms as section 2 */ + +#define kallsyms_next_sec(h,s) \ + ((s) = (struct kallsyms_section *)((char *)(s) + (h)->section_size)) +#define kallsyms_next_sym(h,s) \ + ((s) = (struct kallsyms_symbol *)((char *)(s) + (h)->symbol_size)) + +int kallsyms_symbol_to_address( + const char *name, /* Name to lookup */ + unsigned long *token, /* Which module to start with */ + const char **mod_name, /* Set to module name or "kernel" */ + unsigned long *mod_start, /* Set to start address of module */ + unsigned long *mod_end, /* Set to end address of module */ + const char **sec_name, /* Set to section name */ + unsigned long *sec_start, /* Set to start address of section */ + unsigned long *sec_end, /* Set to end address of section */ + const char **sym_name, /* Set to full symbol name */ + unsigned long *sym_start, /* Set to start address of symbol */ + unsigned long *sym_end /* Set to end address of symbol */ + ); + +int kallsyms_address_to_symbol( + unsigned long address, /* Address to lookup */ + const char **mod_name, /* Set to module name */ + unsigned long *mod_start, /* Set to start address of module */ + unsigned long *mod_end, /* Set to end address of module */ + const char **sec_name, /* Set to section name */ + unsigned long *sec_start, /* Set to start address of section */ + unsigned long *sec_end, /* Set to end address of section */ + const char **sym_name, /* Set to full symbol name */ + unsigned long *sym_start, /* Set to start address of symbol */ + unsigned long *sym_end /* Set to end address of symbol */ + ); + +int kallsyms_sections(void *token, + int (*callback)(void *, /* token */ + const char *, /* module name */ + const char *, /* section name */ + ElfW(Addr), /* Section start */ + ElfW(Addr), /* Section end */ + ElfW(Word) /* Section flags */ + ) + ); + +#endif /* kallsyms.h */ diff -urN linux-2.4.17-rc2-virgin/include/linux/kdb.h linux-2.4.17-rc2-wli2/include/linux/kdb.h --- linux-2.4.17-rc2-virgin/include/linux/kdb.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/kdb.h Thu Dec 20 18:55:30 2001 @@ -0,0 +1,229 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * Copyright (C) 2000 Stephane Eranian + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + * Stephane Eranian 2000/06/05 + * move to v1.2 + * Keith Owens 2000/09/16 + * KDB v1.4 + * kdb=on/off/early at boot, /proc/sys/kernel/kdb. + * Env BTAPROMPT. + */ + + +#if !defined(__KDB_H) +#define __KDB_H + +#include +#include + +#define KDB_MAJOR_VERSION 1 +#define KDB_MINOR_VERSION 9 +#define KDB_TEST_VERSION "" + + /* + * kdb_initial_cpu is initialized to -1, and is set to the cpu + * number whenever the kernel debugger is entered. + */ +extern volatile int kdb_initial_cpu; +#ifdef CONFIG_KDB +#define KDB_IS_RUNNING() (kdb_initial_cpu != -1) +#else +#define KDB_IS_RUNNING() (0) +#endif /* CONFIG_KDB */ + + /* + * kdb_on + * + * Defines whether kdb is on or not. Default value + * is set by CONFIG_KDB_OFF. Boot with kdb=on/off + * or echo "[01]" > /proc/sys/kernel/kdb to change it. + */ +extern int kdb_on; + + /* + * kdb_port is initialized to zero, and is set to the I/O port + * address of the serial port when the console is setup in + * serial_console_setup. + */ +extern int kdb_port; + + /* + * KDB_FLAG_EARLYKDB is set when the 'kdb' option is specified + * as a boot parameter (e.g. via lilo). It indicates that the + * kernel debugger should be entered as soon as practical. + */ +#define KDB_FLAG_EARLYKDB 0x00000001 + + /* + * Internal debug flags + */ +#define KDB_DEBUG_FLAG_BT 0x0001 /* Stack traceback debug */ +#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ +#define KDB_DEBUG_FLAG_LBR 0x0004 /* Print last branch register */ +#define KDB_DEBUG_FLAG_AR 0x0008 /* Activation record, generic */ +#define KDB_DEBUG_FLAG_ARA 0x0010 /* Activation record, arch specific */ +#define KDB_DEBUG_FLAG_CALLBACK 0x0020 /* Event callbacks to kdb */ +#define KDB_DEBUG_FLAG_STATE 0x0040 /* State flags */ +#define KDB_DEBUG_FLAG_MASK 0xffff /* All debug flags */ +#define KDB_DEBUG_FLAG_SHIFT 16 /* Shift factor for dbflags */ + +extern volatile int kdb_flags; /* Global flags, see kdb_state for per cpu state */ + +#define KDB_FLAG(flag) (kdb_flags & KDB_FLAG_##flag) +#define KDB_FLAG_SET(flag) ((void)(kdb_flags |= KDB_FLAG_##flag)) +#define KDB_FLAG_CLEAR(flag) ((void)(kdb_flags &= ~KDB_FLAG_##flag)) +#define KDB_DEBUG(flag) (kdb_flags & (KDB_DEBUG_FLAG_##flag << KDB_DEBUG_FLAG_SHIFT)) +#define KDB_DEBUG_STATE(text,value) if (KDB_DEBUG(STATE)) kdb_print_state(text, value) + + /* + * Per cpu kdb state. A cpu can be under kdb control but outside kdb, + * for example when doing single step. 
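Such a cpu is tracked entirely through these per-cpu flag words, manipulated with the helpers defined just below. A hedged illustration (the function is made up; the flags and macros are the real ones, and per the flag comments DOING_SSB implies DOING_SS):

    /* Sketch: mark the current cpu as single-stepping before resuming. */
    static void
    example_mark_single_step(void)
    {
            KDB_STATE_SET(DOING_SS);       /* this cpu will single step */
            if (KDB_STATE(DOING_SSB))      /* ssb command also sets DOING_SS */
                    kdb_printf("ssb in progress\n");
    }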
+ */ +volatile extern int kdb_state[ /*NR_CPUS*/ ]; +#define KDB_STATE_KDB 0x00000001 /* Cpu is inside kdb */ +#define KDB_STATE_LEAVING 0x00000002 /* Cpu is leaving kdb */ +#define KDB_STATE_CMD 0x00000004 /* Running a kdb command */ +#define KDB_STATE_KDB_CONTROL 0x00000008 /* This cpu is under kdb control */ +#define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */ +#define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */ +#define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command, DOING_SS is also set */ +#define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint after one ss, independent of DOING_SS */ +#define KDB_STATE_REENTRY 0x00000100 /* Valid re-entry into kdb */ +#define KDB_STATE_SUPPRESS 0x00000200 /* Suppress error messages */ +#define KDB_STATE_LONGJMP 0x00000400 /* longjmp() data is available */ + /* Spare, was NO_WATCHDOG 0x00000800 */ +#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */ +#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */ +#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */ +#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been adjusted */ +#define KDB_STATE_NO_BP_DELAY 0x00010000 /* No need to delay breakpoints */ +#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch specific use */ + +#define KDB_STATE_CPU(flag,cpu) (kdb_state[cpu] & KDB_STATE_##flag) +#define KDB_STATE_SET_CPU(flag,cpu) ((void)(kdb_state[cpu] |= KDB_STATE_##flag)) +#define KDB_STATE_CLEAR_CPU(flag,cpu) ((void)(kdb_state[cpu] &= ~KDB_STATE_##flag)) + +#define KDB_STATE(flag) KDB_STATE_CPU(flag,smp_processor_id()) +#define KDB_STATE_SET(flag) KDB_STATE_SET_CPU(flag,smp_processor_id()) +#define KDB_STATE_CLEAR(flag) KDB_STATE_CLEAR_CPU(flag,smp_processor_id()) + + /* + * External entry point for the kernel debugger. The pt_regs + * at the time of entry are supplied along with the reason for + * entry to the kernel debugger. + */ + +typedef enum { + KDB_REASON_CALL = 1, /* Call kdb() directly - regs invalid */ + KDB_REASON_FAULT, /* Kernel fault - regs valid */ + KDB_REASON_BREAK, /* Breakpoint inst. - regs valid */ + KDB_REASON_DEBUG, /* Debug Fault - regs valid */ + KDB_REASON_OOPS, /* Kernel Oops - regs valid */ + KDB_REASON_SWITCH, /* CPU switch - regs valid*/ + KDB_REASON_ENTER, /* KDB_ENTER() trap/fault - regs valid */ + KDB_REASON_KEYBOARD, /* Keyboard entry - regs valid */ + KDB_REASON_NMI, /* Non-maskable interrupt; regs valid */ + KDB_REASON_WATCHDOG, /* Watchdog interrupt; regs valid */ + KDB_REASON_RECURSE, /* Recursive entry to kdb; regs probably valid */ + KDB_REASON_SILENT, /* Silent entry/exit to kdb; regs invalid */ + KDB_REASON_PANIC, /* From panic() routine; regs invalid */ +} kdb_reason_t; + + +#ifdef CONFIG_KDB +extern int kdb(kdb_reason_t, int, kdb_eframe_t); +#else +#define kdb(reason,error_code,frame) (0) +#endif + +typedef int (*kdb_func_t)(int, const char **, const char **, kdb_eframe_t); + + /* + * Symbol table format returned by kallsyms. + */ + +typedef struct __ksymtab { + unsigned long value; /* Address of symbol */ + const char *mod_name; /* Module containing symbol or "kernel" */ + unsigned long mod_start; + unsigned long mod_end; + const char *sec_name; /* Section containing symbol */ + unsigned long sec_start; + unsigned long sec_end; + const char *sym_name; /* Full symbol name, including any version */ + unsigned long sym_start; + unsigned long sym_end; + } kdb_symtab_t; + + /* + * Exported Symbols for kernel loadable modules to use. 
+ */ +extern int kdb_register(char *, kdb_func_t, char *, char *, short); +extern int kdb_unregister(char *); + +extern unsigned long kdba_getword(unsigned long, size_t); +extern unsigned long kdba_putword(unsigned long, size_t, unsigned long); + +extern int kdbgetularg(const char *, unsigned long *); +extern char *kdbgetenv(const char *); +extern int kdbgetintenv(const char *, int *); +extern int kdbgetaddrarg(int, const char**, int*, unsigned long *, + long *, char **, kdb_eframe_t); +extern int kdbgetsymval(const char *, kdb_symtab_t *); +extern int kdbnearsym(unsigned long, kdb_symtab_t *); +extern void kdb_printf(const char *,...) + __attribute__ ((format (printf, 1, 2))); +extern void kdb_init(void); +extern void kdb_symbol_print(kdb_machreg_t, const kdb_symtab_t *, unsigned int); +extern char *kdb_read(char *buffer, size_t bufsize); +extern char *kdb_strdup(const char *str, int type); + +#if defined(CONFIG_SMP) + /* + * Kernel debugger non-maskable IPI handler. + */ +extern int kdb_ipi(kdb_eframe_t, void (*ack_interrupt)(void)); +extern void smp_kdb_stop(void); +#else /* CONFIG_SMP */ +#define smp_kdb_stop() +#endif /* CONFIG_SMP */ + + /* + * Interface from general kernel to enable any hardware + * error reporting mechanisms. Such as the Intel Machine + * Check Architecture, for example. + */ +extern void kdb_enablehwfault(void); + + /* + * Determine if a kernel address is valid or not. + */ + +extern int kdb_vmlist_check(unsigned long, unsigned long); + + /* + * Routine for debugging the debugger state. + */ + +extern void kdb_print_state(const char *, int); + +#endif /* __KDB_H */ diff -urN linux-2.4.17-rc2-virgin/include/linux/kdbprivate.h linux-2.4.17-rc2-wli2/include/linux/kdbprivate.h --- linux-2.4.17-rc2-virgin/include/linux/kdbprivate.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/kdbprivate.h Thu Dec 20 19:05:19 2001 @@ -0,0 +1,320 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + */ +#if !defined(_KDBPRIVATE_H) +#define _KDBPRIVATE_H + +#include +#include + +#include + +/* + * Kernel Debugger Error codes. Must not overlap with command codes. + */ + +#define KDB_NOTFOUND (-1) +#define KDB_ARGCOUNT (-2) +#define KDB_BADWIDTH (-3) +#define KDB_BADRADIX (-4) +#define KDB_NOTENV (-5) +#define KDB_NOENVVALUE (-6) +#define KDB_NOTIMP (-7) +#define KDB_ENVFULL (-8) +#define KDB_ENVBUFFULL (-9 ) +#define KDB_TOOMANYBPT (-10) +#define KDB_TOOMANYDBREGS (-11) +#define KDB_DUPBPT (-12) +#define KDB_BPTNOTFOUND (-13) +#define KDB_BADMODE (-14) +#define KDB_BADINT (-15) +#define KDB_INVADDRFMT (-16) +#define KDB_BADREG (-17) +#define KDB_BADCPUNUM (-18) +#define KDB_BADLENGTH (-19) +#define KDB_NOBP (-20) + +/* + * Kernel Debugger Command codes. Must not overlap with error codes. + */ +#define KDB_CMD_GO (-1001) +#define KDB_CMD_CPU (-1002) +#define KDB_CMD_SS (-1003) +#define KDB_CMD_SSB (-1004) + + /* + * kdb_nextline + * + * Contains the current line number on the screen. 
Used + * to handle the built-in pager (LINES env variable) + */ +extern volatile int kdb_nextline; + + /* + * kdb_diemsg + * + * Contains a pointer to the last string supplied to the + * kernel 'die' panic function. + */ +extern char *kdb_diemsg; + + /* + * Breakpoint state + * + * Each active and inactive breakpoint is represented by + * an instance of the following data structure. + */ + +typedef struct _kdb_bp { + bfd_vma bp_addr; /* Address breakpoint is present at */ + kdb_machinst_t bp_inst; /* Replaced instruction */ + + unsigned int bp_free:1; /* This entry is available */ + + unsigned int bp_enabled:1; /* Breakpoint is active in register */ + unsigned int bp_global:1; /* Global to all processors */ + + unsigned int bp_hardtype:1; /* Uses hardware register */ + unsigned int bp_forcehw:1; /* Force hardware register */ + unsigned int bp_instvalid:1; /* 0=bp_inst invalid, 1=bp_inst valid */ + unsigned int bp_installed:1; /* Breakpoint is installed */ + unsigned int bp_delay:1; /* Do delayed bp handling */ + unsigned int bp_delayed:1; /* Delayed breakpoint */ + + int bp_cpu; /* Cpu # (if bp_global == 0) */ + kdbhard_bp_t bp_template; /* Hardware breakpoint template */ + kdbhard_bp_t *bp_hard; /* Hardware breakpoint structure */ + int bp_adjust; /* Adjustment to PC for real instruction */ +} kdb_bp_t; + + /* + * Breakpoint handling subsystem global variables + */ +extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */]; + + /* + * Breakpoint architecture dependent functions. Must be provided + * in some form for all architectures. + */ +extern void kdba_initbp(void); +extern void kdba_printbp(kdb_bp_t *); +extern void kdba_printbpreg(kdbhard_bp_t *); +extern kdbhard_bp_t *kdba_allocbp(kdbhard_bp_t *, int *); +extern void kdba_freebp(kdbhard_bp_t *); +extern int kdba_parsebp(int, const char**, int *, kdb_bp_t*); +extern char *kdba_bptype(kdbhard_bp_t *); +extern void kdba_setsinglestep(kdb_eframe_t); +extern void kdba_clearsinglestep(kdb_eframe_t); + + /* + * Adjust instruction pointer architecture dependent function. Must be + * provided in some form for all architectures. + */ +extern void kdba_adjust_ip(kdb_reason_t, int, kdb_eframe_t); + + /* + * KDB-only global function prototypes. + */ +extern void kdb_id1(unsigned long); +extern void kdb_id_init(void); + + /* + * Architecture dependent function to enable any + * processor machine check exception handling modes. + */ +extern void kdba_enable_mce(void); + +extern void kdba_enable_lbr(void); +extern void kdba_disable_lbr(void); +extern void kdba_print_lbr(void); + + /* + * Initialization functions. + */ +extern void kdba_init(void); +extern void kdb_io_init(void); + + /* + * Architecture specific function to read a string. + */ +extern char * kdba_read(char *, size_t); + + /* + * Data for a single activation record on stack. + */ + +typedef struct __kdb_activation_record { + kdb_machreg_t start; /* -> start of activation record */ + kdb_machreg_t end; /* -> end+1 of activation record */ + kdb_machreg_t ret; /* Return address to caller */ + kdb_machreg_t oldfp; /* Frame pointer for caller's frame */ + kdb_machreg_t fp; /* Frame pointer for callee's frame */ + kdb_machreg_t arg0; /* -> First argument on stack (in previous ar) */ + int locals; /* Bytes allocated for local variables */ + int regs; /* Bytes allocated for saved registers */ + int args; /* Bytes allocated for arguments (in previous ar) */ + int setup; /* Bytes allocated for setup data */ +} kdb_ar_t; + + /* + * General Stack Traceback functions. 
+ */ + +extern int kdb_get_next_ar(kdb_machreg_t, kdb_machreg_t, + kdb_machreg_t, kdb_machreg_t, + kdb_machreg_t, + kdb_ar_t *, kdb_symtab_t *); + + /* + * Architecture specific Stack Traceback functions. + */ + +struct task_struct; + +extern int kdba_bt_stack(struct pt_regs *, kdb_machreg_t *, + int, struct task_struct *); +extern int kdba_bt_process(struct task_struct *, int); +extern int kdba_prologue(const kdb_symtab_t *, kdb_machreg_t, + kdb_machreg_t, kdb_machreg_t, kdb_machreg_t, + int, kdb_ar_t *); + /* + * KDB Command Table + */ + +typedef struct _kdbtab { + char *cmd_name; /* Command name */ + kdb_func_t cmd_func; /* Function to execute command */ + char *cmd_usage; /* Usage String for this command */ + char *cmd_help; /* Help message for this command */ + short cmd_flags; /* Parsing flags */ + short cmd_minlen; /* Minimum legal # command chars required */ +} kdbtab_t; + + /* + * External command function declarations + */ + +extern int kdb_id(int, const char **, const char **, kdb_eframe_t); +extern int kdb_bp(int, const char **, const char **, kdb_eframe_t); +extern int kdb_bc(int, const char **, const char **, kdb_eframe_t); +extern int kdb_bt(int, const char **, const char **, kdb_eframe_t); +extern int kdb_ss(int, const char **, const char **, kdb_eframe_t); + + /* + * External utility function declarations + */ +extern char* kdb_getstr(char *, size_t, char *); + + /* + * Register contents manipulation + */ +extern int kdba_getregcontents(const char *, kdb_eframe_t, kdb_machreg_t *); +extern int kdba_setregcontents(const char *, kdb_eframe_t, kdb_machreg_t); +extern int kdba_dumpregs(struct pt_regs *, const char *, const char *); +extern int kdba_setpc(kdb_eframe_t, kdb_machreg_t); +extern kdb_machreg_t kdba_getpc(kdb_eframe_t); + + /* + * Debug register handling. + */ +extern void kdba_installdbreg(kdb_bp_t*); +extern void kdba_removedbreg(kdb_bp_t*); + + /* + * Breakpoint handling - External interfaces + */ +extern void kdb_initbptab(void); +extern void kdb_bp_install_global(kdb_eframe_t); +extern void kdb_bp_install_local(kdb_eframe_t); +extern void kdb_bp_remove_global(void); +extern void kdb_bp_remove_local(void); + + /* + * Breakpoint handling - Internal to kdb_bp.c/kdba_bp.c + */ +extern void kdba_installbp(kdb_eframe_t ef, kdb_bp_t *); +extern void kdba_removebp(kdb_bp_t *); + + +typedef enum { + KDB_DB_BPT, /* Breakpoint */ + KDB_DB_SS, /* Single-step trap */ + KDB_DB_SSB, /* Single step to branch */ + KDB_DB_SSBPT, /* Single step over breakpoint */ + KDB_DB_NOBPT /* Spurious breakpoint */ +} kdb_dbtrap_t; + +extern kdb_dbtrap_t kdba_db_trap(kdb_eframe_t, int); /* DEBUG trap/fault handler */ +extern kdb_dbtrap_t kdba_bp_trap(kdb_eframe_t, int); /* Breakpoint trap/fault hdlr */ + + /* + * Interrupt Handling + */ +typedef int kdb_intstate_t; + +extern void kdba_disableint(kdb_intstate_t *); +extern void kdba_restoreint(kdb_intstate_t *); + + /* + * SMP and process stack manipulation routines. + */ +extern int kdba_ipi(kdb_eframe_t, void (*)(void)); +extern int kdba_main_loop(kdb_reason_t, kdb_reason_t, int, kdb_dbtrap_t, kdb_eframe_t); +extern int kdb_main_loop(kdb_reason_t, kdb_reason_t, int, kdb_dbtrap_t, kdb_eframe_t); + + /* + * General Disassembler interfaces + */ +extern int kdb_dis_fprintf(PTR, const char *, ...) __attribute__ ((format (printf, 2, 3))); +extern int kdb_dis_fprintf_dummy(PTR, const char *, ...) 
__attribute__ ((format (printf, 2, 3))); +extern disassemble_info kdb_di; + + /* + * Architecture Dependent Disassembler interfaces + */ +extern void kdba_printaddress(kdb_machreg_t, disassemble_info *, int); +extern int kdba_id_printinsn(kdb_machreg_t, disassemble_info *); +extern int kdba_id_parsemode(const char *, disassemble_info*); +extern void kdba_id_init(disassemble_info *); +extern void kdba_check_pc(kdb_machreg_t *); + + /* + * Miscellaneous functions and data areas + */ +#ifndef kdba_getcurrentframe +extern int kdba_getcurrentframe(kdb_eframe_t); +#endif +extern char *kdb_cmds[]; + + /* + * Defines for kdb_symbol_print. + */ +#define KDB_SP_SPACEB 0x0001 /* Space before string */ +#define KDB_SP_SPACEA 0x0002 /* Space after string */ +#define KDB_SP_PAREN 0x0004 /* Parenthesis around string */ +#define KDB_SP_VALUE 0x0008 /* Print the value of the address */ +#define KDB_SP_SYMSIZE 0x0010 /* Print the size of the symbol */ +#define KDB_SP_NEWLINE 0x0020 /* Newline after string */ +#define KDB_SP_DEFAULT (KDB_SP_VALUE|KDB_SP_PAREN) + +#endif /* !_KDBPRIVATE_H */ + diff -urN linux-2.4.17-rc2-virgin/include/linux/lock_break.h linux-2.4.17-rc2-wli2/include/linux/lock_break.h --- linux-2.4.17-rc2-virgin/include/linux/lock_break.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/lock_break.h Thu Dec 20 18:55:14 2001 @@ -0,0 +1,84 @@ +/* + * include/linux/lock_break.h - lock breaking routines + * + * since in-kernel preemption can not occur while a lock is held, + * we can just drop and reacquire long-held locks when they are + * in a natural quiescent state to further lower system latency. + * + * (C) 2001 Robert Love + * + */ + +#ifndef _LINUX_LOCK_BREAK_H +#define _LINUX_LOCK_BREAK_H + +#include + +/* + * setting this to 1 will instruct debug_lock_break to + * note when the expected lock count does not equal the + * actual count. if the lock count is higher than expected, + * we aren't dropping enough locks. if it is 0, we are + * wasting our time since the system is already preemptible. + */ +#ifndef DEBUG_LOCK_BREAK +#define DEBUG_LOCK_BREAK 0 +#endif + +#ifdef CONFIG_LOCK_BREAK + +#define conditional_schedule_needed() (unlikely(current->need_resched)) + +/* + * setting the task's state to TASK_RUNNING is nothing but paranoia, + * in the case where a task is delinquent in properly putting itself + * to sleep. we should test without it. 
+ */ +#define unconditional_schedule() do { \ + __set_current_state(TASK_RUNNING); \ + schedule(); \ +} while(0) + +#define conditional_schedule() do { \ + if (conditional_schedule_needed()) \ + unconditional_schedule(); \ +} while(0) + +#define break_spin_lock(n) do { \ + spin_unlock(n); \ + spin_lock(n); \ +} while(0) + +#define break_spin_lock_and_resched(n) do { \ + spin_unlock(n); \ + conditional_schedule(); \ + spin_lock(n); \ +} while(0) + +#if DEBUG_LOCK_BREAK +#define debug_lock_break(n) do { \ + if (current->preempt_count != n) \ + printk(KERN_ERR "lock_break: %s:%d: count was %d not %d\n", \ + __FILE__, __LINE__, current->preempt_count, n); \ +} while(0) +#else +#define debug_lock_break(n) +#endif + +#define DEFINE_LOCK_COUNT() int _lock_break_count = 0 +#define TEST_LOCK_COUNT(n) (++_lock_break_count > (n)) +#define RESET_LOCK_COUNT() _lock_break_count = 0 + +#else +#define unconditional_schedule() +#define conditional_schedule() +#define conditional_schedule_needed() 0 +#define break_spin_lock(n) +#define break_spin_lock_and_resched(n) +#define debug_lock_break(n) +#define DEFINE_LOCK_COUNT() +#define TEST_LOCK_COUNT(n) 0 +#define RESET_LOCK_COUNT() +#endif + +#endif /* _LINUX_LOCK_BREAK_H */ diff -urN linux-2.4.17-rc2-virgin/include/linux/mm.h linux-2.4.17-rc2-wli2/include/linux/mm.h --- linux-2.4.17-rc2-virgin/include/linux/mm.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/mm.h Thu Dec 20 18:55:14 2001 @@ -19,7 +19,7 @@ extern int page_cluster; /* The inactive_clean lists are per zone. */ extern struct list_head active_list; -extern struct list_head inactive_list; +extern struct list_head inactive_dirty_list; #include #include @@ -121,6 +121,9 @@ */ extern pgprot_t protection_map[16]; +#define ZPR_MAX_BYTES 256*PAGE_SIZE +#define ZPR_NORMAL 0 /* perform zap_page_range request in one walk */ +#define ZPR_PARTITION 1 /* partition into a series of smaller operations */ /* * These are the virtual MM functions - opening of an area, closing and @@ -133,6 +136,9 @@ struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); }; +/* forward declaration; pte_chain is meant to be internal to rmap.c */ +struct pte_chain; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -159,12 +165,25 @@ updated asynchronously */ struct list_head lru; /* Pageout list, eg. active_list; protected by pagemap_lru_lock !! */ + unsigned long age; /* Page aging counter. */ + struct pte_chain * pte_chain; /* Reverse pte mapping pointer. */ wait_queue_head_t wait; /* Page locked? Stand in line... */ struct page **pprev_hash; /* Complement to *next_hash. */ struct buffer_head * buffers; /* Buffer maps us to a disk block. */ + + + /* + * On ordinary machines the direct mapped kernel virtual address + * space allows kernel virtual addresses for given pages to be + * computed by address calculation. On machines where kernel + * virtual mapping for some regions of physical memory is done + * dynamically the only way to deduce the kernel virtual address + * of a physical page is by storing it somewhere, namely here. + */ +#ifdef CONFIG_HIGHMEM void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ - struct zone_struct *zone; /* Memory zone we are in. 
*/ +#endif /* CONFIG_HIGHMEM */ } mem_map_t; /* @@ -275,9 +294,9 @@ #define PG_referenced 2 #define PG_uptodate 3 #define PG_dirty 4 -#define PG_unused 5 -#define PG_lru 6 -#define PG_active 7 +#define PG_inactive_clean 5 +#define PG_active 6 +#define PG_inactive_dirty 7 #define PG_slab 8 #define PG_skip 10 #define PG_highmem 11 @@ -286,6 +305,15 @@ #define PG_reserved 14 #define PG_launder 15 /* written out by VM pressure.. */ +/* + * PG_zone is actually a two-bit bitfield starting at bit 16 of the + * ->flags component of struct page. + * PG_zonemask is used to extract only those relevant bits of the word + * in order to help extract the PG_zone field of ->flags. + */ +#define PG_zone 16 +#define PG_zonemask (0x3UL << PG_zone) + /* Make it prettier to test the above... */ #define UnlockPage(page) unlock_page(page) #define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) @@ -302,6 +330,40 @@ #define PageLaunder(page) test_bit(PG_launder, &(page)->flags) #define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) +/* + * The zone field within ->flags is never updated after free_area_init_core() + * sets it, so none of the operations on it need be atomic. + */ +#define PageZone(page) \ + (zone_table[(((page)->flags & PG_zonemask) >> PG_zone)]) +#define SetPageZone(page, zone_number) \ + do { \ + (page)->flags &= ~PG_zonemask; \ + (page)->flags |= (zone_number << PG_zone) & PG_zonemask;\ + } while(0) + +/* + * In order to avoid #ifdefs within C code itself, a CONFIG_HIGHMEM + * conditional macro to access the ->virtual field of struct page is + * provided here. + */ +#ifdef CONFIG_HIGHMEM + +#define SetPageVirtual(page, address) \ + do { \ + (page)->virtual = (address); \ + } while(0) + +#else /* !CONFIG_HIGHMEM */ + +/* + * With no highmem, there is no field to be set, and so + * this expands to a no-op. + */ +#define SetPageVirtual(page, address) do { ; } while(0) + +#endif /* !CONFIG_HIGHMEM */ + extern void FASTCALL(set_page_dirty(struct page *)); /* @@ -325,10 +387,16 @@ #define PageActive(page) test_bit(PG_active, &(page)->flags) #define SetPageActive(page) set_bit(PG_active, &(page)->flags) #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) +#define TestandSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags) +#define TestandClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags) -#define PageLRU(page) test_bit(PG_lru, &(page)->flags) -#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) -#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) +#define PageInactiveDirty(page) test_bit(PG_inactive_dirty, &(page)->flags) +#define SetPageInactiveDirty(page) set_bit(PG_inactive_dirty, &(page)->flags) +#define ClearPageInactiveDirty(page) clear_bit(PG_inactive_dirty, &(page)->flags) + +#define PageInactiveClean(page) test_bit(PG_inactive_clean, &(page)->flags) +#define SetPageInactiveClean(page) set_bit(PG_inactive_clean, &(page)->flags) +#define ClearPageInactiveClean(page) clear_bit(PG_inactive_clean, &(page)->flags) #ifdef CONFIG_HIGHMEM #define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) @@ -339,6 +407,23 @@ #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) +#define PageLRU(pp) \ + (PageActive(pp) | PageInactiveDirty(pp) | PageInactiveClean(pp)) + +/* + * Called whenever the VM references a page. We immediately reclaim + * the inactive clean pages because those are counted as freeable. 
+ * We don't particularly care about the inactive dirty ones because + * we're never sure if those are freeable anyway. + */ +static inline void touch_page(struct page * page) +{ + if (PageInactiveClean(page)) + activate_page(page); + else + SetPageReferenced(page); +} + /* * Error return values for the *_nopage functions */ @@ -404,7 +489,7 @@ extern void shmem_lock(struct file * file, int lock); extern int shmem_zero_setup(struct vm_area_struct *); -extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions); extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); @@ -448,6 +533,9 @@ extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void swapin_readahead(swp_entry_t); + +struct zone_struct; +extern struct zone_struct *zone_table[]; extern struct address_space swapper_space; #define PageSwapCache(page) ((page)->mapping == &swapper_space) diff -urN linux-2.4.17-rc2-virgin/include/linux/mmzone.h linux-2.4.17-rc2-wli2/include/linux/mmzone.h --- linux-2.4.17-rc2-virgin/include/linux/mmzone.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/mmzone.h Thu Dec 20 18:55:14 2001 @@ -39,12 +39,15 @@ */ spinlock_t lock; unsigned long free_pages; + unsigned long inactive_clean_pages; + unsigned long inactive_dirty_pages; unsigned long pages_min, pages_low, pages_high; int need_balance; /* * free areas of different sizes */ + struct list_head inactive_clean_list; free_area_t free_area[MAX_ORDER]; /* @@ -112,9 +115,6 @@ extern int numnodes; extern pg_data_t *pgdat_list; - -#define memclass(pgzone, classzone) (((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \ - && ((pgzone) <= (classzone))) /* * The following two are not meant for general usage. They are here as diff -urN linux-2.4.17-rc2-virgin/include/linux/pagemap.h linux-2.4.17-rc2-wli2/include/linux/pagemap.h --- linux-2.4.17-rc2-virgin/include/linux/pagemap.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/pagemap.h Thu Dec 20 18:55:30 2001 @@ -51,21 +51,17 @@ extern void page_cache_init(unsigned long); /* - * We use a power-of-two hash table to avoid a modulus, - * and get a reasonable hash by knowing roughly how the - * inode pointer and indexes are distributed (ie, we - * roughly know which bits are "significant") - * - * For the time being it will work for struct address_space too (most of - * them sitting inside the inodes). We might want to change it later. + * The multiplicative page cache hash from Chuck Lever's paper. + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * page 3 describes the behavior of the different page cache hash + * functions. This could be painful without integer multiplies, so + * perhaps for wider portability conditional definitions would win. 
+ * -- wli */ -static inline unsigned long _page_hashfn(struct address_space * mapping, unsigned long index) +static inline unsigned long _page_hashfn (struct address_space *mapping, unsigned long index) { -#define i (((unsigned long) mapping)/(sizeof(struct inode) & ~ (sizeof(struct inode) - 1))) -#define s(x) ((x)+((x)>>PAGE_HASH_BITS)) - return s(i+index) & (PAGE_HASH_SIZE-1); -#undef i -#undef s + return ((((unsigned long) mapping + index) * 2654435761UL) >> + (BITS_PER_LONG - PAGE_HASH_BITS)) & (PAGE_HASH_SIZE - 1); } #define page_hash(mapping,index) (page_hash_table+_page_hashfn(mapping,index)) diff -urN linux-2.4.17-rc2-virgin/include/linux/sched.h linux-2.4.17-rc2-wli2/include/linux/sched.h --- linux-2.4.17-rc2-virgin/include/linux/sched.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/sched.h Thu Dec 20 18:55:14 2001 @@ -26,6 +26,7 @@ #include #include #include +#include struct exec_domain; @@ -88,6 +89,7 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x40000000 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -115,6 +117,21 @@ #define SCHED_OTHER 0 #define SCHED_FIFO 1 #define SCHED_RR 2 +#ifdef CONFIG_RTSCHED +#ifdef CONFIG_MAX_PRI +#if CONFIG_MAX_PRI < 99 +#define MAX_PRI 99 +#elif CONFIG_MAX_PRI > 2047 +#define MAX_PRI 2047 +#else +#define MAX_PRI CONFIG_MAX_PRI +#endif +#else +#define MAX_PRI 127 +#endif +#else +#define MAX_PRI 99 +#endif /* * This is an additional bit set when we want to @@ -154,6 +171,9 @@ #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); +#endif extern int schedule_task(struct tq_struct *task); extern void flush_scheduled_tasks(void); @@ -199,7 +219,9 @@ } /* Maximum number of active map areas.. This is a random (large) number */ -#define MAX_MAP_COUNT (65536) +#define DEFAULT_MAX_MAP_COUNT (65536) + +extern int max_map_count; struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ @@ -283,7 +305,17 @@ * offsets of these are hardcoded elsewhere - touch with care */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ + /* + * We want the preempt_count in this cache line, but we + * a) don't want to mess up the offsets in asm code, and + * b) the alignment of the next line below, + * so we move "flags" down + * + * Also note we don't make preempt_count volatile, but we do + * need to make sure it is never hiding in a register when + * we have an interrupt, so we need to use barrier() + */ + int preempt_count; /* 0=> preemptable, < 0 => BUG */ int sigpending; mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -319,12 +351,14 @@ * that's just fine.) 
*/ struct list_head run_list; +#ifdef CONFIG_RTSCHED + int counter_recalc; +#endif unsigned long sleep_time; struct task_struct *next_task, *prev_task; struct mm_struct *active_mm; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; + unsigned long flags; /* task state */ struct linux_binfmt *binfmt; @@ -401,6 +435,10 @@ int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; +#ifdef CONFIG_RTSCHED + int effprio; /* effective real time priority */ + void (*newprio)(struct task_struct*, int); +#endif /* Thread group tracking */ u32 parent_exec_id; @@ -518,11 +556,22 @@ extern struct mm_struct init_mm; extern struct task_struct *init_tasks[NR_CPUS]; +/* + * A pid hash function using a prime near golden + * ratio to the machine word size (32 bits). The + * results of this are unknown. + * + * Added shift to extract high-order bits of computed + * hash function. + * -- wli + */ + /* PID hashing. (shouldnt this be dynamic?) */ #define PIDHASH_SZ (4096 >> 2) +#define PIDHASH_BITS 10 extern struct task_struct *pidhash[PIDHASH_SZ]; - -#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) +#define pid_hashfn(x) \ + (((2654435761UL*(x)) >> (BITS_PER_LONG-PIDHASH_BITS)) & (PIDHASH_SZ-1)) static inline void hash_pid(struct task_struct *p) { @@ -875,10 +924,16 @@ static inline void del_from_runqueue(struct task_struct * p) { +#ifdef CONFIG_RTSCHED +extern void __del_from_runqueue(struct task_struct * p); + + __del_from_runqueue(p); +#else nr_running--; p->sleep_time = jiffies; list_del(&p->run_list); p->run_list.next = NULL; +#endif } static inline int task_on_runqueue(struct task_struct *p) @@ -926,6 +981,11 @@ mntput(rootmnt); return res; } + +#define _TASK_STRUCT_DEFINED +#include +#include +#include #endif /* __KERNEL__ */ diff -urN linux-2.4.17-rc2-virgin/include/linux/segment_tree.h linux-2.4.17-rc2-wli2/include/linux/segment_tree.h --- linux-2.4.17-rc2-virgin/include/linux/segment_tree.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/segment_tree.h Thu Dec 20 18:56:26 2001 @@ -0,0 +1,362 @@ +/* + * linux/include/linux/segment_tree.h + * + * Copyright (C) Oct 2001 William Irwin, IBM + * + * Implementation of segment trees augmented with length information. + * + * In this context, "segment" refers to "line segment". In particular, + * I am storing closed intervals of numbers in this tree. One very + * important invariant maintained is that all the intervals in the + * tree are disjoint. This fact is actually used to help with efficient + * search, because since they are all disjoint, they are ordered + * according to any representative, in particular, the starting and + * ending points. + * + * The separate tree on length is used to help with searches for + * intervals of at least a particular length, and does not have + * any special properties otherwise. 
+ */ + +#ifndef _SEGMENT_TREE_H +#define _SEGMENT_TREE_H + +#include +#include + +typedef struct segment_tree_node { + treap_node_t start; + treap_node_t length; +} segment_tree_node_t; + +typedef union segment_buf { + segment_tree_node_t segment; + union segment_buf *next; +} segment_buf_t; + +typedef struct segment_tree_root { + treap_node_t *start_tree; + treap_node_t *length_tree; +} segment_tree_root_t; + +#define segment_length(node) ((node)->length.value) +#define segment_start(node) ((node)->start.value) +#define segment_end(node) ((node)->start.value + (node)->length.value - 1) + +#define segment_above_point(node, point) \ + (segment_end(node) > (point)) + +#define segment_below_point(node, point) \ + (segment_start(node) < (point)) + +#define segment_contains_point(node, point) \ + (segment_start(node) <= (point) && segment_end(node) >= (point)) + +#define segment_above(node1, node2) \ + (segment_start(node1) > segment_end(node2)) + +#define segment_below(node1, node2) \ + (segment_end(node1) < segment_start(node2)) + +#define segment_disjoint(node1, node2) \ + (segment_above(node1, node2) || segment_below(node1, node2)) + +#define segment_intersect(node1, node2) \ + (segment_start(node1) <= segment_end(node2) \ + && segment_start(node2) <= segment_end(node1)) + +#define segment_contains(node1, node2) \ + (segment_start(node1) <= segment_start(node2) \ + && segment_end(node1) >= segment_end(node2)) + +#define segment_set_endpoints(node, start, end) \ + do { \ + segment_length(node) = (end) - (start) + 1; \ + segment_start(node) = (start); \ + } while(0) + +#define segment_unite(node1, node2) \ + segment_set_endpoints(node1, \ + min(segment_start(node1),segment_start(node2)), \ + max(segment_end(node1), segment_end(node2))) + +#define segment_union(seg_union, node1, node2) \ + segment_set_endpoints(seg_union, \ + min(segment_start(node1),segment_start(node2)), \ + max(segment_end(node1), segment_end(node2))) + +#define segment_intersection(intersect, node1, node2) \ + segment_set_endpoints(intersect, \ + max(segment_start(node1), segment_start(node2)), \ + min(segment_end(node1), segment_end(node2))) + +#define segment_set_start(node, start) \ + segment_set_endpoints(node, start, segment_end(node)) + +#define segment_set_end(node, end) \ + segment_set_endpoints(node, segment_start(node), end) + +#define start_segment_treap(node) \ + treap_entry((node), segment_tree_node_t, start) +#define length_segment_treap(node) \ + treap_entry((node), segment_tree_node_t, length) + +#define start_treap(node) segment_start(start_segment_treap(node)) +#define end_treap(node) segment_end(start_segment_treap(node)) + +static inline unsigned segment_tree_contains_point(segment_tree_node_t *root, + unsigned long point) +{ + treap_node_t *node; + + if(!root) + return 0; + + node = &root->start; + while(node) { + if(segment_contains_point(start_segment_treap(node), point)) + return 1; + else if(segment_below_point(start_segment_treap(node), point)) + node = node->right; + else if(segment_above_point(start_segment_treap(node), point)) + node = node->left; + else + BUG(); + } + return 0; +} + +static inline unsigned segment_tree_intersects(segment_tree_node_t *root, + segment_tree_node_t *segment) +{ + treap_node_t *node; + + if(!root) + return 0; + + node = &root->start; + while(node) { + if(segment_intersect(start_segment_treap(node), segment)) + return 1; + else if(segment_below(start_segment_treap(node), segment)) + node = node->right; + else if(segment_above(start_segment_treap(node), segment)) + 
node = node->left; + else + BUG(); + } + return 0; +} + +/* + * There are five cases here. + * (1) the segments are disjoint + * (2) the entire segment is removed + * (3) something from the beginning of the segment is removed + * (4) something from the end of the segment is removed + * (5) the segment is split into two fragments + */ +static inline void segment_complement( segment_tree_node_t **segment, + segment_tree_node_t *to_remove, + segment_tree_node_t **fragment) +{ + + if(segment_disjoint(*segment, to_remove)) { + + *fragment = NULL; + + } else if(segment_contains(to_remove, *segment)) { + + *segment = *fragment = NULL; + + } else if(segment_start(*segment) >= segment_start(to_remove)) { + unsigned long start, end; + *fragment = NULL; + start = segment_end(to_remove) + 1; + end = segment_end(*segment); + segment_set_endpoints(*segment, start, end); + + } else if(segment_end(*segment) <= segment_end(to_remove)) { + unsigned long start, end; + *fragment = NULL; + start = segment_start(*segment); + end = segment_start(to_remove) - 1; + segment_set_endpoints(*segment, start, end); + + } else { + unsigned long start_seg, end_seg, start_frag, end_frag; + + start_seg = segment_start(*segment); + end_seg = segment_start(to_remove) - 1; + + start_frag = segment_end(to_remove) + 1; + end_frag = segment_end(*segment); + + segment_set_endpoints(*segment, start_seg, end_seg); + segment_set_endpoints(*fragment, start_frag, end_frag); + + } +} + +/* + * Efficiently determining all possible line segments which intersect + * with another line segment requires splitting the start treap according + * to the endpoints. This is a derived key so it unfortunately may not be + * shared with the generic treap implementation. + */ +static inline void segment_end_split(treap_root_t root, unsigned long end, + treap_root_t less, treap_root_t more) +{ + treap_root_t tree = root; + treap_node_t sentinel; + + sentinel.value = end; + sentinel.priority = ULONG_MAX; + sentinel.left = sentinel.right = sentinel.parent = NULL; + + while(1) { + if(!*root) { + *root = &sentinel; + goto finish; + } else if(end > end_treap(*root) && !(*root)->right) { + (*root)->right = &sentinel; + sentinel.parent = *root; + root = &(*root)->right; + goto upward; + } else if(end <= end_treap(*root) && !(*root)->left) { + (*root)->left = &sentinel; + sentinel.parent = *root; + root = &(*root)->left; + goto upward; + } else if(end > end_treap(*root)) + root = &(*root)->right; + else /* end <= end_treap(*root) */ + root = &(*root)->left; + } + +upward: + + while(1) { + if((*root)->left && (*root)->left->priority > (*root)->priority) + treap_rotate_right(root); + else if((*root)->right + && (*root)->right->priority > (*root)->priority) + treap_rotate_left(root); + + if(!(*root)->parent) + goto finish; + else if(!(*root)->parent->parent) + root = tree; + else if((*root)->parent->parent->left == (*root)->parent) + root = &(*root)->parent->parent->left; + else if((*root)->parent->parent->right == (*root)->parent) + root = &(*root)->parent->parent->right; + } + +finish: + *less = (*root)->left; + *more = (*root)->right; + + if(*less) (*less)->parent = NULL; + if(*more) (*more)->parent = NULL; + + *root = NULL; +} + +#define segment_length_link(node) \ + treap_node_link(&start_segment_treap(node)->length) + +#define segment_start_link(node) \ + treap_node_link(&start_segment_treap(node)->start) + +#define segment_delete(node) \ + do { \ + treap_root_delete(segment_start_link(node)); \ + treap_root_delete(segment_length_link(node)); \ + } while(0) 
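/*
 * The five cases above are easy to get wrong, so here is a worked
 * example as a standalone user-space sketch. The "struct seg" type,
 * seg_complement() and the demo main() are illustrative stand-ins
 * assumed for this sketch only; they are not the treap-backed
 * segment_tree_node_t API from this header.
 */
#include <stdio.h>

struct seg { unsigned long start, end; };	/* closed interval [start, end] */

/* Remove interval "rm" from "s"; returns how many pieces survive
 * (0, 1 or 2). A second surviving piece, if any, goes to "frag". */
static int seg_complement(struct seg *s, const struct seg *rm, struct seg *frag)
{
	if (rm->end < s->start || rm->start > s->end)
		return 1;				/* (1) disjoint */
	if (rm->start <= s->start && rm->end >= s->end)
		return 0;				/* (2) fully covered */
	if (s->start >= rm->start) {
		s->start = rm->end + 1;			/* (3) head trimmed */
		return 1;
	}
	if (s->end <= rm->end) {
		s->end = rm->start - 1;			/* (4) tail trimmed */
		return 1;
	}
	frag->start = rm->end + 1;			/* (5) split in two */
	frag->end = s->end;
	s->end = rm->start - 1;
	return 2;
}

int main(void)
{
	struct seg s = { 10, 20 }, rm = { 13, 16 }, frag;
	int n = seg_complement(&s, &rm, &frag);	/* punch a hole in the middle */

	printf("pieces=%d first=[%lu,%lu]\n", n, s.start, s.end);
	if (n == 2)				/* prints second=[17,20] */
		printf("second=[%lu,%lu]\n", frag.start, frag.end);
	return 0;
}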
+ +static inline void segment_all_intersect(treap_root_t root, + unsigned long start, + unsigned long end, + treap_root_t intersect) +{ + treap_node_t *less_end, *more_end, *more_start, *less_start; + less_start = more_start = NULL; + + if(start) { + less_end = more_end = NULL; + segment_end_split(root, start, &less_end, &more_end); + treap_split(&more_end, end + 1, &less_start, &more_start); + *root = NULL; + treap_join(root, &less_end, &more_start); + } else { + treap_split(root, end + 1, &less_start, &more_start); + *root = more_start; + } + *intersect = less_start; +} + +#if 0 +/* + * If for some reason there is a reason to visualize the trees, + * the following routines may be useful examples as to how they + * may be rendered using dot from AT&T's graphviz. + */ + +extern void early_printk(const char *fmt, ...); + +static void print_ptr_graph(treap_root_t root) { + if(!*root) + return; + else if(!(*root)->marker) { + segment_tree_node_t *seg = start_segment_treap(*root); + (*root)->marker = 1UL; + early_printk("x%p [label=\"%p, start=%lu,\\nlength=%lu\"];\n", + *root, *root, segment_start(seg), segment_length(seg)); + if((*root)->parent) + early_printk("x%p -> x%p [label=\"parent\"];\n", + *root, (*root)->parent); + if((*root)->left) + early_printk("x%p -> x%p [label=\"left\"];\n", + *root, (*root)->left); + if((*root)->right) + early_printk("x%p -> x%p [label=\"right\"];\n", + *root, (*root)->right); + + print_ptr_graph(&(*root)->parent); + print_ptr_graph(&(*root)->left); + print_ptr_graph(&(*root)->right); + (*root)->marker = 0UL; + } + /* + * This is no good for cycle detection since we also traverse + * the parent links. It's -very- cyclic with those. + */ +} +static void print_length_graph(treap_root_t root) { + if(!*root) + return; + else if(!(*root)->marker) { + segment_tree_node_t *seg = length_segment_treap(*root); + (*root)->marker = 1UL; + early_printk("x%p [label=\"%p: start=%lu,\\nlength=%lu\"];\n", + *root, *root, segment_start(seg), segment_length(seg)); + if((*root)->parent) + early_printk("x%p -> x%p [label=\"parent\"];\n", + *root, (*root)->parent); + if((*root)->left) + early_printk("x%p -> x%p [label=\"left\"];\n", + *root, (*root)->left); + if((*root)->right) + early_printk("x%p -> x%p [label=\"right\"];\n", + *root, (*root)->right); + + print_length_graph(&(*root)->parent); + print_length_graph(&(*root)->left); + print_length_graph(&(*root)->right); + (*root)->marker = 0UL; + } +} +#endif + +#endif /* _SEGMENT_TREE_H */ diff -urN linux-2.4.17-rc2-virgin/include/linux/smp.h linux-2.4.17-rc2-wli2/include/linux/smp.h --- linux-2.4.17-rc2-virgin/include/linux/smp.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/smp.h Thu Dec 20 18:55:14 2001 @@ -81,7 +81,9 @@ #define smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_threads_ready 1 +#ifndef CONFIG_PREEMPT #define kernel_lock() +#endif #define cpu_logical_map(cpu) 0 #define cpu_number_map(cpu) 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) diff -urN linux-2.4.17-rc2-virgin/include/linux/smp_lock.h linux-2.4.17-rc2-wli2/include/linux/smp_lock.h --- linux-2.4.17-rc2-virgin/include/linux/smp_lock.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/smp_lock.h Thu Dec 20 18:55:14 2001 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -urN linux-2.4.17-rc2-virgin/include/linux/spinlock.h 
linux-2.4.17-rc2-wli2/include/linux/spinlock.h --- linux-2.4.17-rc2-virgin/include/linux/spinlock.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/spinlock.h Thu Dec 20 18:55:13 2001 @@ -2,6 +2,7 @@ #define __LINUX_SPINLOCK_H #include +#include /* * These are the generic versions of the spinlocks and read-write @@ -45,8 +46,10 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) #define ATOMIC_DEC_AND_LOCK +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -62,11 +65,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -125,12 +128,76 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_read_unlock(lock) do { } while(0) +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ + +#ifdef CONFIG_PREEMPT + +#define preempt_is_disabled() (current->preempt_count) +#define preempt_prefetch(a) prefetchw(a) + +#define preempt_disable() \ +do { \ + ++current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched() \ +do { \ + --current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable() \ +do { \ + --current->preempt_count; \ + barrier(); \ + if (unlikely((current->preempt_count == 0) && current->need_resched)) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable(); _raw_write_trylock(lock) ? 
\ + 1 : ({preempt_enable(); 0;});}) + +#else + +#define preempt_is_disabled() do { } while (0) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() +#define preempt_enable() do { } while (0) +#define preempt_prefetch(a) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK diff -urN linux-2.4.17-rc2-virgin/include/linux/swap.h linux-2.4.17-rc2-wli2/include/linux/swap.h --- linux-2.4.17-rc2-virgin/include/linux/swap.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/swap.h Thu Dec 20 18:55:14 2001 @@ -86,8 +86,8 @@ extern unsigned int nr_free_pages(void); extern unsigned int nr_free_buffer_pages(void); extern int nr_active_pages; -extern int nr_inactive_pages; -extern atomic_t nr_async_pages; +extern int nr_inactive_dirty_pages; +extern int nr_inactive_clean_pages; extern atomic_t page_cache_size; extern atomic_t buffermem_pages; extern spinlock_t pagecache_lock; @@ -100,18 +100,42 @@ struct zone_t; +/* linux/mm/rmap.c */ +extern int FASTCALL(page_referenced(struct page *)); +extern void FASTCALL(page_add_rmap(struct page *, pte_t *)); +extern void FASTCALL(page_remove_rmap(struct page *, pte_t *)); +extern int FASTCALL(try_to_unmap(struct page *)); + +/* try_to_unmap return values */ +#define SWAP_SUCCESS 0 +#define SWAP_AGAIN 1 +#define SWAP_FAIL 2 +#define SWAP_ERROR 3 + /* linux/mm/swap.c */ +extern int total_swap_pages; extern void FASTCALL(lru_cache_add(struct page *)); extern void FASTCALL(__lru_cache_del(struct page *)); extern void FASTCALL(lru_cache_del(struct page *)); extern void FASTCALL(activate_page(struct page *)); +extern void FASTCALL(activate_page_nolock(struct page *)); +extern void FASTCALL(deactivate_page(struct page *)); +extern void FASTCALL(deactivate_page_nolock(struct page *)); extern void swap_setup(void); /* linux/mm/vmscan.c */ +extern struct page * FASTCALL(reclaim_page(zone_t *)); extern wait_queue_head_t kswapd_wait; -extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int)); +extern int FASTCALL(try_to_free_pages(unsigned int gfp_mask)); +extern void wakeup_kswapd(void); +extern int free_shortage(void); +extern int total_free_shortage(void); +extern int inactive_shortage(void); +extern int total_inactive_shortage(void); +extern unsigned int zone_free_shortage(zone_t *zone); +extern unsigned int zone_inactive_shortage(zone_t *zone); /* linux/mm/page_io.c */ extern void rw_swap_page(int, struct page *); @@ -125,6 +149,7 @@ extern void show_swap_cache_info(void); #endif extern int add_to_swap_cache(struct page *, swp_entry_t); +extern int add_to_swap(struct page *); extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); extern void free_page_and_swap_cache(struct page *page); @@ -158,7 +183,14 @@ extern spinlock_t pagemap_lru_lock; -extern void FASTCALL(mark_page_accessed(struct page *)); +/* + * Page aging defines. These seem to work great in FreeBSD, + * no need to reinvent the wheel. + */ +#define PAGE_AGE_START 5 +#define PAGE_AGE_ADV 3 +#define PAGE_AGE_DECL 1 +#define PAGE_AGE_MAX 64 /* * List add/del helper macros. 
These must be called @@ -166,39 +198,60 @@ */ #define DEBUG_LRU_PAGE(page) \ do { \ - if (!PageLRU(page)) \ - BUG(); \ if (PageActive(page)) \ BUG(); \ + if (PageInactiveDirty(page)) \ + BUG(); \ + if (PageInactiveClean(page)) \ + BUG(); \ } while (0) -#define add_page_to_active_list(page) \ -do { \ - DEBUG_LRU_PAGE(page); \ - SetPageActive(page); \ - list_add(&(page)->lru, &active_list); \ - nr_active_pages++; \ -} while (0) - -#define add_page_to_inactive_list(page) \ -do { \ - DEBUG_LRU_PAGE(page); \ - list_add(&(page)->lru, &inactive_list); \ - nr_inactive_pages++; \ -} while (0) - -#define del_page_from_active_list(page) \ -do { \ - list_del(&(page)->lru); \ - ClearPageActive(page); \ - nr_active_pages--; \ -} while (0) +#define add_page_to_active_list(page) { \ + DEBUG_LRU_PAGE(page); \ + SetPageActive(page); \ + list_add(&(page)->lru, &active_list); \ + nr_active_pages++; \ +} + +#define add_page_to_inactive_dirty_list(page) { \ + DEBUG_LRU_PAGE(page); \ + SetPageInactiveDirty(page); \ + list_add(&(page)->lru, &inactive_dirty_list); \ + nr_inactive_dirty_pages++; \ + PageZone(page)->inactive_dirty_pages++; \ +} + +#define add_page_to_inactive_clean_list(page) { \ + DEBUG_LRU_PAGE(page); \ + SetPageInactiveClean(page); \ + list_add(&(page)->lru, &PageZone(page)->inactive_clean_list); \ + PageZone(page)->inactive_clean_pages++; \ + nr_inactive_clean_pages++; \ +} + +#define del_page_from_active_list(page) { \ + list_del(&(page)->lru); \ + ClearPageActive(page); \ + nr_active_pages--; \ + DEBUG_LRU_PAGE(page); \ +} + +#define del_page_from_inactive_dirty_list(page) { \ + list_del(&(page)->lru); \ + ClearPageInactiveDirty(page); \ + nr_inactive_dirty_pages--; \ + PageZone(page)->inactive_dirty_pages--; \ + DEBUG_LRU_PAGE(page); \ +} + +#define del_page_from_inactive_clean_list(page) { \ + list_del(&(page)->lru); \ + ClearPageInactiveClean(page); \ + PageZone(page)->inactive_clean_pages--; \ + nr_inactive_clean_pages--; \ + DEBUG_LRU_PAGE(page); \ +} -#define del_page_from_inactive_list(page) \ -do { \ - list_del(&(page)->lru); \ - nr_inactive_pages--; \ -} while (0) extern spinlock_t swaplock; diff -urN linux-2.4.17-rc2-virgin/include/linux/swapctl.h linux-2.4.17-rc2-wli2/include/linux/swapctl.h --- linux-2.4.17-rc2-virgin/include/linux/swapctl.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/swapctl.h Tue Dec 18 22:28:42 2001 @@ -10,4 +10,13 @@ typedef pager_daemon_v1 pager_daemon_t; extern pager_daemon_t pager_daemon; +typedef struct freepages_v1 +{ + unsigned int min; + unsigned int low; + unsigned int high; +} freepages_v1; +typedef freepages_v1 freepages_t; +extern freepages_t freepages; + #endif /* _LINUX_SWAPCTL_H */ diff -urN linux-2.4.17-rc2-virgin/include/linux/sysctl.h linux-2.4.17-rc2-wli2/include/linux/sysctl.h --- linux-2.4.17-rc2-virgin/include/linux/sysctl.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/sysctl.h Thu Dec 20 18:55:18 2001 @@ -124,6 +124,7 @@ KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ KERN_TAINTED=53, /* int: various kernel tainted flags */ KERN_CADPID=54, /* int: PID of the process to notify on CAD */ + KERN_KDB=55, /* int: kdb on/off */ }; @@ -140,6 +141,7 @@ VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ + VM_MAX_MAP_COUNT=11, /* int: Maximum number of active map areas */ VM_MIN_READAHEAD=12, /* Min file readahead */ VM_MAX_READAHEAD=13 /* Max file readahead */ }; 
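The page cache and pid hash functions above both use the same multiplicative (Fibonacci) hashing idea: multiply the key by 2654435761, a prime close to 2^32 divided by the golden ratio, and keep the high-order bits of the 32-bit product. A minimal user-space sketch of that idea follows; the 8-bit table size is an assumption for illustration only, where the kernel uses PAGE_HASH_BITS and PIDHASH_BITS.

#include <stdio.h>
#include <stdint.h>

/* Same constant as _page_hashfn() and pid_hashfn() above. */
#define GOLDEN_RATIO_PRIME	2654435761UL
#define HASH_BITS		8	/* illustration: a 256-slot table */

static unsigned mult_hash(uint32_t key)
{
	/* multiply mod 2^32, then keep the high-order HASH_BITS bits */
	return (uint32_t)(key * GOLDEN_RATIO_PRIME) >> (32 - HASH_BITS);
}

int main(void)
{
	uint32_t pid;

	/* Sequential keys, which clump badly under simple shift/xor
	 * mixing, scatter across slots under the multiplicative hash. */
	for (pid = 100; pid < 108; pid++)
		printf("pid %u -> slot %u\n", pid, mult_hash(pid));
	return 0;
}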
diff -urN linux-2.4.17-rc2-virgin/include/linux/tqueue.h linux-2.4.17-rc2-wli2/include/linux/tqueue.h --- linux-2.4.17-rc2-virgin/include/linux/tqueue.h Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/include/linux/tqueue.h Thu Dec 20 18:55:13 2001 @@ -94,6 +94,22 @@ extern spinlock_t tqueue_lock; /* + * Call all "bottom halfs" on a given list. + */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ + +#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_TQUEUE_H_INLINES +/* * Queue a task on a tq. Return non-zero if it was successfully * added. */ @@ -109,17 +125,4 @@ } return ret; } - -/* - * Call all "bottom halfs" on a given list. - */ - -extern void __run_task_queue(task_queue *list); - -static inline void run_task_queue(task_queue *list) -{ - if (TQ_ACTIVE(*list)) - __run_task_queue(list); -} - -#endif /* _LINUX_TQUEUE_H */ +#endif diff -urN linux-2.4.17-rc2-virgin/include/linux/treap.h linux-2.4.17-rc2-wli2/include/linux/treap.h --- linux-2.4.17-rc2-virgin/include/linux/treap.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/include/linux/treap.h Thu Dec 20 18:56:26 2001 @@ -0,0 +1,300 @@ +/* + * linux/include/linux/treap.h + * + * Copyright (C) 2001 William Irwin, IBM + * + * Simple treap implementation, following Aragon and Seidel. + * + * Treaps are a simple binary search tree structure, with a twist that + * radically simplifies their management. That is that they keep both + * the search key and a randomly generated priority. They are then both + * heap-ordered according to the priority and binary search tree ordered + * according to the search keys. They are specifically designed for, and + * also reputed to be effective at range tree and segment tree structures + * according to both Knuth and dynamic sets according to the + * Blelloch/Reid-Miller paper. + * + * The rotations themselves are simple, and they are done less often + * than for some kinds of trees, where splay trees where specifically + * mentioned by Knuth. The decision process as to when to perform a + * rotation is simplified by the heap structure. Rotations are done in + * two instances: when rotating a node down to a leaf position before + * deletion, and in restoring the heap ordering after an insertion. + * + * Treaps also support fast splitting and joining operations, which + * make them convenient for interval searches. + * + * One important fact to observe is that when joining, all of the + * members of the left tree must be less than all the members of + * the right tree, or otherwise the search tree ordering breaks. + */ + +#ifndef _TREAP_H +#define _TREAP_H + +#include + +typedef struct treap_node { + unsigned long priority; + unsigned long value; + struct treap_node *left, *right, *parent; + unsigned long marker; +} treap_node_t; + +typedef treap_node_t **treap_root_t; + +#define TREAP_INIT(root) \ + do { \ + *root = NULL; \ + } while(0) + +#define treap_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +#define treap_node_link(node) \ + ((!(node) || !(node)->parent) ? NULL : \ + ((node) == (node)->parent->left) ? 
&(node)->parent->left \ + : &(node)->parent->right) + +#define treap_find_parent_and_remove_child(tmp, parent) \ + do { \ + parent = tmp->parent; \ + if(parent && parent->left == tmp) \ + parent->left = NULL; \ + else if(parent && parent->right == tmp) \ + parent->right = NULL; \ + else if(parent) \ + BUG(); \ + } while(0) + + +#define treap_find_leftmost_leaf(node) \ + do { \ + if(!node) \ + break; \ + while(1) { \ + if(node->left) \ + node = node->left; \ + else if(node->right) \ + node = node->right; \ + else \ + break; \ + } \ + } while(0) + +/* + * The diagram according to which the assignments in rotation are done: + * + * T T + * | | + * y <- left x + * / \ / \ + * x C right -> A y + * / \ / \ + * A B B C + * + * Some of these assignments are not necessary, as the edges do + * not change. In these cases the assignments are retained as comments. + */ + +static inline void treap_rotate_left(treap_root_t root) +{ + treap_node_t *x, *y, *B, *T; + /* treap_node_t *A, *C; */ + + if(*root) { + x = *root; + T = x->parent; + y = x->right; + if(y) { + if(T && T->left == x) T->left = y; + if(T && T->right == x) T->right = y; + + y->parent = T; + *root = y; + + /* A = x->left; */ + + B = y->left; + + /* C = y->right; */ + + y->left = x; + x->parent = y; + + /* + x->left = A; + if(A) A->parent = x; + */ + + x->right = B; + if(B) B->parent = x; + + /* + y->right = C; + if(C) C->parent = y; + */ + } + } +} + +static inline void treap_rotate_right(treap_root_t root) +{ + treap_node_t *x, *y, *B, *T; + /* treap_node_t *A, *C; */ + + if(*root) { + y = *root; + T = y->parent; + x = y->left; + if(x) { + if(T && T->left == y) T->left = x; + if(T && T->right == y) T->right = x; + + x->parent = T; + *root = x; + + /* A = x->left; */ + + B = x->right; + + /* C = y->right; */ + + x->right = y; + y->parent = x; + + /* + x->left = A; + if(A) A->parent = x; + */ + + y->left = B; + if(B) B->parent = y; + + /* + y->right = C; + if(C) C->parent = y; + */ + } + } +} + +static inline treap_node_t *treap_root_delete(treap_root_t root) +{ + struct treap_node *tmp; + + while(1) { + + if(!root || !*root) return NULL; + else if(!(*root)->left && !(*root)->right) { + tmp = *root; + *root = tmp->parent = NULL; + return tmp; + } else if(!(*root)->left) { + treap_rotate_left(root); + root = &(*root)->left; + } else if(!(*root)->right) { + treap_rotate_right(root); + root = &(*root)->right; + } else if((*root)->left->priority > (*root)->right->priority) { + treap_rotate_right(root); + root = &(*root)->right; + } else { + treap_rotate_left(root); + root = &(*root)->left; + } + } +} + +static inline void treap_insert(treap_root_t root, treap_node_t *node) +{ + treap_root_t tree = root; + node->left = node->right = node->parent = NULL; + + while(1) { + if(!*root) { + *root = node; + return; + } else if(node->value <= (*root)->value && !(*root)->left) { + (*root)->left = node; + node->parent = *root; + root = &(*root)->left; + break; + } else if(node->value > (*root)->value && !(*root)->right) { + (*root)->right = node; + node->parent = *root; + root = &(*root)->right; + break; + } else if(node->value <= (*root)->value) { + root = &(*root)->left; + } else { /* node->value > (*root)->value */ + root = &(*root)->right; + } + } + while(1) { + if(!*root) return; + else if((*root)->left + && (*root)->left->priority > (*root)->priority) + treap_rotate_right(root); + else if((*root)->right + && (*root)->right->priority > (*root)->priority) + treap_rotate_left(root); + + if(!(*root)->parent) + return; + else if(!(*root)->parent->parent) 
+ root = tree; + else if((*root)->parent == (*root)->parent->parent->left) + root = &(*root)->parent->parent->left; + else if((*root)->parent == (*root)->parent->parent->right) + root = &(*root)->parent->parent->right; + + } +} + +static inline treap_node_t *treap_delete(treap_root_t root, unsigned long k) +{ + while(1) { + if(!*root) return NULL; + else if(k < (*root)->value) root = &(*root)->left; + else if(k > (*root)->value) root = &(*root)->right; + else return treap_root_delete(root); + } +} + +static inline void treap_split(treap_root_t root, unsigned long k, + treap_root_t less, treap_root_t more) +{ + treap_node_t sentinel; + + sentinel.value = k; + sentinel.priority = ULONG_MAX; + sentinel.parent = sentinel.left = sentinel.right = NULL; + + treap_insert(root, &sentinel); + *less = (*root)->left; + *more = (*root)->right; + + if(*less) (*less)->parent = NULL; + if(*more) (*more)->parent = NULL; + + *root = NULL; +} + +static inline void treap_join(treap_root_t root, + treap_root_t left, treap_root_t right) +{ + treap_node_t sentinel; + sentinel.priority = 0UL; + sentinel.left = *left; + sentinel.right = *right; + sentinel.parent = NULL; + + if(*left) (*left)->parent = &sentinel; + if(*right) (*right)->parent = &sentinel; + + *root = &sentinel; + treap_root_delete(root); +} + +#endif /* _TREAP_H */ diff -urN linux-2.4.17-rc2-virgin/include/net/af_unix.h linux-2.4.17-rc2-wli2/include/net/af_unix.h --- linux-2.4.17-rc2-virgin/include/net/af_unix.h Mon Apr 24 13:43:04 2000 +++ linux-2.4.17-rc2-wli2/include/net/af_unix.h Wed Dec 19 12:31:47 2001 @@ -6,7 +6,7 @@ typedef struct sock unix_socket; extern void unix_gc(void); -#define UNIX_HASH_SIZE 256 +#define UNIX_HASH_SIZE ((PAGE_SIZE/2)/sizeof(unix_socket *)) extern unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; extern rwlock_t unix_table_lock; diff -urN linux-2.4.17-rc2-virgin/init/main.c linux-2.4.17-rc2-wli2/init/main.c --- linux-2.4.17-rc2-virgin/init/main.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/init/main.c Tue Dec 18 22:25:46 2001 @@ -69,6 +69,10 @@ #include #endif +#if defined(CONFIG_KDB) +#include +#endif + /* * Versions of gcc older than that listed below may actually compile * and link okay, but the end product can have subtle run time bugs. 
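An aside on the treap header this patch adds earlier in the diff: the primitives are header-only and can be exercised outside the kernel. The sketch below is a hypothetical user-space smoke test, not part of the patch. It assumes treap.h can be lifted out verbatim, that treap_node_t carries the value, priority, parent, left and right fields the macros rely on, that treap_root_t is the usual pointer-to-root-pointer typedef, and that the kernel-only BUG() is shimmed with abort().

    /* Hypothetical user-space smoke test for the treap primitives above.
     * Assumptions: treap.h extracted verbatim; treap_node_t has value,
     * priority, parent, left, right; BUG() shimmed for user space.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <limits.h>            /* ULONG_MAX, used by treap_split() */
    #define BUG() abort()          /* stand-in for the kernel's BUG() */
    #include "treap.h"

    int main(void)
    {
        treap_node_t nodes[8];
        treap_node_t *root = NULL, *less = NULL, *more = NULL;
        unsigned long i;

        /* Insert keys 0..7 with pseudo-random heap priorities. */
        for (i = 0; i < 8; i++) {
            nodes[i].value = i;
            nodes[i].priority = (unsigned long)rand();
            treap_insert(&root, &nodes[i]);
        }

        /* Split around key 4: keys <= 4 under *less, keys > 4 under *more. */
        treap_split(&root, 4UL, &less, &more);
        printf("split roots: %lu / %lu\n", less->value, more->value);

        /* Join merges the two halves back into one treap... */
        treap_join(&root, &less, &more);

        /* ...and delete-by-key returns the evicted node, or NULL. */
        if (treap_delete(&root, 3UL) != NULL)
            printf("deleted key 3\n");
        return 0;
    }

The sentinel trick is what keeps split and join cheap: split inserts a ULONG_MAX-priority node that rotates itself to the root, and join deletes a zero-priority sentinel, so both reduce to one insert or one root deletion.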
@@ -445,6 +449,34 @@ } if (next != NULL) *next++ = 0; +#if defined(CONFIG_KDB) + /* kdb, kdb=on, kdb=off, kdb=early */ + if (strncmp(line, "kdb", 3) == 0) { + if (line[3] == '\0') { + /* Backward compatibility, kdb with no option means early activation */ + printk("Boot flag kdb with no options is obsolete, use kdb=early\n"); + kdb_on = 1; + kdb_flags |= KDB_FLAG_EARLYKDB; + continue; + } + if (line[3] == '=') { + if (strcmp(line+4, "on") == 0) { + kdb_on = 1; + continue; + } + if (strcmp(line+4, "off") == 0) { + kdb_on = 0; + continue; + } + if (strcmp(line+4, "early") == 0) { + kdb_on = 1; + kdb_flags |= KDB_FLAG_EARLYKDB; + continue; + } + printk("Boot flag %s not recognised, assumed to be environment variable\n", line); + } + } +#endif if (!strncmp(line,"init=",5)) { line += 5; execute_command = line; @@ -591,6 +623,12 @@ #endif mem_init(); kmem_cache_sizes_init(); +#if defined(CONFIG_KDB) + kdb_init(); + if (KDB_FLAG(EARLYKDB)) { + KDB_ENTER(); + } +#endif pgtable_cache_init(); mempages = num_physpages; diff -urN linux-2.4.17-rc2-virgin/kdb/ChangeLog linux-2.4.17-rc2-wli2/kdb/ChangeLog --- linux-2.4.17-rc2-virgin/kdb/ChangeLog Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/ChangeLog Tue Dec 18 22:21:49 2001 @@ -0,0 +1,428 @@ +2001-12-03 Keith Owens + + * Upgrade to 2.4.16. + * Add include/asm-um/kdb.h stub to allow XFS to be tested under UML. + * Check if an interrupt frame on i386 came from user space. + * Out of scope bug fix in kdb_id.c. Ethan Solomita. + * Changes to common code to support sparc64. Ethan Solomita. + * Change GFP_KERNEL to GFP_ATOMIC in disasm. Ethan Solomita. + +2001-11-16 Keith Owens + + * Upgrade to 2.4.15-pre5. + * Wrap () around #define expressions with unary operators. + +2001-11-13 Keith Owens + + * Upgrade to 2.4.15-pre4. + * kdbm_pg.c patch from Hugh Dickins. + +2001-11-07 Keith Owens + + * Upgrade to 2.4.14-ia64-011105. + * Change name of l1 serial I/O routine, add ia64 init command. SGI. + * Sync kdbm_pg with XFS. + +2001-11-06 Keith Owens + + * Upgrade to kernel 2.4.14. + +2001-11-02 Keith Owens + + * Sync kdbm_pg.c with XFS. + +2001-10-24 Keith Owens + + * Upgrade to kernel 2.4.13. + +2001-10-14 Keith Owens + + * More use of TMPPREFIX in top level Makefile to speed up NFS compiles. + + * Correct repeat calculations in md/mds commands. + +2001-10-10 Keith Owens + + * Copy bfd.h and ansidecl.h to arch/$(ARCH)/kdb, remove dependencies on + user space includes. + + * Update kdb v1.9 to kernel 2.4.11. + +2001-10-01 Keith Owens + + * Update kdb v1.9 to kernel 2.4.11-pre1 and 2.4.10-ac1. + + * Correct loop in kdb_parse, reported by Tachino Nobuhiro. + +2001-09-25 Keith Owens + + * Update kdb v1.8 to kernel 2.4.10. + + * kdbm_pg patch from Hugh Dickins. + + * DProbes patch from Bharata B Rao. + + * mdWcn and mmW patch from Vamsi Krishna S. + + * i386 disasm layout patch from Jean-Marc Saffroy. + + * Work around for 64 bit binutils, Simon Munton. + + * kdb.mm doc correction by Chris Pascoe. + + * Enter repeats the last command, IA64 disasm only prints one + instruction. Don Dugger. + + * Allow kdb/modules to be linked into vmlinux. + + * Remove obsolete code from kdb/modules/kdbm_{pg,vm}.c. + + * Warn when commands are entered at more prompt. + + * Add MODULE_AUTHOR, DESCRIPTION, LICENSE. + + * Release as kdb v1.9. + +2001-02-27 Keith Owens + + * Update kdb v1.8 to kernel 2.4.2, sync kdb/modules with XFS. + + * Hook into panic() call. + +2000-12-18 Keith Owens + + * Update kdb v1.7 to kernel 2.4.0-test13-pre3, sync kdb/modules with + XFS.
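The kdb= handling added to parse_options() in the init/main.c hunk above is a small dispatch over the boot command line. The sketch below distills that same logic so it can be compiled and exercised in user space; kdb_on, kdb_flags and the KDB_FLAG_EARLYKDB value here are illustrative stand-ins, not the kernel's definitions.

    /* Distilled sketch of the kdb= boot-flag dispatch shown above.
     * KDB_FLAG_EARLYKDB's value is assumed for illustration only.
     */
    #include <stdio.h>
    #include <string.h>

    #define KDB_FLAG_EARLYKDB 0x1

    static int kdb_on;
    static int kdb_flags;

    static void parse_kdb_option(const char *line)
    {
        if (strncmp(line, "kdb", 3) != 0)
            return;
        if (line[3] == '\0') {              /* bare "kdb" means early */
            kdb_on = 1;
            kdb_flags |= KDB_FLAG_EARLYKDB;
        } else if (line[3] == '=') {
            if (strcmp(line + 4, "on") == 0)
                kdb_on = 1;
            else if (strcmp(line + 4, "off") == 0)
                kdb_on = 0;
            else if (strcmp(line + 4, "early") == 0) {
                kdb_on = 1;
                kdb_flags |= KDB_FLAG_EARLYKDB;
            } else
                printf("unrecognised kdb option: %s\n", line);
        }
    }

    int main(void)
    {
        parse_kdb_option("kdb=early");
        printf("kdb_on=%d early=%d\n",
               kdb_on, !!(kdb_flags & KDB_FLAG_EARLYKDB));
        return 0;
    }

On a real boot line the same strings appear as, for example, "kdb=early", and per the second init/main.c hunk above it is the EARLYKDB flag that makes the kernel call KDB_ENTER() immediately after kdb_init().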
+ +2000-11-18 Keith Owens + + * Update to kernel 2.4.0-test11-pre7, including forward port of + bug fixes from WIP 2.4.0-test9 tree. + + * Update to Cygnus CVS trees for disassembly code. + + * Bump to kdb v1.6. + +2000-10-19 Keith Owens + + * Update to kernel 2.4.0-test10-pre4. + +2000-10-15 Keith Owens + + * kdb/kdbmain.c (kdb_parse): Correctly handle blank input. + + * kdb/kdbmain.c (kdb_local, kdb): Reason SILENT can have NULL ef. + +2000-10-13 Keith Owens + + * kdb/kdbmain.c: Reduce CMD_LEN to avoid overflowing kdb_printf buffer. + +2000-10-11 Keith Owens + + * kdb/kdbmain.c (kdb): Test for userspace breakpoints before driving + other cpus into kdb. Speeds up gdb and avoids SMP race. + + * arch/i386/kdb/kdba_io.c (get_serial_char, get_kbd_char): Ignore + unprintable characters. + + * arch/i386/kdb/kdba_io.c (kdba_read): Better handling of buffer size. + +2000-10-04 Keith Owens + + * arch/i386/kdb/kdba_bt.c (kdba_bt_process): Verify that esp is inside + task_struct. Original patch by Mike Galbraith. + + * kdb/kdb_io.c (kdb_getstr): Reset output line counter, remove + unnecessary prompts. + + * arch/i386/kdb/kdbasupport.c (kdb_getregcontents): Change " cs" to + "xcs", ditto ss, ds, es. gdb2kdb does not like leading spaces. + + * include/asm-xxx/kdb.h: Add dummy kdb.h for all architectures except + ix86. This allows #include to appear in arch independent + code without causing compile errors. + + * kdb/modules/kdbm_pg: Sync with XFS. + +2000-10-03 Keith Owens + + * kdb/kdb_io.c (kdb_read): Ignore NMI while waiting for input. + + * kdb/kdb_io.c, kdb/Makefile: Export kdb_read. + +2000-10-02 Keith Owens + + * arch/i386/kernel/smpboot.c (do_boot_cpu): Set nmi_watchdog_source to 2 + to avoid premature NMI oops during cpu bring up. We have to assume that + a box with more than 1 cpu has a working IO-APIC. + + * Documentation/kdb/{kdb.mm,kdb_md.man}: Add mdr command. + + * kdb/kdbmain.c (kdb_md): Add mdr command. + + * Release as kdb v1.5 against 2.4.0-test9-pre8. + + * arch/i386/kdb/kdba_io.c, arch/i386/kdb/kdbasupport.c, kdb/kdbmain.c, + kdb/kdb_io.c, kdb/kdb_id.c: Remove zero initializers for static + variables. + +2000-09-28 Keith Owens + + * various: Add nmi_watchdog_source, 1 local APIC, 2 IO-APIC. + Test nmi_watchdog_source instead of nr_ioapics so UP works on SMP hardware. + + * arch/i386/kernel/io_apic.c: Rename setup_nmi to setup_nmi_io for clarity. + + * kdb/kdbmain.c (kdb_parse): Only set NO_WATCHDOG if it was already set. + + * kdb/kdbmain.c (kdb): Clear NO_WATCHDOG on all exit paths. + + * include/linux/kdb.h: Add KDB_REASON_SILENT. + + * kdb/kdbmain.c (kdb_local): Treat reason SILENT as immediate 'go'. + + * kdb/kdbmain.c (kdb_init): Invoke kdb with reason SILENT to instantiate + any breakpoints on boot cpu. + + * arch/i386/kernel/smpboot.c (smp_callin): Invoke kdb with reason SILENT + to instantiate any global breakpoints on this cpu. + + * kdb/kdb_cmds: Remove comment that said initial commands only worked on + boot cpu. + +2000-09-27 Keith Owens + + * arch/i386/kernel/msr.c: Move {rd,wr}msr_eio to include/asm-i386/apic.h. + + * include/asm-i386/apic.h: Define NMI interfaces. + + * kernel/sysctl.c (kern_table): + * kernel/sysctl.c (do_proc_set_nmi_watchdog): + Add /proc/sys/kernel/nmi_watchdog. + + * arch/i386/kernel/apic.c: New routines set_nmi_counter_local, + setup_apic_nmi_watchdog. + + * arch/i386/kernel/traps.c: New routine set_nmi_watchdog(). Call apic + routines to set/clear local apic timer. 
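The 2000-09-27 entry directly above wires the NMI watchdog to /proc/sys/kernel/nmi_watchdog. A hedged user-space sketch of poking that knob follows; the path is taken from the entry, while the "write 0 or 1" convention is assumed from usual sysctl behaviour rather than shown in this patch.

    #include <stdio.h>

    /* Toggle the NMI watchdog via the sysctl file introduced above.
     * Assumes the conventional 0/1 write semantics of sysctl knobs.
     */
    static int set_nmi_watchdog(int on)
    {
        FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "w");
        if (!f) {
            perror("nmi_watchdog");
            return -1;
        }
        fprintf(f, "%d\n", on ? 1 : 0);
        return fclose(f);
    }

    int main(void)
    {
        return set_nmi_watchdog(1) == 0 ? 0 : 1;
    }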
+ +2000-09-26 Keith Owens + + * include/linux/sysctl.h (enum): Add NMI_WATCHDOG. + + * arch/i386/kernel/traps.c (nmi_watchdog_tick): Check nmi_watchdog is + still on. + + * arch/i386/config.in: Add CONFIG_UP_NMI_WATCHDOG. + + * Documentation/Configure.help: Add CONFIG_UP_NMI_WATCHDOG. + + * Documentation/nmi_watchdog.txt: Update for UP NMI watchdog. + +2000-09-25 Keith Owens + + * arch/i386/kernel/apic.c (init_apic_mappings): + * arch/i386/kernel/io_apic.c (IO_APIC_init_uniprocessor): + Merge Keir Fraser's local APIC for uniprocessors patch. + +2000-09-24 Keith Owens + + * Various: Declare initialization routines as __init. + + * Makefile: Define and export AWK. + + * kdb/Makefile: Generate gen-kdb_cmds.c from kdb/kdb_cmds. + + * kdb/kdbmain.c (kdb_init): Call new routine kdb_cmds_init to execute + whatever the user put in kdb/kdb_cmds. + + * arch/i386/kdb/kdba_bt.c (kdba_bt_stack): New parameter to + indicate if esp in regs is known to be valid or not. + + * kdb/kdb_bp.c, arch/i386/kdb/kdba_bp.c: More trace prints for + breakpoint handling. + + * arch/i386/kdb/kdba_bp.c (kdba_installbp): Finally found and fixed the + annoying breakpoint bug where breakpoints were not always installed + after 'go'. + + * Documentation/kdb: Update man pages kdb.mm, kdb_env.man, kdb_ss.man. + + * Released as kdb-v1.5-beta1-2.4.0-test8. + + * Sync to 2.4.0-test9-pre6 and release as kdb-v1.5-beta1-2.4.0-test9-pre6. + +2000-09-23 Keith Owens + + * arch/i386/kdb/kdbasupport.c (kdba_getregcontents): New pseudo + registers cesp and ceflags to help with debugging the debugger. + + * kdb/kdbmain.c (kdb_local, kdb): Add KDB_REASON_RECURSE. Add + environment variable RECURSE. Add code to cope with some types of + recursion. + + * kdb/kdbmain.c (kdb), arch/i386/kdb/kdba_bp.c: Add + kdba_clearsinglestep. + +2000-09-22 Keith Owens + + * drivers/video/vgacon.c (write_vga): No cli() if kdb is running, avoid + console deadlock. + + * arch/i386/kernel/irq.c (get_irqlock): Warn if kdb is running, may hang. + + * include/linux/kdb.h: Define KDB_IS_RUNNING as (0) if no CONFIG_KDB. + + * arch/i386/kdb/kdba_bt.c (kdba_bt_stack): Do not attempt a backtrace if + the code segment is not in the kernel. + + * kdb/modules: Change modules from MX_OBJS to M_OBJS. Remove EXPORT_NOSYMBOLS. + +2000-09-21 Keith Owens + + * arch/i386/kernel/i386_ksyms.c: Move EXPORT_SYMBOLS for kdb to kdb/kdbmain.c. + + * kdb/Makefile: Change kdb/kdbmain.o from O_OBJS to OX_OBJS. + + * arch/i386/kernel/smp.c: Remove some #ifdef CONFIG_KDB. Remove kdbprivate.h. + + * include/linux/kdb.h: Add kdb_print_state. Add KDB_STATE_WAIT_IPI. + + * kdb/kdbmain.c (kdb): Only mark cpu as leaving if it is in KDB state. Maintain + WAIT_IPI state so a cpu is only driven through NMI once. + + * arch/i386/kernel/smp.c (smp_kdb_stop): All state fiddling moved to kdb(). + +2000-09-20 Keith Owens + + * include/linux/kdb.h: #define kdb() as (0) if kdb is not configured. + + * arch/i386/kernel/traps.c: Remove some #ifdef CONFIG_KDB. + + * include/linux/kdbprivate.h: Move per cpu state to kdb.h. + + * include/linux/kdb.h: Add KDB_STATE_NO_WATCHDOG, KDB_STATE_PRINTF_LOCK. + Rename KDB_DEBUG_xxx to KDB_DEBUG_FLAG_xxx. Clean up debug flag + definitions. + + * arch/i386/kernel/traps.c (nmi_watchdog_tick): Check no watchdog. + + * kdb/kdbmain.c (kdb): Set no watchdog in normal kdb code. + + * kdb/kdbmain.c (kdb_parse): Allow watchdog in commands. + + * kdb/kdb_io.c (kdb_printf): No watchdog during printing. Clean up lock handling.
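The 2000-10-02 entry earlier removes zero initializers from static variables. The underlying C rule is worth spelling out: objects with static storage duration are zero-initialized by default, so an explicit "= 0" buys nothing semantically, and with some older toolchains it could move the object from .bss into the on-disk .data section. A two-line illustration:

    #include <stdio.h>

    static int explicit_zero = 0;  /* may land in .data with old toolchains */
    static int implicit_zero;      /* .bss; C guarantees it starts at 0 */

    int main(void)
    {
        /* Both print 0: the initializer is semantically redundant. */
        printf("%d %d\n", explicit_zero, implicit_zero);
        return 0;
    }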
+ + * kdb/kdbmain.c (kdb_set): Clean up debug flag handling. + +2000-09-19 Juan J. Quintela + + * arch/i386/kdb/kdba_io.c: Allow kdb to compile without CONFIG_VT and/or + serial console. + +2000-09-19 Keith Owens + + * include/linux/kdb.h: Define KDB_DEBUG_STATE(). + + * kdb/kdbmain.c (kdb): Add kdb_print_state(), calls to KDB_DEBUG_STATE(). + +2000-09-16 Keith Owens + + * Move to finer grained control over individual processors in kdb with + per cpu kdb state. Needed to allow ss[b] to only release one processor, + previously ss[b] released all processors. Also need to recover from + errors inside kdb commands, e.g. oops in kdbm_pg code. + + * various: + Move global flags KDB_FLAG_SSB, KDB_FLAG_SUPRESS, KDB_FLAG_FAULT, + KDB_FLAG_SS, KDB_FLAG_SSBPT, kdb_active, to per cpu state and macros + KDB_STATE(xxx). + Replace kdb_flags & KDB_FLAG_xxx with KDB_FLAG(xxx). + Replace kdb_flags & KDB_DEBUG_xxx with KDB_DEBUG(xxx). + Replace specific tests with wrapper KDB_IS_RUNNING(). + + * various: Remove #ifdef CONFIG_SMP from kdb code wherever + possible. Simplifies the code and makes it much more readable. + + * arch/i386/kdb/kdbasupport.c (kdb_setjmp): Record if we have reliable + longjmp data instead of assuming it is always set. + + * various: Replace smp_kdb_wait with per cpu state, HOLD_CPU. + + * init/main.c: Replace #ifdef KDB_DEBUG with KDB_DEBUG(CALLBACK). + + * include/linux/kdbprivate.h: Separate command return codes from error + codes. Add more detailed command codes. + + * arch/i386/kernel/traps.c (die): Change spin_lock_irq to + spin_lock_irqsave. Why did I do this? + + * kdb/kdbmain.c (kdb_parse): Set per cpu flag CMD before executing kdb + command. More detailed return codes for commands that affect + processors. + + * kdb/kdbmain.c (kdb_previous_event): New, check if any processors are + still executing the previous kdb event. Removes a race window where a + second event could enter kdb before the first had completely ended. + + * kdb/kdbmain.c (kdb): Document all the concurrency conditions and how + kdb handles them. ss[b] now releases only the current cpu. Do not set + breakpoints when releasing for ss[b]. Recover from errors in kdb + commands. Check that we have reliable longjmp data before using it. + + * various: Update return code documentation. + + * kdb/kdb_bp.c (kdb_ss): Separate ss and ssb return codes. + + * kdb/kdbsupport.c (kdb_ipi): Finer grained algorithm for deciding + whether to send a stop signal to a cpu. + + * arch/i386/kdb/kdba_bp.c (kdba_db_trap): Separate ss and ssb return + codes. Reinstall delayed software breakpoints per cpu instead of + globally. Changed algorithm for handling ss[b]. + + * arch/i386/kdb/kdba_bp.c (kdba_bp_trap): Match software breakpoints per + cpu instead of globally. + + * include/linux/kdb.h: Bump version to kdb v1.5. + +2000-09-16 Keith Owens + + * kernel/sysctl.c (kern_table): add /proc/sys/kernel/kdb. + + * init/main.c (parse_options): add boot flags kdb=on, kdb=off, + kdb=early. + + * include/linux/sysctl.h (enum): add KERN_KDB. + + * drivers/char/serial.c (receive_chars): check kdb_on. + + * drivers/char/keyboard.c (handle_scancode): check kdb_on. + + * arch/i386/kernel/traps.c (nmi_watchdog_tick): check kdb_on. + + * arch/i386/config.in: add CONFIG_KDB_OFF. + + * Documentation/Configure.help: add CONFIG_KDB_OFF. + + * kdb/kdbmain.c: add kdb_initial_cpu, kdb_on. + + * kdb/kdbmain.c (kdb): check kdb_on, set kdb_initial_cpu. + + * kdb/kdbmain.c (kdb_init): add Keith Owens to kdb banner.
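The 2000-09-16 entry above describes replacing open-coded tests such as kdb_flags & KDB_FLAG_xxx with KDB_FLAG(xxx) and moving per-cpu state behind KDB_STATE(xxx). The real definitions live in include/linux/kdb.h and are not part of this hunk; the sketch below only illustrates the token-pasting wrapper pattern the entry implies, with illustrative flag values and a stubbed smp_processor_id().

    /* Sketch of the wrapper style described above; the actual kdb
     * definitions may differ in detail. Flag values are made up.
     */
    #include <stdio.h>

    #define NR_CPUS            4
    #define KDB_FLAG_EARLYKDB  0x2
    #define KDB_STATE_HOLD_CPU 0x10

    static volatile int kdb_flags;
    static volatile int kdb_state[NR_CPUS];

    static int smp_processor_id(void) { return 0; }  /* stub for the sketch */

    #define KDB_FLAG(flag)           (kdb_flags & KDB_FLAG_##flag)
    #define KDB_STATE_CPU(flag, cpu) (kdb_state[(cpu)] & KDB_STATE_##flag)
    #define KDB_STATE(flag)          KDB_STATE_CPU(flag, smp_processor_id())
    #define KDB_STATE_SET(flag)      (kdb_state[smp_processor_id()] |= KDB_STATE_##flag)

    int main(void)
    {
        KDB_STATE_SET(HOLD_CPU);
        /* Call sites read as predicates, matching the kdb code below. */
        printf("early=%d hold=%d\n",
               !!KDB_FLAG(EARLYKDB), !!KDB_STATE(HOLD_CPU));
        return 0;
    }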
+ + * kdb/kdb_io.c (kdb_printf): serialize kdb_printf output. + + * kdb/kdb_bt.c (kdb_bt): check environment variable BTAPROMPT. + + * kdb/kdbsupport.c (kdb_ipi): ignore NMI for kdb_initial_cpu. + + * kdb/modules/kdbm_pg.c (kdbm_page): merge updates from 2.4.0-test5-xfs. + + * kdb/kdb_bt.man: add btp, bta, BTAPROMPT. + + * kdb/kdb.mm: add CONFIG_KDB_OFF, boot flags, btp, bta. + + * include/linux/kdbprivate.h: add kdb_initial_cpu. + + * include/linux/kdb.h: add kdb_on, bump version to kdb v1.4. diff -urN linux-2.4.17-rc2-virgin/kdb/Makefile linux-2.4.17-rc2-wli2/kdb/Makefile --- linux-2.4.17-rc2-virgin/kdb/Makefile Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/Makefile Tue Dec 18 22:21:49 2001 @@ -0,0 +1,20 @@ +O_TARGET := kdb.o +export-objs := kdbmain.o kdb_io.o +obj-y := kdb_bt.o kdb_bp.o kdb_id.o kdbsupport.o gen-kdb_cmds.o kdbmain.o kdb_io.o + +subdir-$(CONFIG_KDB_MODULES) := modules +obj-y += $(addsuffix /vmlinux-obj.o, $(subdir-y)) + +override CFLAGS := $(CFLAGS:%-pg=% ) + +EXTRA_CFLAGS += -I $(TOPDIR)/arch/$(ARCH)/kdb + +include $(TOPDIR)/Rules.make + +gen-kdb_cmds.c: kdb_cmds Makefile + $(AWK) 'BEGIN {print "#include "} \ + /^ *#/{next} \ + /^[ \t]*$$/{next} \ + {print "static __initdata char kdb_cmd" cmds++ "[] = \"" $$0 "\\n\";"} \ + END {print "char __initdata *kdb_cmds[] = {"; for (i = 0; i < cmds; ++i) {print " kdb_cmd" i ","}; print(" 0\n};");}' \ + kdb_cmds > gen-kdb_cmds.c diff -urN linux-2.4.17-rc2-virgin/kdb/kdb_bp.c linux-2.4.17-rc2-wli2/kdb/kdb_bp.c --- linux-2.4.17-rc2-virgin/kdb/kdb_bp.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdb_bp.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,622 @@ +/* + * Kernel Debugger Breakpoint Handler + * + * Copyright 1999, Silicon Graphics, Inc. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Keith Owens 2000/05/23 + * KDB v1.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Table of kdb_breakpoints + */ +kdb_bp_t kdb_breakpoints[KDB_MAXBPT]; + +/* + * kdb_bp_install_global + * + * Install global kdb_breakpoints prior to returning from the + * kernel debugger. This allows the kdb_breakpoints to be set + * upon functions that are used internally by kdb, such as + * printk(). + * + * Parameters: + * ef Execution frame. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * + * This function is only called once per kdb session. + */ + +void +kdb_bp_install_global(kdb_eframe_t ef) +{ + int i; + + for(i=0; ibp_forcehw) { + kdb_printf("Forced "); + } + + if (!bp->bp_template.bph_free) { + kdb_printf("%s ", kdba_bptype(&bp->bp_template)); + } else { + kdb_printf("Instruction(i) "); + } + + kdb_printf("BP #%d at ", i); + kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT); + + if (bp->bp_enabled) { + kdba_printbp(bp); + if (bp->bp_global) + kdb_printf(" globally"); + else + kdb_printf(" on cpu %d", bp->bp_cpu); + if (bp->bp_adjust) + kdb_printf(" adjust %d", bp->bp_adjust); + } else { + kdb_printf("\n is disabled"); + } + + kdb_printf("\n"); +} + +/* + * kdb_bp + * + * Handle the bp, and bpa commands. + * + * [bp|bpa|bph] [DATAR|DATAW|IO [length]] + * + * Parameters: + * argc Count of arguments in argv + * argv Space delimited command line arguments + * envp Environment value + * regs Exception frame at entry to kernel debugger + * Outputs: + * None. + * Returns: + * Zero for success, a kdb diagnostic if failure. + * Locking: + * None. + * Remarks: + * + * bp Set breakpoint. 
Only use hardware assist if necessary. + * bpa Set breakpoint on all cpus, only use hardware regs if necessary + * bph Set breakpoint - force hardware register + * bpha Set breakpoint on all cpus, force hardware register + */ + +int +kdb_bp(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + int i; + kdb_bp_t *bp; + int diag; + int free, same; + kdb_machreg_t addr; + char *symname = NULL; + long offset = 0ul; + int nextarg; + int hardware; + int global; + + if (argc == 0) { + /* + * Display breakpoint table + */ + for(i=0,bp=kdb_breakpoints; i<KDB_MAXBPT; i++,bp++) { + if (bp->bp_free) continue; + + kdb_printbp(bp, i); + } + + return 0; + } + + global = ((strcmp(argv[0], "bpa") == 0) + || (strcmp(argv[0], "bpha") == 0)); + hardware = ((strcmp(argv[0], "bph") == 0) + || (strcmp(argv[0], "bpha") == 0)); + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, &symname, regs); + if (diag) + return diag; + + /* + * Allocate a new bp structure + */ + free = same = KDB_MAXBPT; + for(i=0,bp=kdb_breakpoints; i<KDB_MAXBPT; i++,bp++) { + if (bp->bp_free) { + break; + } + } + + if (i == KDB_MAXBPT) + return KDB_TOOMANYBPT; + + kdba_check_pc(&addr); + bp->bp_addr = addr; + bp->bp_adjust = 0; + + bp->bp_forcehw = hardware; + if (KDB_DEBUG(BP)) + kdb_printf("kdb_bp: forcehw is %d hardware is %d\n", bp->bp_forcehw, hardware); + + /* + * Handle architecture dependent parsing + */ + diag = kdba_parsebp(argc, argv, &nextarg, bp); + if (diag) { + return diag; + } + + bp->bp_enabled = 1; + bp->bp_free = 0; + bp->bp_global = 1; /* Most breakpoints are global */ + + if (hardware && !global) { + bp->bp_global = 0; + bp->bp_cpu = smp_processor_id(); + } + + /* + * Allocate a hardware breakpoint. If one is not available, + * disable the breakpoint, but leave it in the breakpoint + * table. When the breakpoint is re-enabled (via 'be'), we'll + * attempt to allocate a hardware register for it. + */ + if (!bp->bp_template.bph_free) { + bp->bp_hard = kdba_allocbp(&bp->bp_template, &diag); + if (diag) { + bp->bp_enabled = 0; + return diag; + } + bp->bp_hardtype = 1; + } + + kdb_printbp(bp, i); + + return 0; +} + +/* + * kdb_bc + * + * Handles the 'bc', 'be', and 'bd' commands + * + * [bd|bc|be] <bpnum> + * + * Parameters: + * argc Count of arguments in argv + * argv Space delimited command line arguments + * envp Environment value + * regs Exception frame at entry to kernel debugger + * Outputs: + * None. + * Returns: + * Zero for success, a kdb diagnostic for failure + * Locking: + * None. + * Remarks: + */ + +#define KDBCMD_BC 0 +#define KDBCMD_BE 1 +#define KDBCMD_BD 2 + +int +kdb_bc(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + kdb_machreg_t addr; + kdb_bp_t *bp = 0; + int lowbp = KDB_MAXBPT; + int highbp = 0; + int done = 0; + int i; + int diag; + int cmd; /* KDBCMD_B? */ + + if (strcmp(argv[0], "be") == 0) { + cmd = KDBCMD_BE; + } else if (strcmp(argv[0], "bd") == 0) { + cmd = KDBCMD_BD; + } else + cmd = KDBCMD_BC; + + if (argc != 1) + return KDB_ARGCOUNT; + + if (strcmp(argv[1], "*") == 0) { + lowbp = 0; + highbp = KDB_MAXBPT; + } else { + diag = kdbgetularg(argv[1], &addr); + if (diag) + return diag; + + /* + * For addresses less than the maximum breakpoint number, + * assume that the breakpoint number is desired.
+ */ + if (addr < KDB_MAXBPT) { + bp = &kdb_breakpoints[addr]; + lowbp = highbp = addr; + highbp++; + } else { + for(i=0, bp=kdb_breakpoints; i<KDB_MAXBPT; i++, bp++) { + if (bp->bp_addr == addr) { + lowbp = highbp = i; + highbp++; + break; + } + } + } + } + + /* + * Now operate on the set of breakpoints matching the input + * criteria (either '*' for all, or an individual breakpoint). + */ + for(bp=&kdb_breakpoints[lowbp], i=lowbp; + i < highbp; + i++, bp++) { + if (bp->bp_free) + continue; + + done++; + + switch (cmd) { + case KDBCMD_BC: + if (bp->bp_hardtype) { + kdba_freebp(bp->bp_hard); + bp->bp_hard = 0; + bp->bp_hardtype = 0; + } + + bp->bp_enabled = 0; + bp->bp_global = 0; + + kdb_printf("Breakpoint %d at " kdb_bfd_vma_fmt " cleared\n", + i, bp->bp_addr); + + bp->bp_addr = 0; + bp->bp_free = 1; + + break; + case KDBCMD_BE: + /* + * Allocate a hardware breakpoint. If one is not + * available, don't enable the breakpoint. + */ + if (!bp->bp_template.bph_free + && !bp->bp_hardtype) { + bp->bp_hard = kdba_allocbp(&bp->bp_template, &diag); + if (diag) { + bp->bp_enabled = 0; + return diag; + } + bp->bp_hardtype = 1; + } + + bp->bp_enabled = 1; + + kdb_printf("Breakpoint %d at " kdb_bfd_vma_fmt " enabled", + i, bp->bp_addr); + + kdb_printf("\n"); + break; + case KDBCMD_BD: + if (!bp->bp_enabled) { + return 0; + } + + /* + * Since this breakpoint is now disabled, we can + * give up the hardware register which is allocated + * to it. + */ + if (bp->bp_hardtype) { + kdba_freebp(bp->bp_hard); + bp->bp_hard = 0; + bp->bp_hardtype = 0; + } + + bp->bp_enabled = 0; + + kdb_printf("Breakpoint %d at " kdb_bfd_vma_fmt " disabled\n", + i, bp->bp_addr); + + break; + } + } + + return (!done)?KDB_BPTNOTFOUND:0; +} + +/* + * kdb_ss + * + * Process the 'ss' (Single Step) and 'ssb' (Single Step to Branch) + * commands. + * + * ss [<count>] + * ssb + * + * Parameters: + * argc Argument count + * argv Argument vector + * envp Environment vector + * regs Registers at time of entry to kernel debugger + * Outputs: + * None. + * Returns: + * KDB_CMD_SS[B] for success, a kdb error if failure. + * Locking: + * None. + * Remarks: + * + * Set the arch specific option to trigger a debug trap after the next + * instruction. + * + * For 'ssb', set the trace flag in the debug trap handler + * after printing the current insn and return directly without + * invoking the kdb command processor, until a branch instruction + * is encountered or SSCOUNT lines are printed. + */ + +int +kdb_ss(int argc, const char **argv, const char **envp, kdb_eframe_t ef) +{ + int ssb = 0; + + ssb = (strcmp(argv[0], "ssb") == 0); + if ((ssb && (argc != 0)) + || (!ssb && (argc > 1))) { + return KDB_ARGCOUNT; + } + +#if 0 + /* + * Fetch provided count + */ + diag = kdbgetularg(argv[1], &sscount); + if (diag) + return diag; +#endif + + /* + * Set trace flag and go. + */ + KDB_STATE_SET(DOING_SS); + if (ssb) + KDB_STATE_SET(DOING_SSB); + + kdba_setsinglestep(ef); /* Enable single step */ + + if (ssb) + return KDB_CMD_SSB; + return KDB_CMD_SS; +} + +/* + * kdb_initbptab + * + * Initialize the breakpoint table. Register breakpoint commands. + * + * Parameters: + * None. + * Outputs: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + */ + +void __init +kdb_initbptab(void) +{ + int i; + kdb_bp_t *bp; + + /* + * First time initialization. + */ + memset(&kdb_breakpoints, '\0', sizeof(kdb_breakpoints)); + + for (i=0, bp=kdb_breakpoints; i<KDB_MAXBPT; i++, bp++) { + bp->bp_free = 1; + /* + * The bph_free flag is architecturally required.
It + * is set by architecture-dependent code to false (zero) + * in the event a hardware breakpoint register is required + * for this breakpoint. + * + * The rest of the template is reserved to the architecture + * dependent code and _must_ not be touched by the architecture + * independent code. + */ + bp->bp_template.bph_free = 1; + } + + kdb_register("bp", kdb_bp, "[<vaddr>]", "Set/Display breakpoints", 0); + kdb_register("bl", kdb_bp, "[<vaddr>]", "Display breakpoints", 0); + kdb_register("bpa", kdb_bp, "[<vaddr>]", "Set/Display global breakpoints", 0); + kdb_register("bph", kdb_bp, "[<vaddr>]", "Set hardware breakpoint", 0); + kdb_register("bpha", kdb_bp, "[<vaddr>]", "Set global hardware breakpoint", 0); + kdb_register("bc", kdb_bc, "<bpnum>", "Clear Breakpoint", 0); + kdb_register("be", kdb_bc, "<bpnum>", "Enable Breakpoint", 0); + kdb_register("bd", kdb_bc, "<bpnum>", "Disable Breakpoint", 0); + + kdb_register("ss", kdb_ss, "[<#steps>]", "Single Step", 1); + kdb_register("ssb", kdb_ss, "", "Single step to branch/call", 0); + /* + * Architecture dependent initialization. + */ + kdba_initbp(); +} + diff -urN linux-2.4.17-rc2-virgin/kdb/kdb_bt.c linux-2.4.17-rc2-wli2/kdb/kdb_bt.c --- linux-2.4.17-rc2-virgin/kdb/kdb_bt.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdb_bt.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,138 @@ +/* + * Minimalist Kernel Debugger - Architecture independent stack traceback + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + * Masahiro Adegawa 1999/12/01 + * 'sr' command, active flag in 'ps' + * Scott Lurndal 1999/12/12 + * Significantly restructure for linux2.3 + * Keith Owens 2000/05/23 + * KDB v1.2 + * Keith Owens 2000/09/16 + * KDB v1.4 + * Env BTAPROMPT. + * + */ + +#include +#include +#include +#include +#include +#include +#include + + +/* + * kdb_bt + * + * This function implements the 'bt' command. Print a stack + * traceback. + * + * bt [<address-expression>] (addr-exp is for alternate stacks) + * btp <pid> (Kernel stack for <pid>) + * + * address expression refers to a return address on the stack. It + * is expected to be preceded by a frame pointer. + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * ef registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * Backtrack works best when the code uses frame pointers. But + * even without frame pointers we should get a reasonable trace. + * + * mds comes in handy when examining the stack to do a manual + * traceback.
+ */ + +int +kdb_bt(int argc, const char **argv, const char **envp, kdb_eframe_t ef) +{ + int diag; + int argcount = 5; + int btaprompt = 1; + char buffer[80]; + int nextarg; + unsigned long addr; + long offset; + + kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ + kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each proc in bta */ + + if (strcmp(argv[0], "bta") == 0) { + struct task_struct *p; + + for_each_task(p) { + kdb_printf("Stack traceback for pid %d\n", p->pid); + + diag = kdba_bt_process(p, argcount); + + if (btaprompt) { + kdb_getstr(buffer, sizeof(buffer), + "Enter <q> to end, <cr> to continue:"); + + if (buffer[0] == 'q') { + return 0; + } + } + } + } else if (strcmp(argv[0], "btp") == 0) { + struct task_struct *p; + int pid; + + if (argc < 1) + return KDB_ARGCOUNT; + + diag = kdbgetularg((char *)argv[1], (unsigned long*)&pid); + if (diag) + return diag; + + for_each_task(p) { + if (p->pid == pid) { + return kdba_bt_process(p, argcount); + } + } + + kdb_printf("No process with pid == %d found\n", pid); + return 0; + } else { + if (argc) { + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, + &offset, NULL, ef); + if (diag) + return diag; + + return kdba_bt_stack(ef, &addr, argcount, current); + } else { + return kdba_bt_stack(ef, NULL, argcount, current); + } + } + + /* NOTREACHED */ + return 0; +} diff -urN linux-2.4.17-rc2-virgin/kdb/kdb_cmds linux-2.4.17-rc2-wli2/kdb/kdb_cmds --- linux-2.4.17-rc2-virgin/kdb/kdb_cmds Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdb_cmds Tue Dec 18 22:21:49 2001 @@ -0,0 +1,6 @@ +# Initial commands for kdb, alter to suit your needs. +# These commands are executed in kdb_init() context, no SMP, no +# processes. Commands that require process data (including stack or +# registers) are not reliable this early. set and bp commands should +# be safe. Global breakpoint commands affect each cpu as it is booted. + diff -urN linux-2.4.17-rc2-virgin/kdb/kdb_id.c linux-2.4.17-rc2-wli2/kdb/kdb_id.c --- linux-2.4.17-rc2-virgin/kdb/kdb_id.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdb_id.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,254 @@ +/* + * Minimalist Kernel Debugger - Architecture Independent Instruction Disassembly + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + * Masahiro Adegawa 1999/12/01 + * 'sr' command, active flag in 'ps' + * Scott Lurndal 1999/12/12 + * Significantly restructure for linux2.3 + * Keith Owens 2000/05/23 + * KDB v1.2 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +disassemble_info kdb_di; + +/* + * kdb_id + * + * Handle the id (instruction display) command. + * + * id [<vaddr>] + * + * Parameters: + * argc Count of arguments in argv + * argv Space delimited command line arguments + * envp Environment value + * regs Exception frame at entry to kernel debugger + * Outputs: + * None. + * Returns: + * Zero for success, a kdb diagnostic if failure. + * Locking: + * None.
+ * Remarks: + */ + +int +kdb_id(int argc, const char **argv, const char **envp, struct pt_regs* regs) +{ + kdb_machreg_t pc; + int icount; + int diag; + int i; + char * mode; + int nextarg; + long offset = 0; + static kdb_machreg_t lastpc; + struct disassemble_info *dip = &kdb_di; + char lastbuf[50]; + + if (argc != 1) { + if (lastpc == 0) { + return KDB_ARGCOUNT; + } else { + sprintf(lastbuf, "0x%lx", lastpc); + argv[1] = lastbuf; + argc = 1; + } + } + + + /* + * Fetch PC. First, check to see if it is a symbol, if not, + * try address. + */ + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &pc, &offset, NULL, regs); + if (diag) + return diag; + kdba_check_pc(&pc); + + /* + * Number of lines to display + */ + diag = kdbgetintenv("IDCOUNT", &icount); + if (diag) + return diag; + + dip->fprintf_dummy = kdb_dis_fprintf; + + mode = kdbgetenv("IDMODE"); + diag = kdba_id_parsemode(mode, dip); + if (diag) { + return diag; + } + + for(i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_SPARC64 +#include +#else +static struct console *kdbcons; +#endif + +/* + * kdb_read + * + * This function reads a string of characters, terminated by + * a newline, or by reaching the end of the supplied buffer, + * from the current kernel debugger console device. + * Parameters: + * buffer - Address of character buffer to receive input characters. + * bufsize - size, in bytes, of the character buffer + * Returns: + * Returns a pointer to the buffer containing the received + * character string. This string will be terminated by a + * newline character. + * Locking: + * No locks are required to be held upon entry to this + * function. It is not reentrant - it relies on the fact + * that while kdb is running on any one processor all other + * processors will be spinning at the kdb barrier. + * Remarks: + * + * Davidm asks, why doesn't kdb use the console abstraction; + * here are some reasons: + * - you cannot debug the console abstraction with kdb if + * kdb uses it. + * - you rely on the correct functioning of the abstraction + * in the presence of general system failures. + * - You must acquire the console spinlock thus restricting + * the usability - what if the kernel fails with the spinlock + * held - one still wishes to debug such situations. + * - How about debugging before the console(s) are registered? + * - None of the current consoles (sercons, vt_console_driver) + * have read functions defined. + * - The standard pc keyboard and terminal drivers are interrupt + * driven. We cannot enable interrupts while kdb is active, + * so the standard input functions cannot be used by kdb. + * + * An implementation could be improved by removing the need for + * lock acquisition - just keep a 'struct console *kdbconsole;' global + * variable which refers to the preferred kdb console. + * + * The bulk of this function is architecture dependent. + */ + +char * +kdb_read(char *buffer, size_t bufsize) +{ + return(kdba_read(buffer, bufsize)); +} + +/* + * kdb_getstr + * + * Print the prompt string and read a command from the + * input device. + * + * Parameters: + * buffer Address of buffer to receive command + * bufsize Size of buffer in bytes + * prompt Pointer to string to use as prompt string + * Returns: + * Pointer to command buffer. + * Locking: + * None. + * Remarks: + * For SMP kernels, the processor number will be + * substituted for %d, %x or %o in the prompt. 
+ */ + +char * +kdb_getstr(char *buffer, size_t bufsize, char *prompt) +{ +#if defined(CONFIG_SMP) + kdb_printf(prompt, smp_processor_id()); +#else + kdb_printf("%s", prompt); +#endif + kdb_nextline = 1; /* Prompt and input resets line number */ + return kdb_read(buffer, bufsize); +} + +/* + * kdb_printf + * + * Print a string to the output device(s). + * + * Parameters: + * printf-like format and optional args. + * Returns: + * 0 + * Locking: + * None. + * Remarks: + * use 'kdbcons->write()' to avoid polluting 'log_buf' with + * kdb output. + */ + +void +kdb_printf(const char *fmt, ...) +{ + char buffer[256]; + va_list ap; + int diag; + int linecount; + int logging, saved_loglevel = 0; + int do_longjmp = 0; + struct console *c = console_drivers; + static spinlock_t kdb_printf_lock = SPIN_LOCK_UNLOCKED; + + /* Serialize kdb_printf if multiple cpus try to write at once. + * But if any cpu goes recursive in kdb, just print the output, + * even if it is interleaved with any other text. + */ + if (!KDB_STATE(PRINTF_LOCK)) { + KDB_STATE_SET(PRINTF_LOCK); + spin_lock(&kdb_printf_lock); + } + + diag = kdbgetintenv("LINES", &linecount); + if (diag) + linecount = 22; + + diag = kdbgetintenv("LOGGING", &logging); + if (diag) + logging = 0; + + va_start(ap, fmt); + vsprintf(buffer, fmt, ap); + va_end(ap); + + /* + * Write to all consoles. + */ +#ifdef CONFIG_SPARC64 + if (c == NULL) + prom_printf("%s", buffer); + else +#endif + while (c) { + c->write(c, buffer, strlen(buffer)); + c = c->next; + } + if (logging) { + acquire_console_sem(); + saved_loglevel = console_loglevel; + console_loglevel = 0; + release_console_sem(); + printk("%s", buffer); + } + + if (strchr(buffer, '\n') != NULL) { + kdb_nextline++; + } + + if (kdb_nextline == linecount) { +#ifdef KDB_HAVE_LONGJMP + char buf1[16]; +#if defined(CONFIG_SMP) + char buf2[32]; +#endif + char *moreprompt; + + /* Watch out for recursion here. Any routine that calls + * kdb_printf will come back through here. And kdb_read + * uses kdb_printf to echo on serial consoles ... + */ + kdb_nextline = 1; /* In case of recursion */ + + /* + * Pause until cr. + */ + moreprompt = kdbgetenv("MOREPROMPT"); + if (moreprompt == NULL) { + moreprompt = "more> "; + } + +#if defined(CONFIG_SMP) + if (strchr(moreprompt, '%')) { + sprintf(buf2, moreprompt, smp_processor_id()); + moreprompt = buf2; + } +#endif + + c = console_drivers; +#ifdef CONFIG_SPARC64 + if (c == NULL) + prom_printf("%s", moreprompt); + else +#endif + while (c) { + c->write(c, moreprompt, strlen(moreprompt)); + c = c->next; + } + if (logging) + printk("%s", moreprompt); + + kdb_read(buf1, sizeof(buf1)); + kdb_nextline = 1; /* Really set output line 1 */ + + if ((buf1[0] == 'q') || (buf1[0] == 'Q')) + do_longjmp = 1; + else if (buf1[0] && buf1[0] != '\n') + kdb_printf("Only 'q' or 'Q' are processed at more prompt, input ignored\n"); +#endif /* KDB_HAVE_LONGJMP */ + } + + if (logging) { + acquire_console_sem(); + console_loglevel = saved_loglevel; + release_console_sem(); + } + if (KDB_STATE(PRINTF_LOCK)) { + spin_unlock(&kdb_printf_lock); + KDB_STATE_CLEAR(PRINTF_LOCK); + } + if (do_longjmp) +#ifdef KDB_HAVE_LONGJMP + kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 1); +#else + ; +#endif /* KDB_HAVE_LONGJMP */ +} + +/* + * kdb_io_init + * + * Initialize kernel debugger output environment. + * + * Parameters: + * None. + * Returns: + * None. + * Locking: + * None. + * Remarks: + * Select a console device. 
+ */ + +void __init +kdb_io_init(void) +{ +#ifndef CONFIG_SPARC64 /* we don't register serial consoles in time */ + /* + * Select a console. + */ + struct console *c = console_drivers; + + while (c) { + if ((c->flags & CON_CONSDEV)) { + kdbcons = c; + break; + } + c = c->next; + } + + if (kdbcons == NULL) { + long long i; + + printk("kdb: Initialization failed - no console\n"); + while (1) i++; + } +#endif + return; +} + +EXPORT_SYMBOL(kdb_read); diff -urN linux-2.4.17-rc2-virgin/kdb/kdbmain.c linux-2.4.17-rc2-wli2/kdb/kdbmain.c --- linux-2.4.17-rc2-virgin/kdb/kdbmain.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdbmain.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,2812 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * Copyright (C) 2000 Stephane Eranian + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + * Masahiro Adegawa 1999/12/01 + * 'sr' command, active flag in 'ps' + * Scott Lurndal 1999/12/12 + * Significantly restructure for linux2.3 + * Keith Owens 2000/05/23 + * KDB v1.2 + * Keith Owens 2000/06/09 + * KDB v1.3. + * Rewrite SMP handling. + * Add NMI watchdog from Ted Kline, + * lsmod/rmmod commands from Marc Esipovich + * Stephane Eranian 2000/06/05 + * Enabled disassembler support. Added command history support. + * + * Keith Owens 2000/09/16 + * KDB v1.4 + * kdb=on/off/early at boot, /proc/sys/kernel/kdb. + * Env BTAPROMPT. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if defined(CONFIG_MODULES) +extern struct module *module_list; +#endif + + /* + * Kernel debugger state flags + */ +volatile int kdb_flags; + + /* + * kdb_lock protects updates to kdb_initial_cpu. Used to + * single thread processors through the kernel debugger. + */ +spinlock_t kdb_lock = SPIN_LOCK_UNLOCKED; +volatile int kdb_initial_cpu = -1; /* cpu number that owns kdb */ + +volatile int kdb_nextline = 1; +static volatile int kdb_new_cpu; /* Which cpu to switch to */ + +volatile int kdb_state[NR_CPUS]; /* Per cpu state */ + +#ifdef CONFIG_KDB_OFF +int kdb_on = 0; /* Default is off */ +#else +int kdb_on = 1; /* Default is on */ +#endif /* CONFIG_KDB_OFF */ + +#ifdef KDB_HAVE_LONGJMP + /* + * Must have a setjmp buffer per CPU. Switching cpus will + * cause the jump buffer to be setup for the new cpu, and + * subsequent switches (and pager aborts) will use the + * appropriate per-processor values. + */ +kdb_jmp_buf kdbjmpbuf[NR_CPUS]; +#endif /* KDB_HAVE_LONGJMP */ + + /* + * kdb_commands describes the available commands. 
+ */ +static kdbtab_t kdb_commands[KDB_MAX_COMMANDS]; + +typedef struct _kdbmsg { + int km_diag; /* kdb diagnostic */ + char *km_msg; /* Corresponding message text */ +} kdbmsg_t; + +#define KDBMSG(msgnum, text) \ + { KDB_##msgnum, text } + +static kdbmsg_t kdbmsgs[] = { + KDBMSG(NOTFOUND,"Command Not Found"), + KDBMSG(ARGCOUNT, "Improper argument count, see usage."), + KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD use 1, 2, 4 or 8, 8 is only allowed on 64 bit systems"), + KDBMSG(BADRADIX, "Illegal value for RADIX use 8, 10 or 16"), + KDBMSG(NOTENV, "Cannot find environment variable"), + KDBMSG(NOENVVALUE, "Environment variable should have value"), + KDBMSG(NOTIMP, "Command not implemented"), + KDBMSG(ENVFULL, "Environment full"), + KDBMSG(ENVBUFFULL, "Environment buffer full"), + KDBMSG(TOOMANYBPT, "Too many breakpoints defined"), + KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"), + KDBMSG(DUPBPT, "Duplicate breakpoint address"), + KDBMSG(BPTNOTFOUND, "Breakpoint not found"), + KDBMSG(BADMODE, "Invalid IDMODE"), + KDBMSG(BADINT, "Illegal numeric value"), + KDBMSG(INVADDRFMT, "Invalid symbolic address format"), + KDBMSG(BADREG, "Invalid register name"), + KDBMSG(BADCPUNUM, "Invalid cpu number"), + KDBMSG(BADLENGTH, "Invalid length field"), + KDBMSG(NOBP, "No Breakpoint exists"), +}; +#undef KDBMSG + +static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t); + + +/* + * Initial environment. This is all kept static and local to + * this file. We don't want to rely on the memory allocation + * mechanisms in the kernel, so we use a very limited allocate-only + * heap for new and altered environment variables. The entire + * environment is limited to a fixed number of entries (add more + * to __env[] if required) and a fixed amount of heap (add more to + * KDB_ENVBUFSIZE if required). + */ + +static char *__env[] = { +#if defined(CONFIG_SMP) + "PROMPT=[%d]kdb> ", + "MOREPROMPT=[%d]more> ", +#else + "PROMPT=kdb> ", + "MOREPROMPT=more> ", +#endif + "RADIX=16", + "LINES=24", + "COLUMNS=80", + "MDCOUNT=8", /* lines of md output */ + "BTARGS=5", /* 5 possible args in bt */ + "SSCOUNT=20", /* lines of ssb output */ + KDB_PLATFORM_ENV, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, + (char *)0, +}; + +static const int __nenv = (sizeof(__env) / sizeof(char *)); + +/* + * kdbgetenv + * + * This function will return the character string value of + * an environment variable. + * + * Parameters: + * match A character string representing an environment variable. + * Outputs: + * None. + * Returns: + * NULL No environment variable matches 'match' + * char* Pointer to string value of environment variable. + * Locking: + * No locking considerations required. + * Remarks: + */ +char * +kdbgetenv(const char *match) +{ + char **ep = __env; + int matchlen = strlen(match); + int i; + + for(i=0; i<__nenv; i++) { + char *e = *ep++; + + if (!e) continue; + + if ((strncmp(match, e, matchlen) == 0) + && ((e[matchlen] == '\0') + ||(e[matchlen] == '='))) { + char *cp = strchr(e, '='); + return (cp)?++cp:""; + } + } + return (char *)0; +} + +/* + * kdballocenv + * + * This function is used to allocate bytes for environment entries. 
+ * + * Parameters: + * bytes The number of bytes of environment buffer space to allocate. + * Outputs: + * None. + * Returns: + * A pointer to the allocated space, or NULL if the buffer is exhausted. + * Locking: + * No locking considerations required. Must be called with all + * processors halted. + * Remarks: + * We use a static environment buffer (envbuffer) to hold the values + * of dynamically generated environment variables (see kdb_set). Buffer + * space once allocated is never free'd, so over time, the amount of space + * (currently 512 bytes) will be exhausted if env variables are changed + * frequently. + */ +static char * +kdballocenv(size_t bytes) +{ +#define KDB_ENVBUFSIZE 512 + static char envbuffer[KDB_ENVBUFSIZE]; + static int envbufsize; + char *ep = (char *)0; + + if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) { + ep = &envbuffer[envbufsize]; + envbufsize += bytes; + } + return ep; +} + +/* + * kdbgetulenv + * + * This function will return the value of an unsigned long-valued + * environment variable. + * + * Parameters: + * match A character string representing a numeric value + * Outputs: + * *value the unsigned long representation of the env variable 'match' + * Returns: + * Zero on success, a kdb diagnostic on failure. + * Locking: + * No locking considerations required. + * Remarks: + */ + +int +kdbgetulenv(const char *match, unsigned long *value) +{ + char *ep; + + ep = kdbgetenv(match); + if (!ep) return KDB_NOTENV; + if (strlen(ep) == 0) return KDB_NOENVVALUE; + + *value = simple_strtoul(ep, 0, 0); + + return 0; +} + +/* + * kdbgetintenv + * + * This function will return the value of an integer-valued + * environment variable. + * + * Parameters: + * match A character string representing an integer-valued env variable + * Outputs: + * *value the integer representation of the environment variable 'match' + * Returns: + * Zero on success, a kdb diagnostic on failure. + * Locking: + * No locking considerations required. + * Remarks: + */ + +int +kdbgetintenv(const char *match, int *value) { + unsigned long val; + int diag; + + diag = kdbgetulenv(match, &val); + if (!diag) { + *value = (int) val; + } + return diag; +} + +/* + * kdbgetularg + * + * This function will convert a numeric string + * into an unsigned long value. + * + * Parameters: + * arg A character string representing a numeric value + * Outputs: + * *value the unsigned long representation of arg. + * Returns: + * Zero on success, a kdb diagnostic on failure. + * Locking: + * No locking considerations required. + * Remarks: + */ + +int +kdbgetularg(const char *arg, unsigned long *value) +{ + char *endp; + unsigned long val; + + val = simple_strtoul(arg, &endp, 0); + + if (endp == arg) { + /* + * Try base 16, for us folks too lazy to type the + * leading 0x... + */ + val = simple_strtoul(arg, &endp, 16); + if (endp == arg) + return KDB_BADINT; + } + + *value = val; + + return 0; +} + +/* + * kdbgetaddrarg + * + * This function is responsible for parsing an + * address-expression and returning the value of + * the expression, symbol name, and offset to the caller. + * + * The argument may consist of a numeric value (decimal or + * hexadecimal), a symbol name, a register name (preceded + * by the percent sign), an environment variable with a numeric + * value (preceded by a dollar sign) or a simple arithmetic + * expression consisting of a symbol name, +/-, and a numeric + * constant value (offset).
+ * + * Parameters: + * argc - count of arguments in argv + * argv - argument vector + * *nextarg - index to next unparsed argument in argv[] + * regs - Register state at time of KDB entry + * Outputs: + * *value - receives the value of the address-expression + * *offset - receives the offset specified, if any + * *name - receives the symbol name, if any + * *nextarg - index to next unparsed argument in argv[] + * + * Returns: + * zero is returned on success, a kdb diagnostic code is + * returned on error. + * + * Locking: + * No locking requirements. + * + * Remarks: + * + */ + +int +kdbgetaddrarg(int argc, const char **argv, int *nextarg, + kdb_machreg_t *value, long *offset, + char **name, kdb_eframe_t ef) +{ + kdb_machreg_t addr; + long off = 0; + int positive; + int diag; + int found = 0; + char *symname; + char symbol = '\0'; + char *cp; + kdb_symtab_t symtab; + + /* + * Process arguments which follow the following syntax: + * + * symbol | numeric-address [+/- numeric-offset] + * %register + * $environment-variable + */ + + if (*nextarg > argc) { + return KDB_ARGCOUNT; + } + + symname = (char *)argv[*nextarg]; + + /* + * If there is no whitespace between the symbol + * or address and the '+' or '-' symbols, we + * remember the character and replace it with a + * null so the symbol/value can be properly parsed + */ + if ((cp = strpbrk(symname, "+-")) != NULL) { + symbol = *cp; + *cp++ = '\0'; + } + + if (symname[0] == '$') { + diag = kdbgetulenv(&symname[1], &addr); + if (diag) + return diag; + } else if (symname[0] == '%') { + diag = kdba_getregcontents(&symname[1], ef, &addr); + if (diag) + return diag; + } else { + found = kdbgetsymval(symname, &symtab); + if (found) { + addr = symtab.sym_start; + } else { + diag = kdbgetularg(argv[*nextarg], &addr); + if (diag) + return diag; + } + } + + if (!found) + found = kdbnearsym(addr, &symtab); + + (*nextarg)++; + + if (name) + *name = symname; + if (value) + *value = addr; + if (offset && name && *name) + *offset = addr - symtab.sym_start; + + if ((*nextarg > argc) + && (symbol == '\0')) + return 0; + + /* + * check for +/- and offset + */ + + if (symbol == '\0') { + if ((argv[*nextarg][0] != '+') + && (argv[*nextarg][0] != '-')) { + /* + * Not our argument. Return. + */ + return 0; + } else { + positive = (argv[*nextarg][0] == '+'); + (*nextarg)++; + } + } else + positive = (symbol == '+'); + + /* + * Now there must be an offset! + */ + if ((*nextarg > argc) + && (symbol == '\0')) { + return KDB_INVADDRFMT; + } + + if (!symbol) { + cp = (char *)argv[*nextarg]; + (*nextarg)++; + } + + diag = kdbgetularg(cp, &off); + if (diag) + return diag; + + if (!positive) + off = -off; + + if (offset) + *offset += off; + + if (value) + *value += off; + + return 0; +} + +static void +kdb_cmderror(int diag) +{ + int i; + + if (diag >= 0) { + kdb_printf("no error detected\n"); + return; + } + + for(i=0; i<__nkdb_err; i++) { + if (kdbmsgs[i].km_diag == diag) { + kdb_printf("diag: %d: %s\n", diag, kdbmsgs[i].km_msg); + return; + } + } + + kdb_printf("Unknown diag %d\n", -diag); +} + +/* The command history feature is not functional at the moment. It + * will be replaced by something that understands editing keys, + * including left, right, insert, delete as well as up, down.
+ * Keith Owens, November 18 2000 + */ +#define KDB_CMD_HISTORY_COUNT 32 +#define CMD_BUFLEN 200 /* kdb_printf: max printline size == 256 */ +static unsigned int cmd_head, cmd_tail; +static unsigned int cmdptr; +static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN]; + +/* + * kdb_parse + * + * Parse the command line, search the command table for a + * matching command and invoke the command function. + * + * Parameters: + * cmdstr The input command line to be parsed. + * regs The registers at the time kdb was entered. + * Outputs: + * None. + * Returns: + * Zero for success, a kdb diagnostic if failure. + * Locking: + * None. + * Remarks: + * Limited to 20 tokens. + * + * Real rudimentary tokenization. Basically only whitespace + * is considered a token delimiter (but special consideration + * is taken of the '=' sign as used by the 'set' command). + * + * The algorithm used to tokenize the input string relies on + * there being at least one whitespace (or otherwise useless) + * character between tokens as the character immediately following + * the token is altered in-place to a null-byte to terminate the + * token string. + */ + +#define MAXARGC 20 + +static int +kdb_parse(char *cmdstr, kdb_eframe_t ef) +{ + static char *argv[MAXARGC]; + static int argc = 0; + static char cbuf[CMD_BUFLEN]; + char *cp, *cpp; + kdbtab_t *tp; + int i; + + /* + * First tokenize the command string. + */ + cp = cmdstr; + + if (*cp != '\n' && *cp != '\0') { + argc = 0; + cpp = cbuf; + while (*cp) { + /* skip whitespace */ + while (isspace(*cp)) cp++; + if ((*cp == '\0') || (*cp == '\n')) + break; + argv[argc++] = cpp; + /* Copy to next whitespace or '=' */ + while (*cp && !isspace(*cp)) { + if ((*cpp = *cp++) == '=') + break; + ++cpp; + } + *cpp++ = '\0'; /* Squash a ws or '=' character */ + } + } + if (!argc) + return 0; + + for(tp=kdb_commands, i=0; i < KDB_MAX_COMMANDS; i++,tp++) { + if (tp->cmd_name) { + /* + * If this command is allowed to be abbreviated, + * check to see if this is it. + */ + + if (tp->cmd_minlen + && (strlen(argv[0]) <= tp->cmd_minlen)) { + if (strncmp(argv[0], + tp->cmd_name, + tp->cmd_minlen) == 0) { + break; + } + } + + if (strcmp(argv[0], tp->cmd_name)==0) { + break; + } + } + } + + /* + * If we don't find a command by this name, see if the first + * few characters of this match any of the known commands. + * e.g., md1c20 should match md. + */ + if (i == KDB_MAX_COMMANDS) { + for(tp=kdb_commands, i=0; i < KDB_MAX_COMMANDS; i++,tp++) { + if (tp->cmd_name) { + if (strncmp(argv[0], + tp->cmd_name, + strlen(tp->cmd_name))==0) { + break; + } + } + } + } + + if (i < KDB_MAX_COMMANDS) { + int result; + KDB_STATE_SET(CMD); + result = (*tp->cmd_func)(argc-1, + (const char**)argv, + (const char**)__env, + ef); + KDB_STATE_CLEAR(CMD); + return result; + } + + /* + * If the input with which we were presented does not + * map to an existing command, attempt to parse it as an + * address argument and display the result. Useful for + * obtaining the address of a variable, or the nearest symbol + * to an address contained in a register.
+ */ + { + kdb_machreg_t value; + char *name = NULL; + long offset; + int nextarg = 0; + + if (kdbgetaddrarg(0, (const char **)argv, &nextarg, + &value, &offset, &name, ef)) { + return KDB_NOTFOUND; + } + + kdb_printf("%s = ", argv[0]); + kdb_symbol_print(value, NULL, KDB_SP_DEFAULT); + kdb_printf("\n"); + return 0; + } +} + + +static int +handle_ctrl_cmd(char *cmd) +{ +#define CTRL_P 16 +#define CTRL_N 14 + + /* initial situation */ + if (cmd_head == cmd_tail) return 1; + + switch(*cmd) { + case '\n': + case CTRL_P: + if (cmdptr != cmd_tail) + cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT; + strcpy(cmd, cmd_hist[cmdptr]); + return 0; + case CTRL_N: + if (cmdptr != (cmd_head-1)) + cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT; + strcpy(cmd, cmd_hist[cmdptr]); + return 0; + } + return 1; +} + + +/* + * kdb_local + * + * The main code for kdb. This routine is invoked on a specific + * processor, it is not global. The main kdb() routine ensures + * that only one processor at a time is in this routine. This + * code is called with the real reason code on the first entry + * to a kdb session, thereafter it is called with reason SWITCH, + * even if the user goes back to the original cpu. + * + * Inputs: + * reason The reason KDB was invoked + * error The hardware-defined error code + * ef The exception frame at time of fault/breakpoint. NULL + * for reason SILENT, otherwise valid. + * db_result Result code from the break or debug point. + * Returns: + * 0 KDB was invoked for an event which it wasn't responsible + * 1 KDB handled the event for which it was invoked. + * KDB_CMD_GO User typed 'go'. + * KDB_CMD_CPU User switched to another cpu. + * KDB_CMD_SS Single step. + * KDB_CMD_SSB Single step until branch. + * Locking: + * none + * Remarks: + * none + */ + +static int +kdb_local(kdb_reason_t reason, int error, kdb_eframe_t ef, kdb_dbtrap_t db_result) +{ + char *cmdbuf; + char cmd[CMD_BUFLEN]; + int diag; + typeof (*ef) local_ef; + + if (reason != KDB_REASON_DEBUG && + reason != KDB_REASON_SILENT) { + kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", (void *)current, current->pid); +#if defined(CONFIG_SMP) + kdb_printf("on processor %d ", smp_processor_id()); +#endif + } + + switch (reason) { + case KDB_REASON_DEBUG: + { + /* + * If re-entering kdb after a single step + * command, don't print the message. + */ + switch(db_result) { + case KDB_DB_BPT: + kdb_printf("\nEntering kdb (0x%p) ", (void *)current); +#if defined(CONFIG_SMP) + kdb_printf("on processor %d ", smp_processor_id()); +#endif + kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(ef)); + break; + case KDB_DB_SSB: + /* + * In the midst of ssb command. Just return. + */ + return KDB_CMD_SSB; /* Continue with SSB command */ + + break; + case KDB_DB_SS: + break; + case KDB_DB_SSBPT: + return 1; /* kdba_db_trap did the work */ + default: + kdb_printf("kdb: Bad result from kdba_db_trap: %d\n", + db_result); + break; + } + + } + break; + case KDB_REASON_FAULT: + break; + case KDB_REASON_ENTER: + kdb_printf("due to KDB_ENTER()\n"); + break; + case KDB_REASON_KEYBOARD: + kdb_printf("due to Keyboard Entry\n"); + break; + case KDB_REASON_SWITCH: + kdb_printf("due to cpu switch\n"); + break; + case KDB_REASON_CALL: + if (ef) break; /* drop through if regs is not specified */ + case KDB_REASON_PANIC: + if (reason == KDB_REASON_CALL) + kdb_printf("due to direct function call\n"); + else + kdb_printf("due to panic\n"); + /* + * Get a set of registers that defines the current + * context (as of the call to kdb). 
+ */ + memset(&local_ef, 0, sizeof(local_ef)); + ef = &local_ef; + kdba_getcurrentframe(ef); + kdba_setpc(ef, (kdb_machreg_t)(&kdb)); /* for traceback */ + break; + case KDB_REASON_OOPS: + kdb_printf("Oops: %s\n", kdb_diemsg); + kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(ef)); + kdba_dumpregs(ef, NULL, NULL); + break; + case KDB_REASON_NMI: + kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n", + kdba_getpc(ef)); + kdba_dumpregs(ef, NULL, NULL); + break; + case KDB_REASON_WATCHDOG: + kdb_printf("due to WatchDog Interrupt @ " kdb_machreg_fmt "\n", + kdba_getpc(ef)); + kdba_dumpregs(ef, NULL, NULL); + break; + case KDB_REASON_BREAK: + kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(ef)); + /* + * Determine if this breakpoint is one that we + * are interested in. + */ + if (db_result != KDB_DB_BPT) { + kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result); + return 0; /* Not for us, dismiss it */ + } + break; + case KDB_REASON_RECURSE: + kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(ef)); + break; + case KDB_REASON_SILENT: + return KDB_CMD_GO; /* Silent entry, silent exit */ + break; + default: + kdb_printf("kdb: unexpected reason code: %d\n", reason); + return 0; /* Not for us, dismiss it */ + } + + while (1) { + /* + * Initialize pager context. + */ + kdb_nextline = 1; +#ifdef KDB_HAVE_LONGJMP + /* + * Use kdba_setjmp/kdba_longjmp to break out of + * the pager early and to attempt to recover from kdb errors. + */ + KDB_STATE_CLEAR(LONGJMP); + if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) { + /* + * Command aborted (usually in pager) + */ + + /* + * XXX - need to abort a SSB ? + */ + continue; + } + else + KDB_STATE_SET(LONGJMP); +#endif /* KDB_HAVE_LONGJMP */ + +do_full_getstr: +#if defined(CONFIG_SMP) + kdb_printf(kdbgetenv("PROMPT"), smp_processor_id()); +#else + kdb_printf(kdbgetenv("PROMPT")); +#endif + + + cmdbuf = cmd_hist[cmd_head]; + *cmdbuf = '\0'; + /* + * Fetch command from keyboard + */ + cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN,""); + if (*cmdbuf < 32 && *cmdbuf != '\n') + if (handle_ctrl_cmd(cmdbuf)) + goto do_full_getstr; + + if (*cmdbuf != '\n') { + cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT; + if (cmd_head == cmd_tail) cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT; + + } + + cmdptr = cmd_head; + strcpy(cmd, cmdbuf); /* copy because of destructive parsing */ + diag = kdb_parse(cmd, ef); + if (diag == KDB_NOTFOUND) { + kdb_printf("Unknown kdb command: '%s'\n", cmd); + diag = 0; + } + if (diag == KDB_CMD_GO + || diag == KDB_CMD_CPU + || diag == KDB_CMD_SS + || diag == KDB_CMD_SSB) + break; + + if (diag) + kdb_cmderror(diag); + } + + return(diag); +} + + +/* + * kdb_print_state + * + * Print the state data for the current processor for debugging. + * + * Inputs: + * text Identifies the debug point + * value Any integer value to be printed, e.g. reason code. + * Returns: + * None. + * Locking: + * none + * Remarks: + * none + */ + +void kdb_print_state(const char *text, int value) +{ + kdb_printf("state: %s cpu %d value %d initial %d state %x\n", + text, smp_processor_id(), value, kdb_initial_cpu, kdb_state[smp_processor_id()]); +} + +/* + * kdb_previous_event + * + * Return a count of cpus that are leaving kdb, i.e. the number + * of processors that are still handling the previous kdb event. + * + * Inputs: + * None. + * Returns: + * Count of cpus in previous event. 
+ * Locking:
+ *	none
+ * Remarks:
+ *	none
+ */
+
+static int
+kdb_previous_event(void)
+{
+	int i, leaving = 0;
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (KDB_STATE_CPU(LEAVING, i))
+			++leaving;
+	}
+	return(leaving);
+}
+
+/*
+ * kdb_main_loop
+ *
+ * The main kdb loop.  After initial setup and assignment of the controlling
+ * cpu, all cpus are in this loop.  One cpu is in control and will issue the kdb
+ * prompt, the others will spin until 'go' or cpu switch.
+ *
+ * To get a consistent view of the kernel stacks for all processes, this routine
+ * is invoked from the main kdb code via an architecture specific routine.
+ * kdba_main_loop is responsible for making the kernel stacks consistent for all
+ * processes, there should be no difference between a blocked process and a
+ * running process as far as kdb is concerned.
+ *
+ * Inputs:
+ *	reason		The reason KDB was invoked
+ *	error		The hardware-defined error code
+ *	reason2		kdb's current reason code.  Initially error but can change
+ *			according to kdb state.
+ *	db_result	Result code from break or debug point.
+ *	ef		The exception frame at time of fault/breakpoint.  If reason
+ *			is KDB_REASON_SILENT or KDB_REASON_PANIC then ef is NULL,
+ *			otherwise it should always be valid.
+ * Returns:
+ *	0	KDB was invoked for an event which it wasn't responsible
+ *	1	KDB handled the event for which it was invoked.
+ * Locking:
+ *	none
+ * Remarks:
+ *	none
+ */
+
+int
+kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+	      kdb_dbtrap_t db_result, kdb_eframe_t ef)
+{
+	int result = 1;
+	/* Stay in kdb() until 'go', 'ss[b]' or an error */
+	while (1) {
+		int i;
+		/*
+		 * All processors except the one that is in control
+		 * will spin here.
+		 */
+		KDB_DEBUG_STATE("kdb_main_loop 1", reason);
+		while (KDB_STATE(HOLD_CPU))
+			;
+		KDB_DEBUG_STATE("kdb_main_loop 2", reason);
+		if (KDB_STATE(LEAVING))
+			break;	/* Another cpu said 'go' */
+
+		/* Still using kdb, this processor is in control */
+		result = kdb_local(reason2, error, ef, db_result);
+		KDB_DEBUG_STATE("kdb_main_loop 3", result);
+
+		if (result == KDB_CMD_CPU) {
+			/* Cpu switch, hold the current cpu, release the target one. */
+			reason2 = KDB_REASON_SWITCH;
+			KDB_STATE_SET(HOLD_CPU);
+			KDB_STATE_CLEAR_CPU(HOLD_CPU, kdb_new_cpu);
+			continue;
+		}
+
+		if (result == KDB_CMD_SS) {
+			KDB_STATE_SET(DOING_SS);
+			break;
+		}
+
+		if (result == KDB_CMD_SSB) {
+			KDB_STATE_SET(DOING_SS);
+			KDB_STATE_SET(DOING_SSB);
+			break;
+		}
+
+		if (result && result != 1 && result != KDB_CMD_GO)
+			kdb_printf("\nUnexpected kdb_local return code %d\n", result);
+
+		/*
+		 * All other return codes (including KDB_CMD_GO) from
+		 * kdb_local will end kdb().  Release all other cpus
+		 * which will see KDB_STATE(LEAVING) is set.
+		 */
+		for (i = 0; i < NR_CPUS; ++i) {
+			if (KDB_STATE_CPU(KDB, i))
+				KDB_STATE_SET_CPU(LEAVING, i);
+			KDB_STATE_CLEAR_CPU(WAIT_IPI, i);
+			KDB_STATE_CLEAR_CPU(HOLD_CPU, i);
+		}
+		KDB_DEBUG_STATE("kdb_main_loop 4", reason);
+		break;
+	}
+	return(result != 0);
+}
+
+/*
+ * kdb
+ *
+ *	This function is the entry point for the kernel debugger.  It
+ *	provides a command parser and associated support functions to
+ *	allow examination and control of an active kernel.
+ *
+ *	This function may be invoked directly from any
+ *	point in the kernel by calling with reason == KDB_REASON_CALL
+ *	(XXX - note that the regs aren't set up this way - could
+ *	use a software interrupt to enter kdb to get regs...)
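+ *
+ *	For example, a driver author could drop into the debugger with
+ *
+ *	  kdb(KDB_REASON_CALL, 0, NULL);
+ *
+ *	in which case kdb_local() synthesizes an exception frame via
+ *	kdba_getcurrentframe() (see the KDB_REASON_CALL handling above).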
+ *
+ *	The breakpoint trap code should invoke this function with
+ *	one of KDB_REASON_BREAK (int 03) or KDB_REASON_DEBUG (debug register)
+ *
+ *	the die_if_kernel function should invoke this function with
+ *	KDB_REASON_OOPS.
+ *
+ *	the panic function should invoke this function with KDB_REASON_PANIC.
+ *
+ *	The kernel fault handler should invoke this function with
+ *	reason == KDB_REASON_FAULT and error == trap vector #.
+ *
+ *	In single step mode, one cpu is released to run without
+ *	breakpoints.  Interrupts and NMI are reset to their original values,
+ *	the cpu is allowed to do one instruction which causes a trap
+ *	into kdb with KDB_REASON_DEBUG.
+ *
+ * Inputs:
+ *	reason		The reason KDB was invoked
+ *	error		The hardware-defined error code
+ *	ef		The exception frame at time of fault/breakpoint.  If reason
+ *			is KDB_REASON_SILENT or KDB_REASON_PANIC then ef is NULL,
+ *			otherwise it should always be valid.
+ * Returns:
+ *	0	KDB was invoked for an event which it wasn't responsible
+ *	1	KDB handled the event for which it was invoked.
+ * Locking:
+ *	none
+ * Remarks:
+ *	No assumptions of system state.  This function may be invoked
+ *	with arbitrary locks held.  It will stop all other processors
+ *	in an SMP environment, disable all interrupts and does not use
+ *	the operating systems keyboard driver.
+ *
+ *	This code is reentrant but only for cpu switch.  Any other
+ *	reentrancy is an error, although kdb will attempt to recover.
+ *
+ *	At the start of a kdb session the initial processor is running
+ *	kdb() and the other processors can be doing anything.  When the
+ *	initial processor calls smp_kdb_stop() the other processors are
+ *	driven through kdb_ipi which calls kdb() with reason SWITCH.
+ *	That brings all processors into this routine, one with a "real"
+ *	reason code, the other with SWITCH.
+ *
+ *	Because the other processors are driven via smp_kdb_stop(),
+ *	they enter here from the NMI handler.  Until the other
+ *	processors exit from here and exit from kdb_ipi, they will not
+ *	take any more NMI requests.  The initial cpu will still take NMI.
+ *
+ *	Multiple race and reentrancy conditions, each with different
+ *	avoidance mechanisms.
+ *
+ *	Two cpus hit debug points at the same time.
+ *
+ *	  kdb_lock and kdb_initial_cpu ensure that only one cpu gets
+ *	  control of kdb.  The others spin on kdb_initial_cpu until
+ *	  they are driven through NMI into kdb_ipi.  When the initial
+ *	  cpu releases the others from NMI, they resume trying to get
+ *	  kdb_initial_cpu to start a new event.
+ *
+ *	A cpu is released from kdb and starts a new event before the
+ *	original event has completely ended.
+ *
+ *	  kdb_previous_event() prevents any cpu from entering
+ *	  kdb_initial_cpu state until the previous event has completely
+ *	  ended on all cpus.
+ *
+ *	An exception occurs inside kdb.
+ *
+ *	  kdb_initial_cpu detects recursive entry to kdb and attempts
+ *	  to recover.  The recovery uses longjmp() which means that
+ *	  recursive calls to kdb never return.  Beware of assumptions
+ *	  like
+ *
+ *	    ++depth;
+ *	    kdb();
+ *	    --depth;
+ *
+ *	  If the kdb call is recursive then longjmp takes over and
+ *	  --depth is never executed.
+ *
+ *	NMI handling.
+ *
+ *	  NMI handling is tricky.  The initial cpu is invoked by some kdb event,
+ *	  this event could be NMI driven but usually is not.  The other cpus are
+ *	  driven into kdb() via kdb_ipi which uses NMI so at the start the other
+ *	  cpus will not accept NMI.  Some operations such as SS release one cpu
+ *	  but hold all the others.
Releasing a cpu means it drops back to + * whatever it was doing before the kdb event, this means it drops out of + * kdb_ipi and hence out of NMI status. But the software watchdog uses + * NMI and we do not want spurious watchdog calls into kdb. kdba_read() + * resets the watchdog counters in its input polling loop, when a kdb + * command is running it is subject to NMI watchdog events. + * + * Another problem with NMI handling is the NMI used to drive the other + * cpus into kdb cannot be distinguished from the watchdog NMI. State + * flag WAIT_IPI indicates that a cpu is waiting for NMI via kdb_ipi, + * if not set then software NMI is ignored by kdb_ipi. + * + * Cpu switching. + * + * All cpus are in kdb (or they should be), all but one are + * spinning on KDB_STATE(HOLD_CPU). Only one cpu is not in + * HOLD_CPU state, only that cpu can handle commands. + * + */ + +int +kdb(kdb_reason_t reason, int error, kdb_eframe_t ef) +{ + kdb_intstate_t int_state; /* Interrupt state */ + kdb_reason_t reason2 = reason; + int result = 1; /* Default is kdb handled it */ + int ss_event; + kdb_dbtrap_t db_result=KDB_DB_NOBPT; + + if (!kdb_on) + return 0; + + KDB_DEBUG_STATE("kdb 1", reason); + + /* Filter out userspace breakpoints first, no point in doing all + * the kdb smp fiddling when it is really a gdb trap. + * Save the single step status first, kdba_db_trap clears ss status. + */ + ss_event = (KDB_STATE(DOING_SS) || KDB_STATE(SSBPT)); + if (reason == KDB_REASON_BREAK) + db_result = kdba_bp_trap(ef, error); /* Only call this once */ + if (reason == KDB_REASON_DEBUG) + db_result = kdba_db_trap(ef, error); /* Only call this once */ + + if ((reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG) + && db_result == KDB_DB_NOBPT) { + KDB_DEBUG_STATE("kdb 2", reason); + return 0; /* Not one of mine */ + } + + /* Turn off single step if it was being used */ + if (ss_event) { + kdba_clearsinglestep(ef); + /* Single step after a breakpoint removes the need for a delayed reinstall */ + if (reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG) { + KDB_STATE_SET(NO_BP_DELAY); + } + } + + /* kdb can validly reenter but only for certain well defined conditions */ + if (reason == KDB_REASON_DEBUG + && !KDB_STATE(HOLD_CPU) + && ss_event) + KDB_STATE_SET(REENTRY); + else + KDB_STATE_CLEAR(REENTRY); + + /* Wait for previous kdb event to completely exit before starting + * a new event. + */ + while (kdb_previous_event()) + ; + KDB_DEBUG_STATE("kdb 3", reason); + + /* + * If kdb is already active, print a message and try to recover. + * If recovery is not possible and recursion is allowed or + * forced recursion without recovery is set then try to recurse + * in kdb. Not guaranteed to work but it makes an attempt at + * debugging the debugger. 
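+	 * (For example, "set RECURSE=2" at the kdb prompt arms the forced
+	 * recursion path below; the value 2 is only illustrative, any
+	 * value greater than 1 enables it.)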
+ */ + if (reason != KDB_REASON_SWITCH) { + if (KDB_IS_RUNNING() && !KDB_STATE(REENTRY)) { + int recover = 1; + unsigned long recurse = 0; + kdb_printf("kdb: Debugger re-entered on cpu %d, new reason = %d\n", + smp_processor_id(), reason); + /* Should only re-enter from released cpu */ + if (KDB_STATE(HOLD_CPU)) { + kdb_printf(" Strange, cpu %d should not be running\n", smp_processor_id()); + recover = 0; + } + if (!KDB_STATE(CMD)) { + kdb_printf(" Not executing a kdb command\n"); + recover = 0; + } + if (!KDB_STATE(LONGJMP)) { + kdb_printf(" No longjmp available for recovery\n"); + recover = 0; + } + kdbgetulenv("RECURSE", &recurse); + if (recurse > 1) { + kdb_printf(" Forced recursion is set\n"); + recover = 0; + } + if (recover) { + kdb_printf(" Attempting to abort command and recover\n"); +#ifdef KDB_HAVE_LONGJMP + kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 0); +#endif + } + if (recurse) { + if (KDB_STATE(RECURSE)) { + kdb_printf(" Already in recursive mode\n"); + } else { + kdb_printf(" Attempting recursive mode\n"); + KDB_STATE_SET(RECURSE); + KDB_STATE_SET(REENTRY); + reason2 = KDB_REASON_RECURSE; + recover = 1; + } + } + if (!recover) { + kdb_printf(" Cannot recover, allowing event to proceed\n"); + return(0); + } + } + } else if (!KDB_IS_RUNNING()) { + kdb_printf("kdb: CPU switch without kdb running, I'm confused\n"); + return(0); + } + + /* + * Disable interrupts, breakpoints etc. on this processor + * during kdb command processing + */ + KDB_STATE_SET(KDB); + kdba_disableint(&int_state); + if (!KDB_STATE(KDB_CONTROL)) { + kdb_bp_remove_local(); + kdba_disable_lbr(); + KDB_STATE_SET(KDB_CONTROL); + } + else if (KDB_DEBUG(LBR)) + kdba_print_lbr(); + + /* + * If not entering the debugger due to CPU switch or single step + * reentry, serialize access here. + * The processors may race getting to this point - if, + * for example, more than one processor hits a breakpoint + * at the same time. We'll serialize access to kdb here - + * other processors will loop here, and the NMI from the stop + * IPI will take them into kdb as switch candidates. Once + * the initial processor releases the debugger, the rest of + * the processors will race for it. + */ + if (reason == KDB_REASON_SWITCH + || KDB_STATE(REENTRY)) + ; /* drop through */ + else { + KDB_DEBUG_STATE("kdb 4", reason); + spin_lock(&kdb_lock); + + while (KDB_IS_RUNNING() || kdb_previous_event()) { + spin_unlock(&kdb_lock); + + while (KDB_IS_RUNNING() || kdb_previous_event()) + ; + + spin_lock(&kdb_lock); + } + KDB_DEBUG_STATE("kdb 5", reason); + + kdb_initial_cpu = smp_processor_id(); + spin_unlock(&kdb_lock); + } + + if (smp_processor_id() == kdb_initial_cpu + && !KDB_STATE(REENTRY)) { + KDB_STATE_CLEAR(HOLD_CPU); + KDB_STATE_CLEAR(WAIT_IPI); + /* + * Remove the global breakpoints. This is only done + * once from the initial processor on initial entry. + */ + kdb_bp_remove_global(); + + /* + * If SMP, stop other processors. The other processors + * will enter kdb() with KDB_REASON_SWITCH and spin + * below. 
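+		 * A sketch of the resulting state, assuming four cpus with
+		 * cpu 0 as kdb_initial_cpu: cpu 0 runs the command loop,
+		 * while cpus 1-3 have HOLD_CPU and WAIT_IPI set and spin
+		 * in kdb_main_loop() until 'go' or a cpu switch.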
+		 */
+		KDB_DEBUG_STATE("kdb 6", reason);
+		if (smp_num_cpus > 1) {
+			int i;
+			for (i = 0; i < NR_CPUS; ++i) {
+				if (i != kdb_initial_cpu) {
+					KDB_STATE_SET_CPU(HOLD_CPU, i);
+					KDB_STATE_SET_CPU(WAIT_IPI, i);
+				}
+			}
+			KDB_DEBUG_STATE("kdb 7", reason);
+			smp_kdb_stop();
+			KDB_DEBUG_STATE("kdb 8", reason);
+		}
+	}
+
+	/* Set up a consistent set of process stacks before talking to the user */
+	KDB_DEBUG_STATE("kdb 9", result);
+	result = kdba_main_loop(reason, reason2, error, db_result, ef);
+
+	KDB_DEBUG_STATE("kdb 10", result);
+	kdba_adjust_ip(reason, error, ef);
+	KDB_STATE_CLEAR(LONGJMP);
+	KDB_DEBUG_STATE("kdb 11", result);
+
+	/* No breakpoints installed for SS */
+	if (!KDB_STATE(DOING_SS) &&
+	    !KDB_STATE(SSBPT) &&
+	    !KDB_STATE(RECURSE)) {
+		KDB_DEBUG_STATE("kdb 12", result);
+		kdba_enable_lbr();
+		kdb_bp_install_local(ef);
+		KDB_STATE_CLEAR(NO_BP_DELAY);
+		KDB_STATE_CLEAR(KDB_CONTROL);
+	}
+
+	KDB_DEBUG_STATE("kdb 13", result);
+	kdba_restoreint(&int_state);
+
+	KDB_STATE_CLEAR(KDB);		/* Main kdb state has been cleared */
+	KDB_STATE_CLEAR(LEAVING);	/* Elvis has left the building ... */
+	KDB_DEBUG_STATE("kdb 14", result);
+
+	if (smp_processor_id() == kdb_initial_cpu &&
+	    !KDB_STATE(DOING_SS) &&
+	    !KDB_STATE(RECURSE)) {
+		/*
+		 * (Re)install the global breakpoints.  This is only done
+		 * once from the initial processor on final exit.
+		 */
+		KDB_DEBUG_STATE("kdb 15", reason);
+		kdb_bp_install_global(ef);
+		/* Wait until all the other processors leave kdb */
+		while (kdb_previous_event())
+			;
+		kdb_initial_cpu = -1;	/* release kdb control */
+		KDB_DEBUG_STATE("kdb 16", reason);
+	}
+
+	KDB_STATE_CLEAR(RECURSE);
+	KDB_DEBUG_STATE("kdb 17", reason);
+	return(result != 0);
+}
+
+/*
+ * kdb_mdr
+ *
+ *	This function implements the guts of the 'mdr' command.
+ *
+ *	mdr <vaddr>,<bytes>
+ *
+ * Inputs:
+ *	addr	Start address
+ *	count	Number of bytes
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Always 0.  Any errors are detected and printed by kdba_getword.
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+static int
+kdb_mdr(kdb_machreg_t addr, unsigned int count)
+{
+	kdb_machreg_t addr2 = addr;
+	unsigned int count2 = count;
+
+	KDB_STATE_CLEAR(SUPPRESS);
+	while (count2--) {
+		kdba_getword(addr2++, 1);
+		if (KDB_STATE(SUPPRESS)) {
+			KDB_STATE_CLEAR(SUPPRESS);
+			return(0);	/* Error message already printed */
+		}
+	}
+
+	while (count--)
+		kdb_printf("%02lx", kdba_getword(addr++, 1));
+	kdb_printf("\n");
+	return(0);
+}
+
+/*
+ * kdb_md
+ *
+ *	This function implements the 'md', 'md1', 'md2', 'md4', 'md8'
+ *	'mdr' and 'mds' commands.
+ *
+ *	md|mds [<vaddr> [<line-count> [<radix>]]]
+ *	mdWcN [<vaddr> [<line-count> [<radix>]]]
+ *		where W is the width (1, 2, 4 or 8) and N is the count.
+ *		e.g., md1c20 reads 20 bytes, 1 at a time.
+ *	mdr <vaddr>,<bytes>
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_md(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	static kdb_machreg_t last_addr;
+	static int last_radix, last_bytesperword, last_repeat;
+	int radix = 16, mdcount = 8, bytesperword = sizeof(kdb_machreg_t), repeat;
+	int nosect = 0;
+	char fmtchar, fmtstr[64];
+	kdb_machreg_t addr;
+	unsigned long word;
+	long offset = 0;
+	kdb_symtab_t symtab;
+	int symbolic = 0;
+
+	kdbgetintenv("MDCOUNT", &mdcount);
+	kdbgetintenv("RADIX", &radix);
+	kdbgetintenv("BYTESPERWORD", &bytesperword);
+
+	/* Assume 'md <vaddr>' and start with environment values */
+	repeat = mdcount * 16 / bytesperword;
+
+	if (strcmp(argv[0], "mdr") == 0) {
+		if (argc != 2)
+			return KDB_ARGCOUNT;
+	} else if (isdigit(argv[0][2])) {
+		bytesperword = (int)(argv[0][2] - '0');
+		last_bytesperword = bytesperword;
+		repeat = mdcount * 16 / bytesperword;
+		if (argv[0][3] == 'c') {
+			repeat = simple_strtoul(argv[0]+4, NULL, 10);
+			mdcount = ((repeat * bytesperword) + 15) / 16;
+		}
+		last_repeat = repeat;
+	}
+
+	if (argc == 0) {
+		if (last_addr == 0)
+			return KDB_ARGCOUNT;
+		addr = last_addr;
+		radix = last_radix;
+		bytesperword = last_bytesperword;
+		repeat = last_repeat;
+		mdcount = ((repeat * bytesperword) + 15) / 16;
+	}
+
+	if (argc) {
+		kdb_machreg_t val;
+		int diag, nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+		if (diag)
+			return diag;
+		if (argc > nextarg+2)
+			return KDB_ARGCOUNT;
+
+		if (argc >= nextarg) {
+			diag = kdbgetularg(argv[nextarg], &val);
+			if (!diag) {
+				mdcount = (int) val;
+				repeat = mdcount * 16 / bytesperword;
+			}
+		}
+		if (argc >= nextarg+1) {
+			diag = kdbgetularg(argv[nextarg+1], &val);
+			if (!diag)
+				radix = (int) val;
+		}
+	}
+
+	if (strcmp(argv[0], "mdr") == 0) {
+		return(kdb_mdr(addr, mdcount));
+	}
+
+	switch (radix) {
+	case 10:
+		fmtchar = 'd';
+		break;
+	case 16:
+		fmtchar = 'x';
+		break;
+	case 8:
+		fmtchar = 'o';
+		break;
+	default:
+		return KDB_BADRADIX;
+	}
+
+	last_radix = radix;
+
+	if (bytesperword > sizeof(kdb_machreg_t))
+		return KDB_BADWIDTH;
+
+	switch (bytesperword) {
+	case 8:
+		sprintf(fmtstr, "%%16.16l%c ", fmtchar);
+		break;
+	case 4:
+		sprintf(fmtstr, "%%8.8l%c ", fmtchar);
+		break;
+	case 2:
+		sprintf(fmtstr, "%%4.4l%c ", fmtchar);
+		break;
+	case 1:
+		sprintf(fmtstr, "%%2.2l%c ", fmtchar);
+		break;
+	default:
+		return KDB_BADWIDTH;
+	}
+
+	last_repeat = repeat;
+	last_bytesperword = bytesperword;
+
+	if (strcmp(argv[0], "mds") == 0) {
+		symbolic = 1;
+		/* Do not save these changes as last_*, they are temporary mds
+		 * overrides.
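+		 * (Illustrative mds output, one word per line with the
+		 * nearest symbol appended, e.g. "c0114ab0 schedule+0x70";
+		 * the address and offset here are made up.)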
+ */ + bytesperword = sizeof(kdb_machreg_t); + repeat = mdcount; + kdbgetintenv("NOSECT", &nosect); + } + + /* Round address down modulo BYTESPERWORD */ + + addr &= ~(bytesperword-1); + + while (repeat > 0) { + int num = (symbolic?1 :(16 / bytesperword)); + char cbuf[32]; + char *c = cbuf; + char t; + int i; + + memset(cbuf, '\0', sizeof(cbuf)); + kdb_printf(kdb_machreg_fmt0 " ", addr); + + for(i = 0; i < num && repeat--; i++) { + word = kdba_getword(addr, bytesperword); + if (KDB_STATE(SUPPRESS)) { + KDB_STATE_CLEAR(SUPPRESS); + return 0; /* Error message already printed */ + } + + kdb_printf(fmtstr, word); + if (symbolic) { + kdbnearsym(word, &symtab); + } + else { + memset(&symtab, 0, sizeof(symtab)); + } + if (symtab.sym_name) { + kdb_symbol_print(word, &symtab, 0); + if (!nosect) { + kdb_printf("\n"); + kdb_printf(" %s %s " + kdb_machreg_fmt " " kdb_machreg_fmt " " kdb_machreg_fmt, + symtab.mod_name, + symtab.sec_name, + symtab.sec_start, + symtab.sym_start, + symtab.sym_end); + } + addr += bytesperword; + } else { + switch (bytesperword) { + case 8: + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + case 4: + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + case 2: + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + case 1: + *c++ = isprint(t=kdba_getword(addr++, 1)) + ?t:'.'; + break; + } + } + } + kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1), " ", cbuf); + } + last_addr = addr; + + return 0; +} + +/* + * kdb_mm + * + * This function implements the 'mm' command. + * + * mm address-expression new-value + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * regs registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * mm works on machine words, mmW works on bytes. + */ + +int +kdb_mm(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + int diag; + kdb_machreg_t addr; + long offset = 0; + unsigned long contents; + unsigned long word; + int nextarg; + int width; + + if (argv[0][2] && !isdigit(argv[0][2])) + return KDB_NOTFOUND; + + if (argc < 2) { + return KDB_ARGCOUNT; + } + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + if (nextarg > argc) + return KDB_ARGCOUNT; + + diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL, regs); + if (diag) + return diag; + + if (nextarg != argc + 1) + return KDB_ARGCOUNT; + + width = argv[0][2] ? (argv[0][2] - '0') : (sizeof(kdb_machreg_t)); + + /* + * To prevent modification of invalid addresses, check first. + */ + word = kdba_getword(addr, width); + if (KDB_STATE(SUPPRESS)) { + KDB_STATE_CLEAR(SUPPRESS); + return 0; + } + + diag = kdba_putword(addr, width, contents); + + kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents); + + return 0; +} + +/* + * kdb_go + * + * This function implements the 'go' command. + * + * go [address-expression] + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * regs registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * KDB_CMD_GO for success, a kdb diagnostic if error + * Locking: + * none. 
+ * Remarks:
+ */
+
+int
+kdb_go(int argc, const char **argv, const char **envp, kdb_eframe_t ef)
+{
+	kdb_machreg_t addr;
+	int diag;
+	int nextarg;
+	long offset;
+
+	if (argc == 1) {
+		nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg,
+				     &addr, &offset, NULL, ef);
+		if (diag)
+			return diag;
+
+		kdba_setpc(ef, addr);
+	} else if (argc)
+		return KDB_ARGCOUNT;
+
+	return KDB_CMD_GO;
+}
+
+/*
+ * kdb_rd
+ *
+ *	This function implements the 'rd' command.
+ *
+ *	rd	Display all general registers.
+ *	rd c	Display all control registers.
+ *	rd d	Display all debug registers.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_rd(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	/*
+	 */
+
+	if (argc == 0) {
+		return kdba_dumpregs(regs, NULL, NULL);
+	}
+
+	if (argc > 2) {
+		return KDB_ARGCOUNT;
+	}
+
+	return kdba_dumpregs(regs, argv[1], argv[2]);
+}
+
+/*
+ * kdb_rm
+ *
+ *	This function implements the 'rm' (register modify) command.
+ *
+ *	rm register-name new-contents
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ *	Currently doesn't allow modification of control or
+ *	debug registers, nor does it allow modification
+ *	of model-specific registers (MSR).
+ */
+
+int
+kdb_rm(int argc, const char **argv, const char **envp, kdb_eframe_t ef)
+{
+	int diag;
+	int ind = 0;
+	kdb_machreg_t contents;
+
+	if (argc != 2) {
+		return KDB_ARGCOUNT;
+	}
+
+	/*
+	 * Allow presence or absence of leading '%' symbol.
+	 */
+
+	if (argv[1][0] == '%')
+		ind = 1;
+
+	diag = kdbgetularg(argv[2], &contents);
+	if (diag)
+		return diag;
+
+	diag = kdba_setregcontents(&argv[1][ind], ef, contents);
+	if (diag)
+		return diag;
+
+	return 0;
+}
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+/*
+ * kdb_sr
+ *
+ *	This function implements the 'sr' (SYSRQ key) command which
+ *	interfaces to the soi-disant MAGIC SYSRQ functionality.
+ *
+ *	sr <key>
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ *	None.
+ */
+int
+kdb_sr(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	if (argc != 1) {
+		return KDB_ARGCOUNT;
+	}
+
+	handle_sysrq(*argv[1], regs, 0, 0);
+
+	return 0;
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * kdb_ef
+ *
+ *	This function implements the 'ef' (display exception frame)
+ *	command.  This command takes an address and expects to find
+ *	an exception frame at that address, formats and prints it.
+ *
+ *	ef address-expression
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ *	Not done yet.
+ */ + +int +kdb_ef(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + int diag; + kdb_machreg_t addr; + long offset; + int nextarg; + + if (argc == 1) { + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + return kdba_dumpregs((struct pt_regs *)addr, NULL, NULL); + } + + return KDB_ARGCOUNT; +} + +/* + * kdb_reboot + * + * This function implements the 'reboot' command. Reboot the system + * immediately. + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * regs registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * Shouldn't return from this function. + */ + +int +kdb_reboot(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + machine_restart(0); + /* NOTREACHED */ + return 0; +} + + +#if defined(CONFIG_MODULES) +extern struct module *find_module(const char *); +extern void free_module(struct module *, int); + +/* + * kdb_lsmod + * + * This function implements the 'lsmod' command. Lists currently + * loaded kernel modules. + * + * Mostly taken from userland lsmod. + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * regs registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * + */ + +int +kdb_lsmod(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + struct module *mod; + struct module_ref *mr; + + if (argc != 0) + return KDB_ARGCOUNT; + + kdb_printf("Module Size modstruct Used by\n"); + for (mod = module_list; mod && mod->next ;mod = mod->next) { + kdb_printf("%-20s%8lu 0x%p %4ld ", mod->name, mod->size, (void *)mod, + (long)atomic_read(&mod->uc.usecount)); + + if (mod->flags & MOD_DELETED) + kdb_printf(" (deleted)"); + else if (mod->flags & MOD_INITIALIZING) + kdb_printf(" (initializing)"); + else if (!(mod->flags & MOD_RUNNING)) + kdb_printf(" (uninitialized)"); + else { + if (mod->flags & MOD_AUTOCLEAN) + kdb_printf(" (autoclean)"); + if (!(mod->flags & MOD_USED_ONCE)) + kdb_printf(" (unused)"); + } + + if (mod->refs) { + kdb_printf(" [ "); + + mr = mod->refs; + while (mr) { + kdb_printf("%s ", mr->ref->name); + mr = mr->next_ref; + } + + kdb_printf("]"); + } + + kdb_printf("\n"); + } + + return 0; +} + +/* + * kdb_rmmod + * + * This function implements the 'rmmod' command. Removes a given + * kernel module. + * + * Inputs: + * argc argument count + * argv argument vector + * envp environment vector + * regs registers at time kdb was entered. + * Outputs: + * None. + * Returns: + * zero for success, a kdb diagnostic if error + * Locking: + * none. + * Remarks: + * Danger: free_module() calls mod->cleanup(). If the cleanup routine + * relies on interrupts then it will hang, kdb has interrupts disabled. 
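+ *	(A sketch of the failure mode: a cleanup routine that sleeps or
+ *	waits for a timer tick can never be woken from here, because kdb
+ *	runs with interrupts off via kdba_disableint().)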
+ */
+
+int
+kdb_rmmod(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	struct module *mod;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	kdb_printf("Attempting to remove module: [%s]\n", argv[1]);
+	if ((mod = find_module(argv[1])) == NULL) {
+		kdb_printf("Unable to find a module by that name\n");
+		return 0;
+	}
+
+	if (mod->refs != NULL || __MOD_IN_USE(mod)) {
+		kdb_printf("Module is in use, unable to unload\n");
+		return 0;
+	}
+
+	free_module(mod, 0);
+	kdb_printf("Module successfully unloaded\n");
+
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/*
+ * kdb_env
+ *
+ *	This function implements the 'env' command.  Display the current
+ *	environment variables.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_env(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int i;
+
+	for(i=0; i<__nenv; i++) {
+		if (__env[i]) {
+			kdb_printf("%s\n", __env[i]);
+		}
+	}
+
+	if (KDB_DEBUG(MASK))
+		kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+
+	return 0;
+}
+
+/*
+ * kdb_set
+ *
+ *	This function implements the 'set' command.  Alter an existing
+ *	environment variable or create a new one.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_set(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int i;
+	char *ep;
+	size_t varlen, vallen;
+
+	/*
+	 * we can be invoked two ways:
+	 *   set var=value  argv[1]="var", argv[2]="value"
+	 *   set var = value  argv[1]="var", argv[2]="=", argv[3]="value"
+	 * - if the latter, shift 'em down.
+	 */
+	if (argc == 3) {
+		argv[2] = argv[3];
+		argc--;
+	}
+
+	if (argc != 2)
+		return KDB_ARGCOUNT;
+
+	/*
+	 * Check for internal variables
+	 */
+	if (strcmp(argv[1], "KDBDEBUG") == 0) {
+		unsigned int debugflags;
+		char *cp;
+
+		debugflags = simple_strtoul(argv[2], &cp, 0);
+		if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
+			kdb_printf("kdb: illegal debug flags '%s'\n",
+				    argv[2]);
+			return 0;
+		}
+		kdb_flags = (kdb_flags & ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+			  | (debugflags << KDB_DEBUG_FLAG_SHIFT);
+
+		return 0;
+	}
+
+	/*
+	 * Tokenizer squashed the '=' sign.  argv[1] is variable
+	 * name, argv[2] = value.
+	 */
+	varlen = strlen(argv[1]);
+	vallen = strlen(argv[2]);
+	ep = kdballocenv(varlen + vallen + 2);
+	if (ep == (char *)0)
+		return KDB_ENVBUFFULL;
+
+	sprintf(ep, "%s=%s", argv[1], argv[2]);
+
+	ep[varlen+vallen+1]='\0';
+
+	for(i=0; i<__nenv; i++) {
+		if (__env[i]
+		 && ((strncmp(__env[i], argv[1], varlen)==0)
+		   && ((__env[i][varlen] == '\0')
+		     || (__env[i][varlen] == '=')))) {
+			__env[i] = ep;
+			return 0;
+		}
+	}
+
+	/*
+	 * Wasn't existing variable.  Fit into slot.
+	 */
+	for(i=0; i<__nenv-1; i++) {
+		if (__env[i] == (char *)0) {
+			__env[i] = ep;
+			return 0;
+		}
+	}
+
+	return KDB_ENVFULL;
+}
+
+/*
+ * kdb_cpu
+ *
+ *	This function implements the 'cpu' command.
+ *
+ *	cpu [<cpunum>]
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	KDB_CMD_CPU for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ *	All cpus should be spinning in kdb().  However just in case
+ *	a cpu did not take the smp_kdb_stop NMI, check that a cpu
+ *	entered kdb() before passing control to it.
+ */
+
+int
+kdb_cpu(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	unsigned long cpunum;
+	int diag;
+
+	if (argc == 0) {
+		int i;
+
+		kdb_printf("Currently on cpu %d\n", smp_processor_id());
+		kdb_printf("Available cpus: ");
+		for (i=0; i<NR_CPUS; i++) {
+			if (KDB_STATE_CPU(KDB, i))
+				kdb_printf("%d ", i);
+		}
+		kdb_printf("\n");
+		return 0;
+	}
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetularg(argv[1], &cpunum);
+	if (diag)
+		return diag;
+
+	/*
+	 * Validate cpunum
+	 */
+	if ((cpunum > NR_CPUS)
+	 || !(cpu_online_map & (1UL << cpunum))
+	 || !KDB_STATE_CPU(KDB, cpunum))
+		return KDB_BADCPUNUM;
+
+	kdb_new_cpu = cpunum;
+
+	/*
+	 * Switch to other cpu
+	 */
+	return KDB_CMD_CPU;
+}
+
+/*
+ * kdb_ps
+ *
+ *	This function implements the 'ps' command which shows
+ *	a list of the active processes.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_ps(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	struct task_struct *p;
+
+	kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
+		(int)(2*sizeof(void *))+2, "Task Addr",
+		(int)(2*sizeof(void *))+2, "Thread");
+	for_each_task(p) {
+		kdb_printf("0x%p %08d %08d  %1.1d  %3.3d %s  0x%p%c%s\n",
+			   (void *)p, p->pid, p->p_pptr->pid,
+			   task_has_cpu(p), p->processor,
+			   (p->state == 0)?"run ":(p->state>0)?"stop":"unrn",
+			   (void *)(&p->thread),
+			   (p == current) ? '*': ' ',
+			   p->comm);
+	}
+
+	return 0;
+}
+
+/*
+ * kdb_ll
+ *
+ *	This function implements the 'll' command which follows a linked
+ *	list and executes an arbitrary command for each element.
+ *
+ *	ll <first-element> <linkoffset> <cmd>
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_ll(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int diag;
+	kdb_machreg_t addr;
+	long offset = 0;
+	kdb_machreg_t va;
+	unsigned long linkoffset;
+	int nextarg;
+
+	if (argc != 3) {
+		return KDB_ARGCOUNT;
+	}
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	diag = kdbgetularg(argv[2], &linkoffset);
+	if (diag)
+		return diag;
+
+	/*
+	 * Using the starting address as
+	 * the first element in the list, and assuming that
+	 * the list ends with a null pointer.
+	 */
+
+	va = addr;
+
+	while (va) {
+		char buf[80];
+
+		sprintf(buf, "%s " kdb_machreg_fmt "\n", argv[3], va);
+		diag = kdb_parse(buf, regs);
+		if (diag)
+			return diag;
+
+		addr = va + linkoffset;
+		va = kdba_getword(addr, sizeof(va));
+		if (KDB_STATE(SUPPRESS)) {
+			KDB_STATE_CLEAR(SUPPRESS);
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * kdb_sections_callback
+ *
+ *	Invoked from kallsyms_sections for each section.
+ *
+ * Inputs:
+ *	prevmod	Previous module name
+ *	modname	Module name
+ *	secname	Section name
+ *	secstart	Start of section
+ *	secend	End of section
+ *	secflags	Section flags
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Always zero
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+static int
+kdb_sections_callback(void *token, const char *modname, const char *secname,
+		      ElfW(Addr) secstart, ElfW(Addr) secend, ElfW(Word) secflags)
+{
+	const char **prevmod = (const char **)token;
+	if (*prevmod != modname) {
+		*prevmod = modname;
+		kdb_printf("\n%s", modname);
+	}
+	kdb_printf(" %s " kdb_elfw_addr_fmt0 " " kdb_elfw_addr_fmt0 " 0x%x",
+		secname, secstart, secend, secflags);
+	return(0);
+}
+
+/*
+ * kdb_sections
+ *
+ *	This function implements the 'sections' command which prints the
+ *	kernel and module sections.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Always zero
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_sections(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	char *prev_mod = NULL;
+	if (argc != 0) {
+		return KDB_ARGCOUNT;
+	}
+	kallsyms_sections(&prev_mod, kdb_sections_callback);
+	kdb_printf("\n");	/* End last module */
+	return(0);
+}
+
+/*
+ * kdb_help
+ *
+ *	This function implements the 'help' and '?' commands.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ *	envp	environment vector
+ *	regs	registers at time kdb was entered.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ */
+
+int
+kdb_help(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	kdbtab_t *kt;
+
+	kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
+	kdb_printf("----------------------------------------------------------\n");
+	for(kt=kdb_commands; kt->cmd_name; kt++) {
+		kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
+			kt->cmd_usage, kt->cmd_help);
+	}
+	return 0;
+}
+
+/*
+ * kdb_register
+ *
+ *	This function is used to register a kernel debugger command.
+ *
+ * Inputs:
+ *	cmd	Command name
+ *	func	Function to execute the command
+ *	usage	A simple usage string showing arguments
+ *	help	A simple help string describing command
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, one if a duplicate command.
+ * Locking:
+ *	none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_register(char *cmd,
+	     kdb_func_t func,
+	     char *usage,
+	     char *help,
+	     short minlen)
+{
+	int i;
+	kdbtab_t *kp;
+
+	/*
+	 * Brute force method to determine duplicates
+	 */
+	for (i=0, kp=kdb_commands; i<KDB_MAX_COMMANDS; i++, kp++) {
+		if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
+			kdb_printf("Duplicate kdb command registered: '%s'\n",
+				   cmd);
+			return 1;
+		}
+	}
+
+	/*
+	 * Insert command into first available location in table
+	 */
+	for (i=0, kp=kdb_commands; i<KDB_MAX_COMMANDS; i++, kp++) {
+		if (kp->cmd_name == NULL) {
+			kp->cmd_name = cmd;
+			kp->cmd_func = func;
+			kp->cmd_usage = usage;
+			kp->cmd_help = help;
+			kp->cmd_flags = 0;
+			kp->cmd_minlen = minlen;
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * kdb_unregister
+ *
+ *	This function is used to unregister a kernel debugger command.
+ *	It is generally called when a module which implements kdb
+ *	commands is unloaded.
+ *
+ * Inputs:
+ *	cmd	Command name
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, one if the command was not registered.
+ * Locking:
+ *	none.
+ * Remarks:
+ *
+ */
+
+int
+kdb_unregister(char *cmd)
+{
+	int i;
+	kdbtab_t *kp;
+
+	/*
+	 * find the command.
+	 */
+	for (i=0, kp=kdb_commands; i<KDB_MAX_COMMANDS; i++, kp++) {
+		if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
+			kp->cmd_name = NULL;
+			return 0;
+		}
+	}
+
+	/*
+	 * Couldn't find it.
+	 */
+	return 1;
+}
+
+/*
+ * kdb_inittab
+ *
+ *	This function is called by the kdb_init function to initialize
+ *	the kdb command table.  It must be called prior to any other
+ *	call to kdb_register.
+ *
+ * Inputs:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_inittab(void)
+{
+	int i;
+	kdbtab_t *kp;
+
+	for(i=0, kp=kdb_commands; i < KDB_MAX_COMMANDS; i++,kp++) {
+		kp->cmd_name = NULL;
+	}
+
+	kdb_register("md", kdb_md, "<vaddr>", "Display Memory Contents", 1);
+	kdb_register("mdr", kdb_md, "<vaddr> <bytes>", "Display Raw Memory", 0);
+	kdb_register("mds", kdb_md, "<vaddr>", "Display Memory Symbolically", 0);
+	kdb_register("mm", kdb_mm, "<vaddr> <contents>", "Modify Memory Contents", 0);
+	kdb_register("id", kdb_id, "<vaddr>", "Display Instructions", 1);
+	kdb_register("go", kdb_go, "[<vaddr>]", "Continue Execution", 1);
+	kdb_register("rd", kdb_rd, "", "Display Registers", 1);
+	kdb_register("rm", kdb_rm, "<reg> <contents>", "Modify Registers", 0);
+	kdb_register("ef", kdb_ef, "<vaddr>", "Display exception frame", 0);
+	kdb_register("bt", kdb_bt, "[<vaddr>]", "Stack traceback", 1);
+	kdb_register("btp", kdb_bt, "<pid>", "Display stack for process <pid>", 0);
+	kdb_register("bta", kdb_bt, "", "Display stack all processes", 0);
+	kdb_register("ll", kdb_ll, "<first-element> <linkoffset> <cmd>", "Execute cmd for each element in linked list", 0);
+	kdb_register("env", kdb_env, "", "Show environment variables", 0);
+	kdb_register("set", kdb_set, "", "Set environment variables", 0);
+	kdb_register("help", kdb_help, "", "Display Help Message", 1);
+	kdb_register("?", kdb_help, "", "Display Help Message", 0);
+	kdb_register("cpu", kdb_cpu, "<cpunum>", "Switch to new cpu", 0);
+	kdb_register("ps", kdb_ps, "", "Display active task list", 0);
+	kdb_register("reboot", kdb_reboot, "", "Reboot the machine immediately", 0);
+	kdb_register("sections", kdb_sections, "", "List kernel and module sections", 0);
+#if defined(CONFIG_MODULES)
+	kdb_register("lsmod", kdb_lsmod, "", "List loaded kernel modules", 0);
+	kdb_register("rmmod", kdb_rmmod, "<modname>", "Remove a kernel module", 1);
+#endif
+#if defined(CONFIG_MAGIC_SYSRQ)
+	kdb_register("sr", kdb_sr, "<key>", "Magic SysRq key", 0);
+#endif
+}
+
+/*
+ * kdb_cmd_init
+ *
+ *	This function is called by the kdb_init function to execute any
+ *	commands defined in kdb_cmds.
+ *
+ * Inputs:
+ *	Commands in *kdb_cmds[];
+ * Outputs:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ */
+
+static void __init
+kdb_cmd_init(void)
+{
+	int i, diag;
+	for (i = 0; kdb_cmds[i]; ++i) {
+		kdb_printf("kdb_cmd[%d]: %s", i, kdb_cmds[i]);
+		diag = kdb_parse(kdb_cmds[i], NULL);
+		if (diag)
+			kdb_printf("command failed, kdb diag %d\n", diag);
+	}
+}
+
+/*
+ * kdb_panic
+ *
+ *	Invoked via the panic_notifier_list.
+ *
+ * Inputs:
+ *	None.
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Zero.
+ * Locking:
+ *	None.
+ * Remarks:
+ *	When this function is called from panic(), the other cpus have already
+ *	been stopped.
+ *
+ */
+
+static int
+kdb_panic(struct notifier_block *self, unsigned long command, void *ptr)
+{
+	kdb(KDB_REASON_PANIC, 0, NULL);
+	return(0);
+}
+
+static struct notifier_block kdb_block = { kdb_panic, NULL, 0 };
+
+/*
+ * kdb_init
+ *
+ * 	Initialize the kernel debugger environment.
+ *
+ * Parameters:
+ *	None.
+ * Returns:
+ *	None.
+ * Locking:
+ *	None.
+ * Remarks:
+ *	None.
+ */
+
+void __init
+kdb_init(void)
+{
+	/*
+	 * This must be called before any calls to kdb_printf.
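+	 * (Presumably because kdb_printf() routes its output through the
+	 * kdb io layer that kdb_io_init() sets up.)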
+ */ + kdb_io_init(); + + kdb_inittab(); /* Initialize Command Table */ + kdb_initbptab(); /* Initialize Breakpoint Table */ + kdb_id_init(); /* Initialize Disassembler */ + kdba_init(); /* Architecture Dependent Initialization */ + + /* + * Use printk() to get message in log_buf[]; + */ + printk("kdb version %d.%d%s by Scott Lurndal, Keith Owens. "\ + "Copyright SGI, All Rights Reserved\n", + KDB_MAJOR_VERSION, KDB_MINOR_VERSION, KDB_TEST_VERSION); + + kdb_cmd_init(); /* Preset commands from kdb_cmds */ + kdb(KDB_REASON_SILENT, 0, 0); /* Activate any preset breakpoints on boot cpu */ + notifier_chain_register(&panic_notifier_list, &kdb_block); +} + +EXPORT_SYMBOL(kdb_register); +EXPORT_SYMBOL(kdb_unregister); +EXPORT_SYMBOL(kdba_getword); +EXPORT_SYMBOL(kdba_putword); +EXPORT_SYMBOL(kdbgetularg); +EXPORT_SYMBOL(kdbgetenv); +EXPORT_SYMBOL(kdbgetintenv); +EXPORT_SYMBOL(kdbgetaddrarg); +EXPORT_SYMBOL(kdb); +EXPORT_SYMBOL(kdb_on); +EXPORT_SYMBOL(kdbgetsymval); +EXPORT_SYMBOL(kdbnearsym); +EXPORT_SYMBOL(kdb_printf); +EXPORT_SYMBOL(kdb_symbol_print); diff -urN linux-2.4.17-rc2-virgin/kdb/kdbsupport.c linux-2.4.17-rc2-wli2/kdb/kdbsupport.c --- linux-2.4.17-rc2-virgin/kdb/kdbsupport.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/kdbsupport.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,472 @@ +/* + * Kernel Debugger Architecture Independent Support Functions + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Symbol table functions. + */ + +/* + * kdbgetsymval + * + * Return the address of the given symbol. + * + * Parameters: + * symname Character string containing symbol name + * symtab Structure to receive results + * Outputs: + * Returns: + * 0 Symbol not found, symtab zero filled + * 1 Symbol mapped to module/symbol/section, data in symtab + * Locking: + * None. + * Remarks: + */ + +int +kdbgetsymval(const char *symname, kdb_symtab_t *symtab) +{ + memset(symtab, 0, sizeof(*symtab)); + return(kallsyms_symbol_to_address( + symname, + NULL, + &symtab->mod_name, + &symtab->mod_start, + &symtab->mod_end, + &symtab->sec_name, + &symtab->sec_start, + &symtab->sec_end, + &symtab->sym_name, + &symtab->sym_start, + &symtab->sym_end)); +} + +/* + * kdbnearsym + * + * Return the name of the symbol with the nearest address + * less than 'addr'. + * + * Parameters: + * addr Address to check for symbol near + * symtab Structure to receive results + * Outputs: + * Returns: + * 0 No sections contain this address, symtab zero filled + * 1 Address mapped to module/symbol/section, data in symtab + * Locking: + * None. 
+ * Remarks: + */ + +int +kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) +{ + memset(symtab, 0, sizeof(*symtab)); + return(kallsyms_address_to_symbol( + addr, + &symtab->mod_name, + &symtab->mod_start, + &symtab->mod_end, + &symtab->sec_name, + &symtab->sec_start, + &symtab->sec_end, + &symtab->sym_name, + &symtab->sym_start, + &symtab->sym_end)); +} + +#if defined(CONFIG_SMP) +/* + * kdb_ipi + * + * This function is called from the non-maskable interrupt + * handler to handle a kdb IPI instruction. + * + * Inputs: + * ef = Exception frame pointer + * Outputs: + * None. + * Returns: + * 0 - Did not handle NMI + * 1 - Handled NMI + * Locking: + * None. + * Remarks: + * Initially one processor is invoked in the kdb() code. That + * processor sends an ipi which drives this routine on the other + * processors. All this does is call kdb() with reason SWITCH. + * This puts all processors into the kdb() routine and all the + * code for breakpoints etc. is in one place. + * One problem with the way the kdb NMI is sent, the NMI has no + * identification that says it came from kdb. If the cpu's kdb state is + * marked as "waiting for kdb_ipi" then the NMI is treated as coming from + * kdb, otherwise it is assumed to be for another reason and is ignored. + */ + +int +kdb_ipi(kdb_eframe_t ef, void (*ack_interrupt)(void)) +{ + /* Do not print before checking and clearing WAIT_IPI, IPIs are + * going all the time. + */ + if (KDB_STATE(WAIT_IPI)) { + /* + * Stopping other processors via smp_kdb_stop(). + */ + if (ack_interrupt) + (*ack_interrupt)(); /* Acknowledge the interrupt */ + KDB_STATE_CLEAR(WAIT_IPI); + KDB_DEBUG_STATE("kdb_ipi 1", 0); + kdb(KDB_REASON_SWITCH, 0, ef); /* Spin in kdb() */ + KDB_DEBUG_STATE("kdb_ipi 2", 0); + return 1; + } + return 0; +} +#endif /* CONFIG_SMP */ + +void +kdb_enablehwfault(void) +{ + kdba_enable_mce(); +} + +/* + * kdb_get_next_ar + * + * Get the next activation record from the stack. + * + * Inputs: + * arend Last byte +1 of the activation record. sp for the first + * frame, start of callee's activation record otherwise. + * func Start address of function. + * pc Current program counter within this function. pc for + * the first frame, caller's return address otherwise. + * fp Current frame pointer. Register fp for the first + * frame, oldfp otherwise. 0 if not known. + * ss Start of stack for the current process. + * Outputs: + * ar Activation record. + * symtab kallsyms symbol table data for the calling function. + * Returns: + * 1 if ar is usable, 0 if not. + * Locking: + * None. + * Remarks: + * Activation Record format, assuming a stack that grows down + * (KDB_STACK_DIRECTION == -1). + * + * +-----------------------------+ ^ ===================== + * | Return address, frame 3 | | + * +-----------------------------+ | + * | Frame Pointer, frame 3 |>--' + * +-----------------------------+<--. + * | Locals and automatics, | | + * | frame 2. (variable size) | | AR 2 + * +-----------------------------+ | + * | Save registers, | | + * | frame 2. (variable size) | | + * +-----------------------------+ | + * | Arguments to frame 1, | | + * | (variable size) | | + * +-----------------------------+ | ===================== + * | Return address, frame 2 | | + * +-----------------------------+ | + * | Frame Pointer, frame 2 |>--' + * +-----------------------------+<--. + * | Locals and automatics, | | + * | frame 1. (variable size) | | AR 1 + * +-----------------------------+ | + * | Save registers, | | + * | frame 1. 
(variable size)	|    |
+ * +-----------------------------+    |
+ * | Arguments to frame 0,	|    |
+ * | (variable size)		|    |
+ * +-----------------------------+    | -- (5) =====================
+ * | Return address, frame 1	|    |
+ * +-----------------------------+    | -- (0)
+ * | Frame Pointer, frame 1	|>--'
+ * +-----------------------------+	 -- (1), (2)
+ * | Locals and automatics,	|
+ * | frame 0.  (variable size)	|    AR 0
+ * +-----------------------------+	 -- (3)
+ * | Save registers,		|
+ * | frame 0.  (variable size)	|
+ * +-----------------------------+	 -- (4) =====================
+ *
+ * The stack for the top frame can be in one of several states.
+ *  (0) Immediately on entry to the function, stack pointer (sp) is
+ *      here.
+ *  (1) If the function was compiled with frame pointers and the 'push
+ *      fp' instruction has been executed then the pointer to the
+ *      previous frame is on the stack.  However there is no guarantee
+ *      that this saved pointer is valid, the calling function might
+ *      not have frame pointers.  sp is adjusted by wordsize after
+ *      'push fp'.
+ *  (2) If the function was compiled with frame pointers and the 'copy
+ *      sp to fp' instruction has been executed then fp points here.
+ *  (3) If the function startup has 'adjust sp by 0xnn bytes' and that
+ *      instruction has been executed then sp has been adjusted by
+ *      0xnn bytes for local and automatic variables.
+ *  (4) If the function startup has one or more 'push reg' instructions
+ *      and any have been executed then sp has been adjusted by
+ *      wordsize bytes for each register saved.
+ *
+ * As the function exits it rewinds the stack, typically to (1) then (0).
+ *
+ * The stack entries for the lower frames are normally in state (5).
+ *  (5) Arguments for the called frame are on the stack.
+ * However lower frames can be incomplete if there is an interrupt in
+ * progress.
+ *
+ * An activation record runs from the return address for a function
+ * through to the return address for the next function or sp, whichever
+ * comes first.  For each activation record we extract :-
+ *
+ *	start	Address of the activation record.
+ *	end	Address of the last byte+1 in the activation record.
+ *	ret	Return address to caller.
+ *	oldfp	Frame pointer to previous frame, 0 if this function was
+ *		not compiled with frame pointers.
+ *	fp	Frame pointer for the current frame, 0 if this function
+ *		was not compiled with frame pointers or fp has not been
+ *		set yet.
+ *	arg0	Address of the first argument (in the previous activation
+ *		record).
+ *	locals	Bytes allocated to locals and automatics.
+ *	regs	Bytes allocated to saved registers.
+ *	args	Bytes allocated to arguments (in the previous activation
+ *		record).
+ *	setup	Bytes allocated to setup data on stack (return address,
+ *		frame pointer).
+ *
+ * Although the kernel might be compiled with frame pointers, we still
+ * have to assume the worst and validate the frame.  Some calls from
+ * asm code to C code might not use frame pointers.  Third party binary
+ * only modules might be compiled without frame pointers, even when the
+ * rest of the kernel has frame pointers.  Some routines are always
+ * compiled with frame pointers, even if the overall kernel is not.  A
+ * routine compiled with frame pointers can be called from a routine
+ * without frame pointers, the previous "frame pointer" is saved on
+ * stack but it contains garbage.
+ *
+ * We check the object code to see if it saved a frame pointer and we
+ * validate that pointer.  Basically frame pointers are hints.
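+ *
+ * (A hypothetical minimal validation: reject a saved "frame pointer"
+ * unless ss <= oldfp && oldfp < arend, i.e. unless it at least points
+ * back into this process' stack.)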
+ */ + +#define FORCE_ARG(ar,n) (ar)->setup = (ar)->locals = (ar)->regs = \ + (ar)->fp = (ar)->oldfp = (ar)->ret = 0; \ + (ar)->start = (ar)->end - KDB_STACK_DIRECTION*(n)*sizeof(unsigned long); + +int +kdb_get_next_ar(kdb_machreg_t arend, kdb_machreg_t func, + kdb_machreg_t pc, kdb_machreg_t fp, kdb_machreg_t ss, + kdb_ar_t *ar, kdb_symtab_t *symtab) +{ + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: arend=0x%lx func=0x%lx pc=0x%lx fp=0x%lx\n", + arend, func, pc, fp); + } + + memset(ar, 0, sizeof(*ar)); + if (!kdbnearsym(pc, symtab)) { + symtab->sym_name = symtab->sec_name = ""; + symtab->mod_name = "kernel"; + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: callee not in kernel\n"); + } + pc = 0; + } + + if (!kdba_prologue(symtab, pc, arend, fp, ss, 0, ar)) { + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: callee prologue failed\n"); + } + return(0); + } + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: callee activation record\n"); + kdb_printf(" start=0x%lx end=0x%lx ret=0x%lx oldfp=0x%lx fp=0x%lx\n", + ar->start, ar->end, ar->ret, ar->oldfp, ar->fp); + kdb_printf(" locals=%d regs=%d setup=%d\n", + ar->locals, ar->regs, ar->setup); + } + + if (ar->ret) { + /* Run the caller code to get arguments to callee function */ + kdb_symtab_t caller_symtab; + kdb_ar_t caller_ar; + memset(&caller_ar, 0, sizeof(caller_ar)); + if (!kdbnearsym(ar->ret, &caller_symtab)) { + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: caller not in kernel\n"); + } + } else if (kdba_prologue(&caller_symtab, ar->ret, + ar->start, ar->oldfp, ss, 1, &caller_ar)) { + /* some caller data extracted */ ; + } else if (strcmp(symtab->sym_name, "do_exit") == 0) { + /* non-standard caller, force one argument */ + FORCE_ARG(&caller_ar, 1); + } else if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: caller prologue failed\n"); + } + if (KDB_DEBUG(AR)) { + kdb_printf("kdb_get_next_ar: caller activation record\n"); + kdb_printf(" start=0x%lx end=0x%lx ret=0x%lx" + " oldfp=0x%lx fp=0x%lx\n", + caller_ar.start, caller_ar.end, caller_ar.ret, + caller_ar.oldfp, caller_ar.fp); + kdb_printf(" locals=%d regs=%d args=%d setup=%d\n", + caller_ar.locals, caller_ar.regs, + caller_ar.args, caller_ar.setup); + } + if (caller_ar.start) { + ar->args = KDB_STACK_DIRECTION*(caller_ar.end - caller_ar.start) - + (caller_ar.setup + caller_ar.locals + caller_ar.regs); + if (ar->args < 0) + ar->args = 0; + if (ar->args) { + ar->arg0 = ar->start - + KDB_STACK_DIRECTION*(ar->args - 4); + if (KDB_DEBUG(AR)) { + kdb_printf(" callee arg0=0x%lx args=%d\n", + ar->arg0, ar->args); + } + } + } + } + + return(1); +} + +/* + * kdb_symbol_print + * + * Standard method for printing a symbol name and offset. + * Inputs: + * addr Address to be printed. + * symtab Address of symbol data, if NULL this routine does its + * own lookup. + * punc Punctuation for string, bit field. + * Outputs: + * None. + * Returns: + * Always 0. + * Locking: + * none. + * Remarks: + * The string and its punctuation is only printed if the address + * is inside the kernel, except that the value is always printed + * when requested. 
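+ *
+ *	For example (illustrative values), punc == KDB_SP_VALUE|KDB_SP_PAREN
+ *	would print "c0114a80 (schedule+0x40)" for an address 0x40 into
+ *	schedule(); the numbers are made up.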
+ */ + +void +kdb_symbol_print(kdb_machreg_t addr, const kdb_symtab_t *symtab_p, unsigned int punc) +{ + kdb_symtab_t symtab, *symtab_p2; + if (symtab_p) { + symtab_p2 = (kdb_symtab_t *)symtab_p; + } + else { + symtab_p2 = &symtab; + kdbnearsym(addr, symtab_p2); + } + if (symtab_p2->sym_name || (punc & KDB_SP_VALUE)) { + ; /* drop through */ + } + else { + return; + } + if (punc & KDB_SP_SPACEB) { + kdb_printf(" "); + } + if (punc & KDB_SP_VALUE) { + kdb_printf(kdb_machreg_fmt0, addr); + } + if (!symtab_p2->sym_name) { + return; + } + if (punc & KDB_SP_VALUE) { + kdb_printf(" "); + } + if (punc & KDB_SP_PAREN) { + kdb_printf("("); + } + if (strcmp(symtab_p2->mod_name, "kernel")) { + kdb_printf("[%s]", symtab_p2->mod_name); + } + kdb_printf("%s", symtab_p2->sym_name); + if (addr != symtab_p2->sym_start) { + kdb_printf("+0x%lx", addr - symtab_p2->sym_start); + } + if (punc & KDB_SP_SYMSIZE) { + kdb_printf("/0x%lx", symtab_p2->sym_end - symtab_p2->sym_start); + } + if (punc & KDB_SP_PAREN) { + kdb_printf(")"); + } + if (punc & KDB_SP_SPACEA) { + kdb_printf(" "); + } + if (punc & KDB_SP_NEWLINE) { + kdb_printf("\n"); + } +} + +/* + * kdb_strdup + * + * kdb equivalent of strdup, for disasm code. + * Inputs: + * str The string to duplicate. + * type Flags to kmalloc for the new string. + * Outputs: + * None. + * Returns: + * Address of the new string, NULL if storage could not be allocated. + * Locking: + * none. + * Remarks: + * This is not in lib/string.c because it uses kmalloc which is not + * available when string.o is used in boot loaders. + */ + +char *kdb_strdup(const char *str, int type) +{ + int n = strlen(str)+1; + char *s = kmalloc(n, type); + if (!s) return NULL; + return strcpy(s, str); +} diff -urN linux-2.4.17-rc2-virgin/kdb/modules/Makefile linux-2.4.17-rc2-wli2/kdb/modules/Makefile --- linux-2.4.17-rc2-virgin/kdb/modules/Makefile Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/modules/Makefile Tue Dec 18 22:21:49 2001 @@ -0,0 +1,15 @@ +# +# Makefile for i386-specific kdb files.. +# +# Copyright 1999, Silicon Graphics Inc. +# +# Written April 1999 by Scott Lurndal at Silicon Graphics, Inc. 
+# + +O_TARGET := vmlinux-obj.o +obj-$(CONFIG_KDB_MODULES) += kdbm_vm.o kdbm_pg.o +CFLAGS_kdbm_vm.o += -I $(TOPDIR)/drivers/scsi + +EXTRA_CFLAGS += -I $(TOPDIR)/arch/$(ARCH)/kdb + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17-rc2-virgin/kdb/modules/kdbm_pg.c linux-2.4.17-rc2-wli2/kdb/modules/kdbm_pg.c --- linux-2.4.17-rc2-virgin/kdb/modules/kdbm_pg.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/modules/kdbm_pg.c Tue Dec 18 22:56:43 2001 @@ -0,0 +1,885 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("SGI"); +MODULE_DESCRIPTION("Debug page information"); +MODULE_LICENSE("GPL"); + +#undef KDB_DO_PAGEBUF +#if defined(CONFIG_PAGE_BUF) || defined(CONFIG_PAGE_BUF_MODULE) +#define KDB_DO_PAGEBUF +#define _PAGE_BUF_INTERNAL_ 1 +#include +#endif /* CONFIG_PAGE_BUF || CONFIG_PAGE_BUF_MODULE */ + +/* Standard Linux page stuff */ + +static char *pg_flag_vals[] = { + "PG_locked", "PG_error", "PG_referenced", "PG_uptodate", + "PG_dirty", "PG_unused_5", "PG_lru", "PG_active", + "PG_slab", "PG_unused_9", "PG_skip", "PG_highmem", + "PG_checked", "PG_arch_1", "PG_reserved", "PG_launder", + NULL }; + +static char *bh_state_vals[] = { + "Uptodate", "Dirty", "Lock", "Req", + "Mapped", "New", "Async", "Wait_IO", + "Launder", "JBD", + /*XFS*/ "Delay", + NULL }; + +static char *inode_flag_vals[] = { + "I_DIRTY_SYNC", "I_DIRTY_DATASYNC", "I_DIRTY_PAGES", "I_LOCK", + "I_FREEING", "I_CLEAR", + /*XFS*/ "I_NEW", + NULL }; + +static char *map_flags(unsigned long flags, char *mapping[]) +{ + static char buffer[256]; + int index; + int offset = 12; + + buffer[0] = '\0'; + + for (index = 0; flags && mapping[index]; flags >>= 1, index++) { + if (flags & 1) { + if ((offset + strlen(mapping[index]) + 1) >= 80) { + strcat(buffer, "\n "); + offset = 12; + } else if (offset > 12) { + strcat(buffer, " "); + offset++; + } + strcat(buffer, mapping[index]); + offset += strlen(mapping[index]); + } + } + + return (buffer); +} + +static char *page_flags(unsigned long flags) +{ + return(map_flags(flags, pg_flag_vals)); +} + +static int +kdbm_buffers(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + struct buffer_head bh; + unsigned char *p = (unsigned char *)&bh; + unsigned long addr; + long offset=0; + int nextarg; + int diag; + int i; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + for (i = 0; i < sizeof(struct buffer_head); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("buffer_head at 0x%lx\n", addr); + kdb_printf(" next 0x%p bno %ld rsec %ld size %d dev 0x%x rdev 0x%x\n", + bh.b_next, bh.b_blocknr, bh.b_rsector, + bh.b_size, bh.b_dev, bh.b_rdev); + kdb_printf(" count %d state 0x%lx [%s] ftime 0x%lx b_list %d b_reqnext 0x%p b_data 0x%p\n", + bh.b_count.counter, bh.b_state, map_flags(bh.b_state, bh_state_vals), + bh.b_flushtime, bh.b_list, bh.b_reqnext, bh.b_data); + kdb_printf(" b_page 0x%p b_this_page 0x%p b_private 0x%p\n", + bh.b_page, bh.b_this_page, bh.b_private); + + return 0; +} + +static int +kdbm_page(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + struct page page; + unsigned char *p = (unsigned char *)&page; + unsigned long addr; + long offset=0; + int nextarg; + int diag; + int i; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + 
if (addr < PAGE_OFFSET) { + printk("Treating 0x%lx as page index, page at 0x%p\n", + addr, &mem_map[addr]); + addr = (unsigned long) &mem_map[addr]; + } + + for (i = 0; i < sizeof(struct page); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("struct page at 0x%lx\n", addr); + kdb_printf(" next 0x%p prev 0x%p addr space 0x%p index %lu (offset 0x%x)\n", + page.list.next, page.list.prev, page.mapping, page.index, + (int)(page.index << PAGE_CACHE_SHIFT)); + kdb_printf(" count %d flags %s virtual 0x%p\n", + page.count.counter, page_flags(page.flags), + page_address(&page)); + kdb_printf(" buffers 0x%p\n", page.buffers); + + return 0; +} + +unsigned long +print_request(unsigned long addr) +{ + struct request rq; + unsigned char *p = (unsigned char *)&rq; + int i; + + for (i = 0; i < sizeof(struct request); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("struct request at 0x%lx\n", addr); + kdb_printf(" rq_dev 0x%x cmd %d errors %d sector %ld nr_sectors %ld\n", + rq.rq_dev, rq.cmd, rq.errors, rq.sector, + rq.nr_sectors); + + kdb_printf(" hsect %ld hnrsect %ld nrseg %d nrhwseg %d currnrsect %ld seq %d\n", + rq.hard_sector, rq.hard_nr_sectors, + rq.nr_segments, rq.nr_hw_segments, + rq.current_nr_sectors, rq.elevator_sequence); + kdb_printf(" "); +#ifdef STRUCT_REQUEST_HAS_KIOBUF + kdb_printf("kiobuf 0x%p ", rq.kiobuf); +#endif /* STRUCT_REQUEST_HAS_KIOBUF */ + kdb_printf("bh 0x%p bhtail 0x%p req_q 0x%p\n\n", + rq.bh, rq.bhtail, rq.q); + + return (unsigned long) rq.queue.next; +} + +#ifdef KDB_DO_PAGEBUF +static int +kdbm_request(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + long offset=0; + unsigned long addr; + int nextarg; + int diag; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + print_request(addr); + return 0; +} + + +static int +kdbm_rqueue(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + struct request_queue rq; + unsigned char *p = (unsigned char *)&rq; + unsigned long addr, head_addr, next; + long offset=0; + int nextarg; + int diag; + int i; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + for (i = 0; i < sizeof(struct request_queue); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("struct request_queue at 0x%lx [%s]\n", addr, + rq.plugged ? "plugged" : "running"); + kdb_printf(" read free_list [0x%p, 0x%p]\n", + rq.rq[READ].free.prev, + rq.rq[READ].free.next); + kdb_printf(" write free_list [0x%p, 0x%p]\n", + rq.rq[WRITE].free.prev, + rq.rq[WRITE].free.next); + + i = 0; + next = (unsigned long)rq.queue_head.next; + head_addr = addr + offsetof(struct request_queue, queue_head); + kdb_printf(" request queue: %s\n", next == head_addr ? 
+ "empty" : ""); + while (next != head_addr) { + i++; + next = print_request(next); + } + + if (i) + kdb_printf("%d requests found\n", i); + + return 0; +} +#endif /* KDB_DO_PAGEBUF */ + + +static void +do_buffer(unsigned long addr) +{ + struct buffer_head bh; + unsigned char *p = (unsigned char *)&bh; + int i; + + for (i = 0; i < sizeof(struct buffer_head); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("bh 0x%lx bno %8ld [%s]\n", addr, bh.b_blocknr, + map_flags(bh.b_state, bh_state_vals)); +} + +static int +kdbm_inode_pages(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + struct inode inode; + struct address_space ap; + unsigned char *p = (unsigned char *)&inode; + unsigned long addr, addr1 = 0; + long offset=0; + int nextarg; + int diag; + int i, j; + int which=0; + + struct list_head *head, *curr; + + if (argc < 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + if (argc == 2) { + nextarg = 2; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr1, + &offset, NULL, regs); + if (diag) + return diag; + kdb_printf("Looking for page index 0x%lx ... \n", addr1); + } + + for (i = 0; i < sizeof(struct inode); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + addr = (unsigned long) inode.i_mapping; + p = (unsigned char *)≈ + + for (i = 0; i < sizeof(struct address_space); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + if (!&inode.i_mapping) goto out; + again: + if (which == 0){ + which=1; + head = &inode.i_mapping->clean_pages; + kdb_printf("CLEAN page_struct index cnt flags\n"); + } else if (which == 1) { + which=2; + head = &inode.i_mapping->dirty_pages; + kdb_printf("DIRTY page_struct index cnt flags\n"); + } else if (which == 2) { + which=3; + head = &inode.i_mapping->locked_pages; + kdb_printf("LOCKED page_struct index cnt flags\n"); + } else { + goto out; + } + + if(!head) goto again; + curr = head->next; + while (curr != head) { + struct page page; + struct list_head curr_struct; + + addr = (unsigned long) list_entry(curr, struct page, list); + p = (unsigned char *)&page; + + for (j = 0; j < sizeof(struct page); j++) + *p++ = kdba_getword(addr+j, 1); + + if (!addr1 || page.index == addr1 || + (addr1 == -1 && (page.flags & ( 1 << PG_locked)))) + { + kdb_printf(" 0x%lx %6lu %5d 0x%lx ", + addr, page.index, page.count.counter, + page.flags); + if (page.buffers) + do_buffer((unsigned long) page.buffers); + } + + addr = (unsigned long) curr; + p = (unsigned char *)&curr_struct; + for (j = 0; j < sizeof(struct list_head); j++) + *p++ = kdba_getword(addr+j, 1); + + curr = curr_struct.next; + } + goto again; + out: + return 0; +} + +static int +kdbm_inode(int argc, const char **argv, const char **envp, + struct pt_regs *regs) +{ + struct inode inode; + unsigned char *p = (unsigned char *)&inode; + unsigned long addr; + unsigned char *iaddr; + long offset=0; + int nextarg; + int diag; + int i; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + for (i = 0; i < sizeof(struct inode); i++) { + *p++ = kdba_getword(addr+i, 1); + } + + kdb_printf("struct inode at 0x%lx\n", addr); + + kdb_printf(" i_ino = %lu i_count = %u i_dev = 0x%x i_size %Ld\n", + inode.i_ino, atomic_read(&inode.i_count), + inode.i_dev, inode.i_size); + + kdb_printf(" i_mode = 0x%x i_nlink = %d i_rdev = 0x%x\n", + inode.i_mode, inode.i_nlink, + inode.i_rdev); + + kdb_printf(" i_hash.nxt = 
+
+static int
+kdbm_inode(int argc, const char **argv, const char **envp,
+	struct pt_regs *regs)
+{
+	struct inode inode;
+	unsigned char *p = (unsigned char *)&inode;
+	unsigned long addr;
+	unsigned char *iaddr;
+	long offset=0;
+	int nextarg;
+	int diag;
+	int i;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(struct inode); i++) {
+		*p++ = kdba_getword(addr+i, 1);
+	}
+
+	kdb_printf("struct inode at 0x%lx\n", addr);
+
+	kdb_printf(" i_ino = %lu i_count = %u i_dev = 0x%x i_size %Ld\n",
+		inode.i_ino, atomic_read(&inode.i_count),
+		inode.i_dev, inode.i_size);
+
+	kdb_printf(" i_mode = 0x%x i_nlink = %d i_rdev = 0x%x\n",
+		inode.i_mode, inode.i_nlink,
+		inode.i_rdev);
+
+	kdb_printf(" i_hash.nxt = 0x%p i_hash.prv = 0x%p\n",
+		inode.i_hash.next, inode.i_hash.prev);
+
+	kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+		inode.i_list.next, inode.i_list.prev);
+
+	kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+		inode.i_dentry.next,
+		inode.i_dentry.prev);
+
+	kdb_printf(" i_dirty_buffers.nxt = 0x%p i_dirty_buffers.prv = 0x%p\n",
+		inode.i_dirty_buffers.next,
+		inode.i_dirty_buffers.prev);
+
+	kdb_printf(" i_sb = 0x%p i_op = 0x%p i_data = 0x%lx nrpages = %lu\n",
+		inode.i_sb, inode.i_op,
+		addr + offsetof(struct inode, i_data),
+		inode.i_data.nrpages);
+	kdb_printf(" i_mapping = 0x%p\n i_flags 0x%x i_state 0x%lx [%s]",
+		inode.i_mapping, inode.i_flags,
+		inode.i_state,
+		map_flags(inode.i_state, inode_flag_vals));
+
+	iaddr  = (unsigned char *)addr;
+	iaddr += offsetof(struct inode, u);
+
+	kdb_printf("  fs specific info @ 0x%p\n", iaddr);
+
+	return (0);
+}
+
+static int
+kdbm_kiobuf(int argc, const char **argv, const char **envp,
+	struct pt_regs *regs)
+{
+	struct kiobuf kiobuf;
+	struct page page;
+	unsigned char *p = (unsigned char *)&kiobuf;
+	struct page *page_array[64];
+	unsigned long addr;
+	long offset=0;
+	int nextarg;
+	int diag;
+	int i, j;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(struct kiobuf); i++) {
+		*p++ = kdba_getword(addr+i, 1);
+	}
+
+	if (kiobuf.maplist != (struct page **)&(kiobuf.map_array)) {
+		p = (unsigned char *)page_array;
+		for (i = 0; i < (kiobuf.nr_pages * sizeof(struct page *)); i++) {
+			*p++ = kdba_getword((kdb_machreg_t)kiobuf.maplist + i, 1);
+		}
+		kiobuf.maplist = page_array;
+	}
+	kdb_printf(" errno %d", kiobuf.errno);
+#ifdef KDB_DO_PAGEBUF
+#ifdef KIOBUF_IO
+	kdb_printf(" pb 0x%p", (page_buf_t *)kiobuf.k_dev_id);
+#endif
+#endif /* KDB_DO_PAGEBUF */
+	kdb_printf("\n");
+	kdb_printf("   page_struct  page_addr  cnt  flags\n");
+	for (i = 0; i < kiobuf.nr_pages; i++) {
+		addr = (unsigned long) kiobuf.maplist[i];
+		p = (unsigned char *)&page;
+
+		for (j = 0; j < sizeof(struct page); j++) {
+			*p++ = kdba_getword(addr+j, 1);
+		}
+		kdb_printf("   0x%p  0x%p  %d  0x%lx\n",
+			kiobuf.maplist[i], page_address(&page),
+			page.count.counter, page.flags);
+	}
+
+	return (0);
+}
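(Editor's note.) Every dumper in this module, kdbm_inode and kdbm_kiobuf above included, uses the same idiom: copy the target structure out of the debuggee one byte at a time through kdba_getword(), which validates each access, and only then dereference the local copy. The repeated loop could be captured by a helper along these lines; kdb_copyin is hypothetical and not present in the patch.

    /* Hypothetical helper capturing the byte-copy idiom used above:
     * fetch 'size' bytes at debuggee address 'addr' into a local
     * buffer via kdba_getword(), which validates each access. */
    static void kdb_copyin(void *dst, unsigned long addr, int size)
    {
        unsigned char *p = dst;
        int i;

        for (i = 0; i < size; i++)
            *p++ = kdba_getword(addr + i, 1);
    }

    /* usage, e.g. in kdbm_page():
     *     struct page page;
     *     kdb_copyin(&page, addr, sizeof(page));
     */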
+
+#ifdef KDB_DO_PAGEBUF
+
+/* pagebuf stuff */
+
+static char *pb_flag_vals[] = {
+	"READ", "WRITE", "MAPPED", "PARTIAL",
+	"ASYNC", "NONE", "DELWRI", "FREED", "SYNC",
+	"MAPPABLE", "FS_RESERVED_1", "FS_RESERVED_2", "RELEASE",
+	"LOCK", "TRYLOCK", "ALLOCATE", "FILE_ALLOCATE", "DONT_BLOCK",
+	"DIRECT", "LOCKABLE", "NEXT_KEY", "ENTER_PAGES",
+	"ALL_PAGES_MAPPED", "SOME_INVALID_PAGES", "ADDR_ALLOCATED",
+	"MEM_ALLOCATED", "GRIO", "FORCEIO", "SHUTDOWN",
+	NULL };
+
+static char *pbm_flag_vals[] = {
+	"EOF", "HOLE", "DELAY", "FLUSH_OVERLAPS",
+	"READAHEAD", "UNWRITTEN", "DONTALLOC", "NEW",
+	NULL };
+
+static char *pb_flags(page_buf_flags_t pb_flag)
+{
+	return(map_flags((unsigned long) pb_flag, pb_flag_vals));
+}
+
+static int
+kdbm_pb_flags(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	unsigned long flags;
+	int diag;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetularg(argv[1], &flags);
+	if (diag)
+		return diag;
+
+	kdb_printf("pb flags 0x%lx = %s\n", flags, pb_flags(flags));
+
+	return 0;
+}
+
+static int
+kdbm_pb(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	page_buf_private_t bp;
+	unsigned char *p = (unsigned char *)&bp;
+	unsigned long addr;
+	long offset=0;
+	int nextarg;
+	int diag;
+	int i;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
+				&offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(page_buf_private_t); i++) {
+		*p++ = kdba_getword(addr+i, 1);
+	}
+
+	return 0;
+}
+
+#ifdef PAGEBUF_TRACE
+
+#define EV_SIZE	(sizeof(event_names)/sizeof(char *))
+
+void
+pb_trace_core(
+	unsigned long match,
+	char *event_match,
+	unsigned long long offset,
+	long long mask)
+{
+	extern struct pagebuf_trace_buf pb_trace;
+	int i, total, end;
+	pagebuf_trace_t *trace;
+	char *event;
+	char value[10];
+
+	end = pb_trace.start - 1;
+	if (end < 0)
+		end = PB_TRACE_BUFSIZE - 1;
+
+	if (match && (match < PB_TRACE_BUFSIZE)) {
+		for (i = pb_trace.start, total = 0; i != end; i = CIRC_INC(i)) {
+			trace = &pb_trace.buf[i];
+			if (trace->pb == 0)
+				continue;
+			total++;
+		}
+		total = total - match;
+		for (i = pb_trace.start; i != end && total; i = CIRC_INC(i)) {
+			trace = &pb_trace.buf[i];
+			if (trace->pb == 0)
+				continue;
+			total--;
+		}
+		match = 0;
+	} else
+		i = pb_trace.start;
+	for ( ; i != end; i = CIRC_INC(i)) {
+		trace = &pb_trace.buf[i];
+
+		if (offset) {
+			if ((trace->offset & ~mask) != offset)
+				continue;
+		}
+
+		if (trace->pb == 0)
+			continue;
+
+		if ((match != 0) && (trace->pb != match))
+			continue;
+
+		if ((trace->event < EV_SIZE) && event_names[trace->event]) {
+			event = event_names[trace->event];
+		} else if (trace->event == 1000) {
+			event = (char *)trace->misc;
+		} else {
+			event = value;
+			sprintf(value, "%8d", trace->event);
+		}
+
+		if (event_match && strcmp(event, event_match)) {
+			continue;
+		}
+
+		kdb_printf("pb 0x%lx [%s] (hold %u lock %d) misc 0x%p",
+			trace->pb, event,
+			trace->hold, trace->lock_value,
+			trace->misc);
+		kdb_symbol_print((unsigned int)trace->ra, NULL,
+			KDB_SP_SPACEB|KDB_SP_PAREN|KDB_SP_NEWLINE);
+		kdb_printf("    offset 0x%Lx size 0x%x task 0x%p\n",
+			trace->offset, trace->size, trace->task);
+		kdb_printf("    flags: %s\n",
+			pb_flags(trace->flags));
+	}
+}
+
+
+static int
+kdbm_pbtrace_offset(int argc, const char **argv, const char **envp,
+	struct pt_regs *regs)
+{
+	long mask = 0;
+	unsigned long offset = 0;
+	int diag;
+
+	if (argc > 2)
+		return KDB_ARGCOUNT;
+
+	if (argc > 0) {
+		diag = kdbgetularg(argv[1], &offset);
+		if (diag)
+			return diag;
+	}
+
+	if (argc > 1) {
+		diag = kdbgetularg(argv[2], (unsigned long *)&mask);
+		if (diag)
+			return diag;
+	}
+
+	pb_trace_core(0, NULL, (unsigned long long)offset,
+			(long long)mask);	/* sign extend mask */
+	return 0;
+}
+
+static int
+kdbm_pbtrace(int argc, const char **argv, const char **envp,
+	struct pt_regs *regs)
+{
+	unsigned long addr = 0;
+	int diag, nextarg;
+	long offset = 0;
+	char *event_match = NULL;
+
+	if (argc > 1)
+		return KDB_ARGCOUNT;
+
+	if (argc == 1) {
+		if (isupper(argv[1][0]) || islower(argv[1][0])) {
+			event_match = (char *)argv[1];
+			printk("event match on \"%s\"\n", event_match);
+			argc = 0;
+		} else {
+			nextarg = 1;
+			diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+			if (diag) {
+				printk("failed to parse %s as a number\n",
+					argv[1]);
+				return diag;
+			}
+		}
+	}
+
+	pb_trace_core(addr, event_match, 0LL, 0LL);
+	return 0;
+}
+
+#else /* PAGEBUF_TRACE */
+static int
+kdbm_pbtrace(int argc, const char **argv, const char **envp,
+	struct pt_regs *regs)
+{
+	kdb_printf("pagebuf tracing not compiled in\n");
+
+	return 0;
+}
+#endif /* PAGEBUF_TRACE */
+#endif /* KDB_DO_PAGEBUF */
+
+static int __init kdbm_pg_init(void)
+{
+	kdb_register("kiobuf", kdbm_kiobuf, "", "Display kiobuf", 0);
+	kdb_register("page", kdbm_page, "", "Display page", 0);
+	kdb_register("inode", kdbm_inode, "", "Display inode", 0);
+	kdb_register("bh", kdbm_buffers, "", "Display buffer", 0);
+	kdb_register("inode_pages", kdbm_inode_pages, "", "Display pages in an inode", 0);
+#ifdef KDB_DO_PAGEBUF + kdb_register("pb", kdbm_pb, "", "Display page_buf_t", 0); + kdb_register("pbflags", kdbm_pb_flags, "", "Display page buf flags", 0); + kdb_register("pbiodesc", kdbm_pbiodesc, "", "Display I/O Descriptor", 0); + kdb_register("pbmap", kdbm_pbmap, "", "Display Bmap", 0); + kdb_register("pbtrace", kdbm_pbtrace, "|", "page_buf_t trace", 0); +#ifdef PAGEBUF_TRACE + kdb_register("pboffset", kdbm_pbtrace_offset, " []", "page_buf_t trace", 0); +#endif + kdb_register("req", kdbm_request, "", "dump request struct", 0); + kdb_register("rqueue", kdbm_rqueue, "", "dump request queue", 0); + +#endif /* KDB_DO_PAGEBUF */ + + return 0; +} + + +static void __exit kdbm_pg_exit(void) +{ + kdb_unregister("kiobuf"); + kdb_unregister("page"); + kdb_unregister("inode"); + kdb_unregister("bh"); + kdb_unregister("inode_pages"); + kdb_unregister("pb"); + kdb_unregister("pbflags"); + kdb_unregister("pbmap"); + kdb_unregister("pbiodesc"); + kdb_unregister("pbtrace"); +#ifdef PAGEBUF_TRACE + kdb_unregister("pboffset"); +#endif + kdb_unregister("req"); + kdb_unregister("rqueue"); +} + +module_init(kdbm_pg_init) +module_exit(kdbm_pg_exit) diff -urN linux-2.4.17-rc2-virgin/kdb/modules/kdbm_vm.c linux-2.4.17-rc2-wli2/kdb/modules/kdbm_vm.c --- linux-2.4.17-rc2-virgin/kdb/modules/kdbm_vm.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kdb/modules/kdbm_vm.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,422 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("SGI"); +MODULE_DESCRIPTION("Debug VM information"); +MODULE_LICENSE("GPL"); + +struct __vmflags { + unsigned long mask; + char *name; +} vmflags[] = { + { VM_READ, "READ" }, + { VM_WRITE, "WRITE" }, + { VM_EXEC, "EXEC" }, + { VM_SHARED, "SHARED" }, + { VM_MAYREAD, "MAYREAD" }, + { VM_MAYWRITE, "MAYWRITE" }, + { VM_MAYEXEC, "MAYEXEC" }, + { VM_MAYSHARE, "MAYSHARE" }, + { VM_GROWSDOWN, "GROWSDOWN" }, + { VM_GROWSUP, "GROWSUP" }, + { VM_SHM, "SHM" }, + { VM_DENYWRITE, "DENYWRITE" }, + { VM_EXECUTABLE, "EXECUTABLE" }, + { VM_LOCKED, "LOCKED" }, + { VM_IO , "IO " }, + { 0, "" } +}; + +static int +kdbm_vm(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + struct vm_area_struct vp; + unsigned char *bp = (unsigned char *)&vp; + unsigned long addr; + long offset=0; + int nextarg; + int diag; + struct __vmflags *tp; + int i; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + for (i=0; imask; tp++) { + if (vp.vm_flags & tp->mask) { + kdb_printf("%s ", tp->name); + } + } + kdb_printf("\n"); + + return 0; +} + +static int +kdbm_fp(int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + struct file f; + unsigned char *fp = (unsigned char *)&f; + struct inode i; + unsigned char *ip = (unsigned char *)&i; + struct dentry d; + unsigned char *dp = (unsigned char *)&d; + int nextarg; + unsigned long addr; + unsigned long filpaddr; + long offset; + int diag; + int j; + + if (argc != 1) + return KDB_ARGCOUNT; + + nextarg = 1; + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + if (diag) + return diag; + + filpaddr = addr; + for (j=0; j\n", + d.d_name.len, d.d_name.name); + + kdb_printf(" d_count = %d d_flags = 0x%x d_inode = 0x%p\n", + atomic_read(&d.d_count), d.d_flags, d.d_inode); + + kdb_printf(" d_hash.nxt = 0x%p d_hash.prv = 0x%p\n", + d.d_hash.next, d.d_hash.prev); + + kdb_printf(" d_lru.nxt = 0x%p d_lru.prv = 0x%p\n", + 
+		d.d_lru.next, d.d_lru.prev);
+
+	kdb_printf("  d_child.nxt = 0x%p d_child.prv = 0x%p\n",
+		d.d_child.next, d.d_child.prev);
+
+	kdb_printf("  d_subdirs.nxt = 0x%p d_subdirs.prv = 0x%p\n",
+		d.d_subdirs.next, d.d_subdirs.prev);
+
+	kdb_printf("  d_alias.nxt = 0x%p d_alias.prv = 0x%p\n",
+		d.d_alias.next, d.d_alias.prev);
+
+	kdb_printf("  d_op = 0x%p d_sb = 0x%p\n\n",
+		d.d_op, d.d_sb);
+
+
+	kdb_printf("\nInode Entry at 0x%p\n", d.d_inode);
+
+	kdb_printf("  i_mode = 0x%x i_nlink = %d i_rdev = 0x%x\n",
+		i.i_mode, i.i_nlink, i.i_rdev);
+
+	kdb_printf("  i_ino = %ld i_count = %d i_dev = 0x%x\n",
+		i.i_ino, atomic_read(&i.i_count), i.i_dev);
+
+	kdb_printf("  i_hash.nxt = 0x%p i_hash.prv = 0x%p\n",
+		i.i_hash.next, i.i_hash.prev);
+
+	kdb_printf("  i_list.nxt = 0x%p i_list.prv = 0x%p\n",
+		i.i_list.next, i.i_list.prev);
+
+	kdb_printf("  i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
+		i.i_dentry.next, i.i_dentry.prev);
+
+	return 0;
+}
+
+static int
+kdbm_dentry(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	struct dentry d;
+	unsigned char *dp = (unsigned char *)&d;
+	int nextarg;
+	unsigned long addr;
+	unsigned long dentryaddr;
+	long offset;
+	int diag;
+	int j;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	dentryaddr = addr;
+
+	for (j = 0; j < sizeof(struct dentry); j++)
+		*dp++ = kdba_getword(dentryaddr+j, 1);
+
+	kdb_printf("Dentry at 0x%lx, d_name.len = %d d_name.name = <%s>\n",
+		dentryaddr, d.d_name.len, d.d_name.name);
+
+	kdb_printf("  d_count = %d d_flags = 0x%x d_inode = 0x%p\n",
+		atomic_read(&d.d_count), d.d_flags, d.d_inode);
+
+	kdb_printf("  d_hash.nxt = 0x%p d_hash.prv = 0x%p\n",
+		d.d_hash.next, d.d_hash.prev);
+
+	kdb_printf("  d_lru.nxt = 0x%p d_lru.prv = 0x%p\n",
+		d.d_lru.next, d.d_lru.prev);
+
+	kdb_printf("  d_child.nxt = 0x%p d_child.prv = 0x%p\n",
+		d.d_child.next, d.d_child.prev);
+
+	kdb_printf("  d_subdirs.nxt = 0x%p d_subdirs.prv = 0x%p\n",
+		d.d_subdirs.next, d.d_subdirs.prev);
+
+	kdb_printf("  d_alias.nxt = 0x%p d_alias.prv = 0x%p\n",
+		d.d_alias.next, d.d_alias.prev);
+
+	kdb_printf("  d_op = 0x%p d_sb = 0x%p\n\n",
+		d.d_op, d.d_sb);
+
+	return 0;
+}
+
+static int
+kdbm_sh(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int i;
+	int diag;
+	int nextarg;
+	unsigned long addr;
+	long offset = 0L;
+	struct Scsi_Host sh;
+	unsigned char *shp = (unsigned char *)&sh;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(struct Scsi_Host); i++)
+		*shp++ = kdba_getword(addr+i, 1);
+
+	kdb_printf("Scsi_Host at 0x%lx\n", addr);
+	kdb_printf("next = 0x%p   host_queue = 0x%p\n",
+		sh.next, sh.host_queue);
+	kdb_printf("ehandler = 0x%p eh_wait = 0x%p eh_notify = 0x%p eh_action = 0x%p\n",
+		sh.ehandler, sh.eh_wait, sh.eh_notify, sh.eh_action);
+	kdb_printf("eh_active = %d host_wait = 0x%p hostt = 0x%p host_active = %d\n",
+		sh.eh_active, &sh.host_wait, sh.hostt, sh.host_active.counter);
+	kdb_printf("host_failed = %d extra_bytes = %d host_no = %d resetting = %d\n",
+		sh.host_failed, sh.extra_bytes, sh.host_no, sh.resetting);
+	kdb_printf("max id/lun/channel = [%d/%d/%d] this_id = %d\n",
+		sh.max_id, sh.max_lun, sh.max_channel, sh.this_id);
+	kdb_printf("can_queue = %d cmd_per_lun = %d sg_tablesize = %d u_isa_dma = %d\n",
+		sh.can_queue, sh.cmd_per_lun, sh.sg_tablesize, sh.unchecked_isa_dma);
+	kdb_printf("host_blocked = %d reverse_ordering = %d\n",
+		sh.host_blocked, sh.reverse_ordering);
+
+	return 0;
+}
+
+static int
+kdbm_sd(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int i;
+	int diag;
+	int nextarg;
+	unsigned long addr;
+	long offset = 0L;
+	struct scsi_device sd;
+	unsigned char *sdp = (unsigned char *)&sd;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(struct scsi_device); i++)
+		*sdp++ = kdba_getword(addr+i, 1);
+
+	kdb_printf("scsi_device at 0x%lx\n", addr);
+	kdb_printf("next = 0x%p prev = 0x%p host = 0x%p\n",
+		sd.next, sd.prev, sd.host);
+	kdb_printf("device_busy = %d device_queue 0x%p\n",
+		sd.device_busy, sd.device_queue);
+	kdb_printf("id/lun/chan = [%d/%d/%d] single_lun = %d device_blocked = %d\n",
+		sd.id, sd.lun, sd.channel, sd.single_lun, sd.device_blocked);
+	kdb_printf("queue_depth = %d current_tag = %d scsi_level = %d\n",
+		sd.queue_depth, sd.current_tag, sd.scsi_level);
+	kdb_printf("%8.8s %16.16s %4.4s\n", sd.vendor, sd.model, sd.rev);
+
+	return 0;
+}
+
+static char *
+str_rq_status(int rq_status)
+{
+	switch (rq_status) {
+	case RQ_INACTIVE:
+		return "RQ_INACTIVE";
+	case RQ_ACTIVE:
+		return "RQ_ACTIVE";
+	case RQ_SCSI_BUSY:
+		return "RQ_SCSI_BUSY";
+	case RQ_SCSI_DONE:
+		return "RQ_SCSI_DONE";
+	case RQ_SCSI_DISCONNECTING:
+		return "RQ_SCSI_DISCONNECTING";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static int
+kdbm_sc(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+	int i;
+	int diag;
+	int nextarg;
+	unsigned long addr;
+	long offset = 0L;
+	struct scsi_cmnd sc;
+	unsigned char *scp = (unsigned char *)&sc;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs);
+	if (diag)
+		return diag;
+
+	for (i = 0; i < sizeof(struct scsi_cmnd); i++)
+		*scp++ = kdba_getword(addr+i, 1);
+
+	kdb_printf("scsi_cmnd at 0x%lx\n", addr);
+	kdb_printf("host = 0x%p state = %d owner = %d device = 0x%p\n",
+		sc.host, sc.state, sc.owner, sc.device);
+	kdb_printf("next = 0x%p reset_chain = 0x%p eh_state = %d done = 0x%p\n",
+		sc.next, sc.reset_chain, sc.eh_state, sc.done);
+	kdb_printf("serial_number = %ld serial_num_at_to = %ld retries = %d timeout = %d\n",
+		sc.serial_number, sc.serial_number_at_timeout, sc.retries, sc.timeout);
+	kdb_printf("id/lun/chan = [%d/%d/%d] cmd_len = %d old_cmd_len = %d\n",
+		sc.target, sc.lun, sc.channel, sc.cmd_len, sc.old_cmd_len);
+	kdb_printf("cmnd = [%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x]\n",
+		sc.cmnd[0], sc.cmnd[1], sc.cmnd[2], sc.cmnd[3], sc.cmnd[4],
+		sc.cmnd[5], sc.cmnd[6], sc.cmnd[7], sc.cmnd[8], sc.cmnd[9],
+		sc.cmnd[10], sc.cmnd[11]);
+	kdb_printf("data_cmnd = [%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x]\n",
+		sc.data_cmnd[0], sc.data_cmnd[1], sc.data_cmnd[2], sc.data_cmnd[3], sc.data_cmnd[4],
+		sc.data_cmnd[5], sc.data_cmnd[6], sc.data_cmnd[7], sc.data_cmnd[8], sc.data_cmnd[9],
+		sc.data_cmnd[10], sc.data_cmnd[11]);
+	kdb_printf("request_buffer = 0x%p bh_next = 0x%p request_bufflen = %d\n",
+		sc.request_buffer, sc.bh_next, sc.request_bufflen);
+	kdb_printf("use_sg = %d old_use_sg = %d sglist_len = %d abort_reason = %d\n",
+		sc.use_sg, sc.old_use_sg, sc.sglist_len, sc.abort_reason);
+	kdb_printf("bufflen = %d buffer = 0x%p underflow = %d transfersize = %d\n",
+		sc.bufflen, sc.buffer, sc.underflow, sc.transfersize);
+	kdb_printf("tag = %d pid = %ld\n",
+		sc.tag, sc.pid);
+	kdb_printf("request struct\n");
+	kdb_printf("rq_status = %s rq_dev = [%d/%d] cmd = %d errors = %d\n",
+		str_rq_status(sc.request.rq_status),
+		MAJOR(sc.request.rq_dev),
+		MINOR(sc.request.rq_dev), sc.request.cmd,
+		sc.request.errors);
+	kdb_printf("sector = %ld nr_sectors = %ld current_nr_sectors = %ld\n",
+		sc.request.sector, sc.request.nr_sectors, sc.request.current_nr_sectors);
+	kdb_printf("buffer = 0x%p bh = 0x%p bhtail = 0x%p\n",
+		sc.request.buffer, sc.request.bh, sc.request.bhtail);
+
+	return 0;
+}
+
+static int __init kdbm_vm_init(void)
+{
+	kdb_register("vm", kdbm_vm, "", "Display vm_area_struct", 0);
+	kdb_register("dentry", kdbm_dentry, "", "Display interesting dentry stuff", 0);
+	kdb_register("filp", kdbm_fp, "", "Display interesting filp stuff", 0);
+	kdb_register("sh", kdbm_sh, "", "Show scsi_host", 0);
+	kdb_register("sd", kdbm_sd, "", "Show scsi_device", 0);
+	kdb_register("sc", kdbm_sc, "", "Show scsi_cmnd", 0);
+
+	return 0;
+}
+
+static void __exit kdbm_vm_exit(void)
+{
+	kdb_unregister("vm");
+	kdb_unregister("dentry");
+	kdb_unregister("filp");
+	kdb_unregister("sh");
+	kdb_unregister("sd");
+	kdb_unregister("sc");
+}
+
+module_init(kdbm_vm_init)
+module_exit(kdbm_vm_exit)
diff -urN linux-2.4.17-rc2-virgin/kernel/Makefile linux-2.4.17-rc2-wli2/kernel/Makefile
--- linux-2.4.17-rc2-virgin/kernel/Makefile	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/kernel/Makefile	Tue Dec 18 22:21:49 2001
@@ -19,6 +19,7 @@
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += ksyms.o
 obj-$(CONFIG_PM) += pm.o
+obj-$(CONFIG_KALLSYMS) += kallsyms.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff -urN linux-2.4.17-rc2-virgin/kernel/exit.c linux-2.4.17-rc2-wli2/kernel/exit.c
--- linux-2.4.17-rc2-virgin/kernel/exit.c	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/kernel/exit.c	Tue Dec 18 22:28:42 2001
@@ -190,6 +190,8 @@
 			}
 			i++;
 			set >>= 1;
+			debug_lock_break(1);
+			conditional_schedule();
 		}
 	}
 }
@@ -273,6 +275,10 @@
 struct mm_struct * start_lazy_tlb(void)
 {
 	struct mm_struct *mm = current->mm;
+#ifdef CONFIG_PREEMPT
+	if (preempt_is_disabled() == 0)
+		BUG();
+#endif
 	current->mm = NULL;
 	/* active_mm is still 'mm' */
 	atomic_inc(&mm->mm_count);
@@ -284,6 +290,10 @@
 {
 	struct mm_struct *active_mm = current->active_mm;
 
+#ifdef CONFIG_PREEMPT
+	if (preempt_is_disabled() == 0)
+		BUG();
+#endif
 	current->mm = mm;
 	if (mm != active_mm) {
 		current->active_mm = mm;
@@ -307,8 +317,8 @@
 		/* more a memory barrier than a real lock */
 		task_lock(tsk);
 		tsk->mm = NULL;
-		task_unlock(tsk);
 		enter_lazy_tlb(mm, current, smp_processor_id());
+		task_unlock(tsk);
 		mmput(mm);
 	}
 }
diff -urN linux-2.4.17-rc2-virgin/kernel/fork.c linux-2.4.17-rc2-wli2/kernel/fork.c
--- linux-2.4.17-rc2-virgin/kernel/fork.c	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/kernel/fork.c	Tue Dec 18 22:28:42 2001
@@ -260,9 +260,6 @@
 void mmput(struct mm_struct *mm)
 {
 	if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
-		extern struct mm_struct *swap_mm;
-		if (swap_mm == mm)
-			swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
 		list_del(&mm->mmlist);
 		mmlist_nr--;
 		spin_unlock(&mmlist_lock);
@@ -604,6 +601,12 @@
 	if (p->binfmt && p->binfmt->module)
 		__MOD_INC_USE_COUNT(p->binfmt->module);
 
+#ifdef CONFIG_PREEMPT
+	/* Since we are keeping the context switch off state as part
+	 * of the context, make sure we start with it off.
+ */ + p->preempt_count = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; @@ -649,8 +652,6 @@ p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; - INIT_LIST_HEAD(&p->local_pages); - retval = -ENOMEM; /* copy all the process information */ if (copy_files(clone_flags, p)) @@ -682,10 +683,20 @@ * more scheduling fairness. This is only important in the first * timeslice, on the long run the scheduling behaviour is unchanged. */ + /* + * SCHED_FIFO tasks don't count down and have a negative counter. + * Don't change these, least they all end up at -1. + */ +#ifdef CONFIG_RTSCHED + if (p->policy != SCHED_FIFO) +#endif + { + p->counter = (current->counter + 1) >> 1; current->counter >>= 1; if (!current->counter) current->need_resched = 1; + } /* * Ok, add it to the run-queues and make it diff -urN linux-2.4.17-rc2-virgin/kernel/kallsyms.c linux-2.4.17-rc2-wli2/kernel/kallsyms.c --- linux-2.4.17-rc2-virgin/kernel/kallsyms.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/kernel/kallsyms.c Tue Dec 18 22:21:49 2001 @@ -0,0 +1,306 @@ +/* An example of using kallsyms data in a kernel debugger. + + Copyright 2000 Keith Owens April 2000 + + This file is part of the Linux modutils. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ident "$Id$" + +/* + This code uses the list of all kernel and module symbols to :- + + * Find any non-stack symbol in a kernel or module. Symbols do + not have to be exported for debugging. + + * Convert an address to the module (or kernel) that owns it, the + section it is in and the nearest symbol. This finds all non-stack + symbols, not just exported ones. + + You need modutils >= 2.3.11 and a kernel with the kallsyms patch + which was compiled with CONFIG_KALLSYMS. + */ + +#include +#include +#include +#include +#include + +/* These external symbols are only set on kernels compiled with + * CONFIG_KALLSYMS. 
+ */
+
+extern const char __start___kallsyms[];
+extern const char __stop___kallsyms[];
+
+static struct module **kallsyms_module_list;
+
+static void kallsyms_get_module_list(void)
+{
+	const struct kallsyms_header *ka_hdr;
+	const struct kallsyms_section *ka_sec;
+	const struct kallsyms_symbol *ka_sym;
+	const char *ka_str;
+	int i;
+	const char *p;
+
+	if (__start___kallsyms >= __stop___kallsyms)
+		return;
+	ka_hdr = (struct kallsyms_header *)__start___kallsyms;
+	ka_sec = (struct kallsyms_section *)
+		((char *)(ka_hdr) + ka_hdr->section_off);
+	ka_sym = (struct kallsyms_symbol *)
+		((char *)(ka_hdr) + ka_hdr->symbol_off);
+	ka_str =
+		((char *)(ka_hdr) + ka_hdr->string_off);
+
+	for (i = 0; i < ka_hdr->symbols; kallsyms_next_sym(ka_hdr, ka_sym), ++i) {
+		p = ka_str + ka_sym->name_off;
+		if (strcmp(p, "module_list") == 0) {
+			if (ka_sym->symbol_addr)
+				kallsyms_module_list = (struct module **)(ka_sym->symbol_addr);
+			break;
+		}
+	}
+}
+
+static inline void kallsyms_do_first_time(void)
+{
+	static int first_time = 1;
+	if (first_time)
+		kallsyms_get_module_list();
+	first_time = 0;
+}
+
+/* A symbol can appear in more than one module.  A token is used to
+ * restart the scan at the next module; set the token to 0 for the
+ * first scan of each symbol.
+ */
+
+int kallsyms_symbol_to_address(
+	const char	 *name,		/* Name to lookup */
+	unsigned long	 *token,	/* Which module to start at */
+	const char	**mod_name,	/* Set to module name */
+	unsigned long	 *mod_start,	/* Set to start address of module */
+	unsigned long	 *mod_end,	/* Set to end address of module */
+	const char	**sec_name,	/* Set to section name */
+	unsigned long	 *sec_start,	/* Set to start address of section */
+	unsigned long	 *sec_end,	/* Set to end address of section */
+	const char	**sym_name,	/* Set to full symbol name */
+	unsigned long	 *sym_start,	/* Set to start address of symbol */
+	unsigned long	 *sym_end	/* Set to end address of symbol */
+	)
+{
+	const struct kallsyms_header *ka_hdr = NULL;	/* stupid gcc */
+	const struct kallsyms_section *ka_sec;
+	const struct kallsyms_symbol *ka_sym = NULL;
+	const char *ka_str = NULL;
+	const struct module *m;
+	int i = 0, l;
+	const char *p, *pt_R;
+	char *p2;
+
+	kallsyms_do_first_time();
+	if (!kallsyms_module_list)
+		return(0);
+
+	/* Restart? */
+	m = *kallsyms_module_list;
+	if (token && *token) {
+		for (; m; m = m->next)
+			if ((unsigned long)m == *token)
+				break;
+		if (m)
+			m = m->next;
+	}
+
+	for (; m; m = m->next) {
+		if (!mod_member_present(m, kallsyms_start) ||
+		    !mod_member_present(m, kallsyms_end) ||
+		    m->kallsyms_start >= m->kallsyms_end)
+			continue;
+		ka_hdr = (struct kallsyms_header *)m->kallsyms_start;
+		ka_sym = (struct kallsyms_symbol *)
+			((char *)(ka_hdr) + ka_hdr->symbol_off);
+		ka_str =
+			((char *)(ka_hdr) + ka_hdr->string_off);
+		for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) {
+			p = ka_str + ka_sym->name_off;
+			if (strcmp(p, name) == 0)
+				break;
+			/* Unversioned requests match versioned names */
+			if (!(pt_R = strstr(p, "_R")))
+				continue;
+			l = strlen(pt_R);
+			if (l < 10)
+				continue;	/* Not _R.*xxxxxxxx */
+			(void)simple_strtoul(pt_R+l-8, &p2, 16);
+			if (*p2)
+				continue;	/* Not _R.*xxxxxxxx */
+			if (strncmp(p, name, pt_R-p) == 0)
+				break;	/* Match with version */
+		}
+		if (i < ka_hdr->symbols)
+			break;
+	}
+
+	if (token)
+		*token = (unsigned long)m;
+	if (!m)
+		return(0);	/* not found */
+
+	ka_sec = (const struct kallsyms_section *)
+		((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off);
+	*mod_name = *(m->name) ? m->name : "kernel";
+	*mod_start = ka_hdr->start;
+	*mod_end = ka_hdr->end;
+	*sec_name = ka_sec->name_off + ka_str;
+	*sec_start = ka_sec->start;
+	*sec_end = ka_sec->start + ka_sec->size;
+	*sym_name = ka_sym->name_off + ka_str;
+	*sym_start = ka_sym->symbol_addr;
+	if (i < ka_hdr->symbols-1) {
+		const struct kallsyms_symbol *ka_symn = ka_sym;
+		kallsyms_next_sym(ka_hdr, ka_symn);
+		*sym_end = ka_symn->symbol_addr;
+	}
+	else
+		*sym_end = *sec_end;
+	return(1);
+}
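(Editor's illustration, not part of the patch.) Because a symbol may be defined by several modules, a caller is expected to loop on the token, which the lookup advances past the module that produced the previous hit. Using only the interface declared above, the calling convention might look like this; show_symbol_instances is a hypothetical kernel-side helper, assuming CONFIG_KALLSYMS.

    /* Print every definition of 'name' known to kallsyms.  The token
     * starts at 0 and is advanced by kallsyms_symbol_to_address() so
     * the scan resumes after the previously matched module. */
    static void show_symbol_instances(const char *name)
    {
        unsigned long token = 0;
        const char *mod, *sec, *sym;
        unsigned long mod_start, mod_end, sec_start, sec_end;
        unsigned long sym_start, sym_end;

        while (kallsyms_symbol_to_address(name, &token,
                        &mod, &mod_start, &mod_end,
                        &sec, &sec_start, &sec_end,
                        &sym, &sym_start, &sym_end))
            printk("%s: %s in [%s]%s at 0x%lx\n",
                   name, sym, mod, sec, sym_start);
    }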
m->name : "kernel"; + *mod_start = ka_hdr->start; + *mod_end = ka_hdr->end; + *sec_name = ka_sec->name_off + ka_str; + *sec_start = ka_sec->start; + *sec_end = ka_sec->start + ka_sec->size; + *sym_name = ka_sym->name_off + ka_str; + *sym_start = ka_sym->symbol_addr; + if (i < ka_hdr->symbols-1) { + const struct kallsyms_symbol *ka_symn = ka_sym; + kallsyms_next_sym(ka_hdr, ka_symn); + *sym_end = ka_symn->symbol_addr; + } + else + *sym_end = *sec_end; + return(1); +} + +int kallsyms_address_to_symbol( + unsigned long address, /* Address to lookup */ + const char **mod_name, /* Set to module name */ + unsigned long *mod_start, /* Set to start address of module */ + unsigned long *mod_end, /* Set to end address of module */ + const char **sec_name, /* Set to section name */ + unsigned long *sec_start, /* Set to start address of section */ + unsigned long *sec_end, /* Set to end address of section */ + const char **sym_name, /* Set to full symbol name */ + unsigned long *sym_start, /* Set to start address of symbol */ + unsigned long *sym_end /* Set to end address of symbol */ + ) +{ + const struct kallsyms_header *ka_hdr = NULL; /* stupid gcc */ + const struct kallsyms_section *ka_sec = NULL; + const struct kallsyms_symbol *ka_sym; + const char *ka_str; + const struct module *m; + int i; + unsigned long end; + + kallsyms_do_first_time(); + if (!kallsyms_module_list) + return(0); + + for (m = *kallsyms_module_list; m; m = m->next) { + if (!mod_member_present(m, kallsyms_start) || + !mod_member_present(m, kallsyms_end) || + m->kallsyms_start >= m->kallsyms_end) + continue; + ka_hdr = (struct kallsyms_header *)m->kallsyms_start; + ka_sec = (const struct kallsyms_section *) + ((char *)ka_hdr + ka_hdr->section_off); + /* Is the address in any section in this module? */ + for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) { + if (ka_sec->start <= address && + (ka_sec->start + ka_sec->size) > address) + break; + } + if (i < ka_hdr->sections) + break; /* Found a matching section */ + } + + if (!m) + return(0); /* not found */ + + ka_sym = (struct kallsyms_symbol *) + ((char *)(ka_hdr) + ka_hdr->symbol_off); + ka_str = + ((char *)(ka_hdr) + ka_hdr->string_off); + *mod_name = *(m->name) ? m->name : "kernel"; + *mod_start = ka_hdr->start; + *mod_end = ka_hdr->end; + *sec_name = ka_sec->name_off + ka_str; + *sec_start = ka_sec->start; + *sec_end = ka_sec->start + ka_sec->size; + *sym_name = *sec_name; /* In case we find no matching symbol */ + *sym_start = *sec_start; + *sym_end = *sec_end; + + for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) { + if (ka_sym->symbol_addr > address) + continue; + if (i < ka_hdr->symbols-1) { + const struct kallsyms_symbol *ka_symn = ka_sym; + kallsyms_next_sym(ka_hdr, ka_symn); + end = ka_symn->symbol_addr; + } + else + end = *sec_end; + if (end <= address) + continue; + if ((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off + != (char *)ka_sec) + continue; /* wrong section */ + *sym_name = ka_str + ka_sym->name_off; + *sym_start = ka_sym->symbol_addr; + *sym_end = end; + break; + } + return(1); +} + +/* List all sections in all modules. The callback routine is invoked with + * token, module name, section name, section start, section end, section flags. 
+ */ +int kallsyms_sections(void *token, + int (*callback)(void *, const char *, const char *, ElfW(Addr), ElfW(Addr), ElfW(Word))) +{ + const struct kallsyms_header *ka_hdr = NULL; /* stupid gcc */ + const struct kallsyms_section *ka_sec = NULL; + const char *ka_str; + const struct module *m; + int i; + + kallsyms_do_first_time(); + if (!kallsyms_module_list) + return(0); + + for (m = *kallsyms_module_list; m; m = m->next) { + if (!mod_member_present(m, kallsyms_start) || + !mod_member_present(m, kallsyms_end) || + m->kallsyms_start >= m->kallsyms_end) + continue; + ka_hdr = (struct kallsyms_header *)m->kallsyms_start; + ka_sec = (const struct kallsyms_section *) ((char *)ka_hdr + ka_hdr->section_off); + ka_str = ((char *)(ka_hdr) + ka_hdr->string_off); + for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) { + if (callback( + token, + *(m->name) ? m->name : "kernel", + ka_sec->name_off + ka_str, + ka_sec->start, + ka_sec->start + ka_sec->size, + ka_sec->flags)) + return(0); + } + } + return(1); +} diff -urN linux-2.4.17-rc2-virgin/kernel/ksyms.c linux-2.4.17-rc2-wli2/kernel/ksyms.c --- linux-2.4.17-rc2-virgin/kernel/ksyms.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/kernel/ksyms.c Tue Dec 18 22:28:42 2001 @@ -55,6 +55,9 @@ #ifdef CONFIG_KMOD #include #endif +#ifdef CONFIG_KALLSYMS +#include +#endif extern void set_device_ro(kdev_t dev,int flag); @@ -80,6 +83,15 @@ EXPORT_SYMBOL(inter_module_put); EXPORT_SYMBOL(try_inc_mod_count); +#ifdef CONFIG_KALLSYMS +extern const char __start___kallsyms[]; +extern const char __stop___kallsyms[]; +EXPORT_SYMBOL(__start___kallsyms); +EXPORT_SYMBOL(__stop___kallsyms); +EXPORT_SYMBOL(kallsyms_symbol_to_address); +EXPORT_SYMBOL(kallsyms_address_to_symbol); +#endif + /* process memory management */ EXPORT_SYMBOL(do_mmap_pgoff); EXPORT_SYMBOL(do_munmap); @@ -436,6 +448,9 @@ EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); diff -urN linux-2.4.17-rc2-virgin/kernel/ptrace.c linux-2.4.17-rc2-wli2/kernel/ptrace.c --- linux-2.4.17-rc2-virgin/kernel/ptrace.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/kernel/ptrace.c Tue Dec 18 22:28:42 2001 @@ -121,17 +121,119 @@ } /* - * Access another process' address space. - * Source/target buffer must be kernel space, - * Do not walk the page table directly, use get_user_pages + * Access another process' address space, one page at a time. 
*/ +static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write) +{ + pgd_t * pgdir; + pmd_t * pgmiddle; + pte_t * pgtable; + char *maddr; + struct page *page; + +repeat: + spin_lock(&mm->page_table_lock); + pgdir = pgd_offset(vma->vm_mm, addr); + if (pgd_none(*pgdir)) + goto fault_in_page; + if (pgd_bad(*pgdir)) + goto bad_pgd; + pgmiddle = pmd_offset(pgdir, addr); + if (pmd_none(*pgmiddle)) + goto fault_in_page; + if (pmd_bad(*pgmiddle)) + goto bad_pmd; + pgtable = pte_offset(pgmiddle, addr); + if (!pte_present(*pgtable)) + goto fault_in_page; + if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable))) + goto fault_in_page; + page = pte_page(*pgtable); + + /* ZERO_PAGE is special: reads from it are ok even though it's marked reserved */ + if (page != ZERO_PAGE(addr) || write) { + if ((!VALID_PAGE(page)) || PageReserved(page)) { + spin_unlock(&mm->page_table_lock); + return 0; + } + } + get_page(page); + spin_unlock(&mm->page_table_lock); + flush_cache_page(vma, addr); + + if (write) { + maddr = kmap(page); + memcpy(maddr + (addr & ~PAGE_MASK), buf, len); + flush_page_to_ram(page); + flush_icache_page(vma, page); + kunmap(page); + } else { + maddr = kmap(page); + memcpy(buf, maddr + (addr & ~PAGE_MASK), len); + flush_page_to_ram(page); + kunmap(page); + } + put_page(page); + return len; + +fault_in_page: + spin_unlock(&mm->page_table_lock); + /* -1: out of memory. 0 - unmapped page */ + if (handle_mm_fault(mm, vma, addr, write) > 0) + goto repeat; + return 0; + +bad_pgd: + spin_unlock(&mm->page_table_lock); + pgd_ERROR(*pgdir); + return 0; + +bad_pmd: + spin_unlock(&mm->page_table_lock); + pmd_ERROR(*pgmiddle); + return 0; +} + +static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write) +{ + int copied = 0; + + for (;;) { + unsigned long offset = addr & ~PAGE_MASK; + int this_len = PAGE_SIZE - offset; + int retval; + + if (this_len > len) + this_len = len; + retval = access_one_page(mm, vma, addr, buf, this_len, write); + copied += retval; + if (retval != this_len) + break; + + len -= retval; + if (!len) + break; + + addr += retval; + buf += retval; + + if (addr < vma->vm_end) + continue; + if (!vma->vm_next) + break; + if (vma->vm_next->vm_start != vma->vm_end) + break; + + vma = vma->vm_next; + } + return copied; +} int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) { + int copied; struct mm_struct *mm; - struct vm_area_struct *vma; - struct page *page; - void *old_buf = buf; + struct vm_area_struct * vma; /* Worry about races with exit() */ task_lock(tsk); @@ -143,41 +245,14 @@ return 0; down_read(&mm->mmap_sem); - /* ignore errors, just check how much was sucessfully transfered */ - while (len) { - int bytes, ret, offset; - void *maddr; - - ret = get_user_pages(current, mm, addr, 1, - write, 1, &page, &vma); - if (ret <= 0) - break; - - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; + vma = find_extend_vma(mm, addr); + copied = 0; + if (vma) + copied = access_mm(mm, vma, addr, buf, len, write); - flush_cache_page(vma, addr); - - maddr = kmap(page); - if (write) { - memcpy(maddr + offset, buf, bytes); - flush_page_to_ram(page); - flush_icache_page(vma, page); - } else { - memcpy(buf, maddr + offset, bytes); - flush_page_to_ram(page); - } - kunmap(page); - put_page(page); - len -= bytes; - buf += bytes; - } up_read(&mm->mmap_sem); mmput(mm); - - 
return buf - old_buf;
+	return copied;
 }
 
 int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len)
diff -urN linux-2.4.17-rc2-virgin/kernel/rtsched.h linux-2.4.17-rc2-wli2/kernel/rtsched.h
--- linux-2.4.17-rc2-virgin/kernel/rtsched.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.17-rc2-wli2/kernel/rtsched.h	Thu Dec 20 18:55:14 2001
@@ -0,0 +1,1218 @@
+/*
+ * linux/kernel/rtsched.h
+ *
+ * NOTE: This is a .h file that is mostly source, not the usual convention.
+ *	It is coded this way to allow the depend rules to correctly set
+ *	up the make file dependencies.  This is an alternate scheduler
+ *	that replaces the core scheduler in sched.c.  It does not, however,
+ *	replace most of the static support functions that call schedule.
+ *	By making this an include file for sched.c, all of those functions
+ *	are retained without the need for duplicate code and its attendant
+ *	support issues.  At the same time, keeping it a separate file allows
+ *	diff and patch to work most cleanly and correctly.
+ *
+ *  Kernel scheduler and related syscalls
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001  MontaVista Software Inc.
+ *
+ *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
+ *  2000-03-15  Added the Real Time run queue support by George Anzinger
+ *  2000-8-29   Added code to do lazy recalculation of counters
+ *              by George Anzinger
+ */
+
+/*
+ * 'sched.c' is the main kernel file. It contains scheduling primitives
+ * (sleep_on, wakeup, schedule etc) as well as a number of simple system
+ * call functions (type getpid()), which just extract a field from
+ * current-task
+ */
+
+#ifndef preempt_disable
+#define preempt_disable()
+#define preempt_enable()
+#define preempt_is_disabled() 0
+#define preempt_enable_no_resched()
+#endif
+
+/*
+ * scheduler variables
+ */
+#define VERSION_DATE "<20011203.1609.50>"
+/*
+ * We align per-CPU scheduling data on cacheline boundaries,
+ * to prevent cacheline ping-pong.
+ */
+static union {
+	struct schedule_data {
+		struct task_struct * curr;
+		cycles_t last_schedule;
+		struct list_head schedule_data_list;
+		int cpu,effprio;
+	} schedule_data;
+	char __pad [SMP_CACHE_BYTES];
+} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0,{0,0},0,0}}};
+
+#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+static void newprio_ready_q(struct task_struct * tptr,int newprio);
+#ifdef CONFIG_SMP
+static void newprio_executing(struct task_struct *tptr,int newprio);
+static struct list_head hed_cpu_prio __cacheline_aligned =
+	LIST_HEAD_INIT(hed_cpu_prio);
+#endif
+/*
+ * task_on_rq tests for task actually in the ready queue.
+ * task_on_runque tests for task either on ready queue or being executed
+ * (by virtue of our setting a running task's run_list.next to 1)
+ */
+#define task_on_rq(p) ((unsigned)p->run_list.next > 1)
+
+static struct list_head rq[MAX_PRI+1] ____cacheline_aligned;
+
+static struct ready_queue {
+	int recalc;	/* # of counter recalculations on SCHED_OTHER */
+	int ticks;	/* # of ticks for all in SCHED_OTHER ready Q */
+} runq ____cacheline_aligned;
+
+/* set the bit map up with guard bits below.  This will result in
+ * priority -1 if there are no tasks in the ready queue, which will
+ * happen as we are not putting the idle tasks in the ready queue.
+ */
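(Editor's illustration, not part of the patch; the actual definitions follow below.) The guard word is what lets the priority scan run without an explicit bounds check: an all-empty bitmap walks off the low end of rq_bit_ary onto the -1 guard and comes back with priority -1. The self-contained sketch below reproduces that search with a hypothetical MAX_PRI, a portable substitute for ffz(~x), and the same flipped bit storage; like the original, it deliberately relies on the guard immediately preceding the array in the struct.

    #include <stdio.h>

    #define MAX_PRI 127
    #define BF 31                   /* bit flip constant, as below */

    static struct {
        int guard;                  /* -1 stops the scan: priority -1 */
        int bits[(MAX_PRI / 32) + 1];
    } rq_bits = { -1, { 0 } };

    /* ffz(~x): index of the lowest set bit in x (x must be nonzero). */
    static int first_set(unsigned int x)
    {
        int n = 0;
        while (!(x & 1)) { x >>= 1; n++; }
        return n;
    }

    /* Flipped storage: within a word, a higher priority uses a lower
     * bit index, so the lowest set bit names the highest priority. */
    static void set_rq_bit(int prio)
    {
        rq_bits.bits[prio >> 5] |= 1u << (BF - (prio & 0x1f));
    }

    static int find_high_prio(void)
    {
        int *i = &rq_bits.bits[MAX_PRI / 32];
        int prio = BF + (MAX_PRI / 32) * 32;

        /* Walking past bits[0] lands on the guard, which is nonzero,
         * so the loop always terminates without a bounds test. */
        for (; *i == 0; prio -= 32, i--)
            ;
        return prio - first_set(*i);
    }

    int main(void)
    {
        printf("empty: %d\n", find_high_prio());   /* prints -1 */
        set_rq_bit(13);
        set_rq_bit(40);
        printf("high:  %d\n", find_high_prio());   /* prints 40 */
        return 0;
    }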
+static struct {
+	int guard;
+	int rq_bit_ary[(MAX_PRI/32) +1];
+}rq_bits = {-1,{0,0,0,0}};
+#define rq_bit_map rq_bits.rq_bit_ary
+
+static int high_prio=0;
+
+#define Rdy_Q_Hed(pri) &rq[pri]
+
+#define PREEMPTION_THRESHOLD 1
+
+#define NOT_RT 0	/* Use priority zero for non-RT processes */
+#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
+
+struct kernel_stat kstat;
+
+#ifdef CONFIG_SMP
+
+/*
+ * At the moment, we will ignore cpus_allowed, primarily because if it were
+ * used, we would have a conflict in the runq.ticks count (i.e. since we
+ * are not scheduling some tasks, the count would not reflect what is
+ * really on the list).  Oh, and also, nowhere is there code in the
+ * kernel to set cpus_allowed to anything but -1.  In the long run, we
+ * would like to try separate lists for each cpu, at which point
+ * cpus_allowed could be used to direct the task to the proper list.
+ *
+ * Well, darn, now there is code that messes with cpus_allowed.  We will
+ * change sometime soon....
+ */
+
+#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
+#define can_schedule(p,cpu) \
+	((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))
+
+#else
+
+#define idle_task(cpu) (&init_task)
+#define can_schedule(p,cpu) (1)
+
+#endif
+
+void scheduling_functions_start_here(void) { }
+
+/*
+ * This is the function that decides how desirable a process is..
+ * You can weigh different processes against each other depending
+ * on what CPU they've run on lately etc to try to handle cache
+ * and TLB miss penalties.
+ *
+ * Return values:
+ *	 -1000: never select this
+ *	     0: out of time, recalculate counters (but it might still be
+ *		selected)
+ *	   +ve: "goodness" value (the larger, the better)
+ */
+
+static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
+{
+	int weight;
+
+	/*
+	 * goodness is NEVER called for Realtime processes!
+	 * Realtime process, select the first one on the
+	 * runqueue (taking priorities within processes
+	 * into account).
+	 */
+	/*
+	 * Give the process a first-approximation goodness value
+	 * according to the number of clock-ticks it has left.
+	 *
+	 * Don't do any other calculations if the time slice is
+	 * over or if this is an idle task.
+	 */
+	weight = p->counter;
+	if (weight <= 0)
+		goto out;
+
+#ifdef CONFIG_SMP
+	/* Give a largish advantage to the same processor...   */
+	/* (this is equivalent to penalizing other processors) */
+	if (p->processor == this_cpu)
+		weight += PROC_CHANGE_PENALTY;
+#endif
+
+	/* .. and a slight advantage to the current MM */
+	if (p->mm == this_mm || !p->mm)
+		weight += 1;
+	weight += 20 - p->nice;
+
+out:
+	return weight;
+}
+
+/*
+ * the 'goodness value' of replacing a process on a given CPU.
+ * positive value means 'replace', zero or negative means 'dont'.
+ */
+static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
+{
+	return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
+}
+
+/*
+ * This is ugly, but reschedule_idle() is very timing-critical.
+ * We are called with the runqueue spinlock held and we must
+ * not claim the tasklist_lock.
+ */
+static FASTCALL(void reschedule_idle(struct task_struct * p));
+
+static void reschedule_idle(struct task_struct * p)
+{
+#ifdef CONFIG_SMP
+	int this_cpu = smp_processor_id(), target_cpu;
+	struct task_struct *target_tsk;
+	struct list_head *cptr;
+	struct schedule_data *sch;
+	int best_cpu;
+
+	/*
+	 * shortcut if the woken up task's last CPU is
+	 * idle now.
+	 */
+	best_cpu = p->processor;
+	target_tsk = idle_task(best_cpu);
+	if (cpu_curr(best_cpu) == target_tsk)
+		goto preempt_now;
+	/*
+	 * For real time, the choice is simple.  We just check
+	 * if the most available processor is working on a lower
+	 * priority task.  If so we bounce it, if not, there is
+	 * nothing more important than what we are doing.
+	 * Note that this will pick up any idle cpu(s) we may
+	 * have as they will have effprio of -1.
+	 */
+	cptr = hed_cpu_prio.prev;
+	sch = list_entry(cptr,
+			 struct schedule_data,
+			 schedule_data_list);
+	target_tsk = sch->curr;
+	if (p->effprio > sch->effprio){
+		goto preempt_now;
+	}
+	/*
+	 * If all cpus are doing real time and we failed
+	 * above, then there is no help for this task.
+	 */
+	if ( sch->effprio )
+		goto out_no_target;
+	/*
+	 * Non-real time contender and one or more processors
+	 * doing non-real time things.
+	 *
+	 * So we have a non-real time task contending among
+	 * other non-real time tasks on one or more processors.
+	 * We know we have no idle cpus.
+	 */
+	/*
+	 * No CPU is idle, but maybe this process has enough priority
+	 * to preempt its preferred CPU.
+	 */
+	target_tsk = cpu_curr(best_cpu);
+	if (target_tsk->effprio == 0 &&
+	    preemption_goodness(target_tsk, p, best_cpu) > 0)
+		goto preempt_now;
+
+	for (; cptr != &hed_cpu_prio; cptr = cptr->prev ){
+		sch = list_entry(cptr,
+				 struct schedule_data,
+				 schedule_data_list);
+		if (sch->effprio != 0)
+			break;
+		if (sch->cpu != best_cpu){
+			target_tsk = sch->curr;
+			if ( preemption_goodness(target_tsk, p, sch->cpu) >
+			     PREEMPTION_THRESHOLD)
+				goto preempt_now;
+		}
+
+	}
+
+out_no_target:
+	return;
+
+preempt_now:
+	target_cpu = target_tsk->processor;
+	target_tsk->need_resched = 1;
+	/*
+	 * the APIC stuff can go outside of the lock because
+	 * it uses no task information, only CPU#.
+	 */
+	if ((target_cpu != this_cpu)
+	    && (target_tsk != idle_task(target_cpu)))
+		smp_send_reschedule(target_cpu);
+	return;
+#else /* UP */
+	struct task_struct *tsk;
+
+	tsk = cpu_curr(0);
+	if ((high_prio > tsk->effprio) ||
+	    (!tsk->effprio && preemption_goodness(tsk, p, 0) >
+	     PREEMPTION_THRESHOLD)){
+		tsk->need_resched = 1;
+	}
+#endif
+}
+
+/*
+ * This routine maintains the list of smp processors.  This is
+ * a bidirectional list maintained in priority order.  The above
+ * code uses this list to find a processor to use for a new task.
+ * The search will be backward through the list as we want to take
+ * the lowest priority cpu first.  We put equal priorities such that
+ * the new one will be ahead of the old, so the new should stay
+ * around a bit longer.
+ */
+
+#ifdef CONFIG_SMP
+static inline void re_queue_cpu(struct task_struct *next,
+				struct schedule_data *sch)
+{
+	struct list_head *cpuptr;
+	list_del(&sch->schedule_data_list);
+	sch->effprio = next->effprio;
+	cpuptr = hed_cpu_prio.next;
+	while (cpuptr != &hed_cpu_prio &&
+	       sch->effprio < list_entry(cpuptr,
+					 struct schedule_data,
+					 schedule_data_list)->effprio
+	       )
+		cpuptr = cpuptr->next;
+	list_add_tail(&sch->schedule_data_list,cpuptr);
+	next->newprio = &newprio_executing;
+}
+#else
+#define re_queue_cpu(a,b)
+#endif
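(Editor's illustration, not part of the patch.) The ordered re-insertion that re_queue_cpu() above performs is worth seeing in isolation: scan from the head past entries of strictly higher effprio, then insert, so that among equal priorities the newcomer lands first and gets the slightly longer residence the comment asks for. A freestanding sketch with simplified, hypothetical types:

    #include <stdio.h>

    struct cpu_ent {
        struct cpu_ent *next, *prev;
        int effprio, cpu;
    };

    static struct cpu_ent head = { &head, &head, 0, -1 };

    static void list_del_ent(struct cpu_ent *e)
    {
        e->prev->next = e->next;
        e->next->prev = e->prev;
    }

    /* Insert e before pos (list_add_tail on a cursor). */
    static void list_add_before(struct cpu_ent *e, struct cpu_ent *pos)
    {
        e->prev = pos->prev;
        e->next = pos;
        pos->prev->next = e;
        pos->prev = e;
    }

    /* Re-queue a CPU after its running task's priority changed: skip
     * strictly higher priorities, then insert, so equal priorities
     * keep the newcomer ahead of older entries. */
    static void re_queue(struct cpu_ent *e, int newprio)
    {
        struct cpu_ent *pos = head.next;

        list_del_ent(e);
        e->effprio = newprio;
        while (pos != &head && newprio < pos->effprio)
            pos = pos->next;
        list_add_before(e, pos);
    }

    int main(void)
    {
        struct cpu_ent c0 = { 0 }, c1 = { 0 };

        c0.cpu = 0; c1.cpu = 1;
        list_add_before(&c0, &head);    /* both start at effprio 0 */
        list_add_before(&c1, &head);
        re_queue(&c1, 5);
        printf("front cpu=%d effprio=%d\n",
               head.next->cpu, head.next->effprio);  /* cpu=1, 5 */
        return 0;
    }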
+ * Also, in the name of speed and real time, only priority is
+ * important, so we spend a few bytes on the queue.
+ * We have a doubly linked list for each priority. This makes
+ * insertion and removal very fast. We also keep a bit map of
+ * the priority queues where a bit says if the queue is empty
+ * or not. We also keep loose track of the highest priority
+ * queue that is currently occupied. This high_prio mark
+ * is updated when a higher priority task enters the ready
+ * queue and only goes down when we look for a task in the
+ * ready queue at high_prio and find none. Then, and only
+ * then, we examine the bit map to find the true high_prio.
+ */
+
+#define BF 31	/* bit flip constant */
+#define set_rq_bit(bit)   set_bit(BF-((bit)&0x1f),&rq_bit_map[(bit) >> 5])
+#define clear_rq_bit(bit) clear_bit(BF-((bit)&0x1f),&rq_bit_map[(bit) >> 5])
+
+static inline void _del_from_runqueue(struct task_struct * p)
+{
+	nr_running--;
+	list_del( &p->run_list );
+	if (list_empty(Rdy_Q_Hed(p->effprio))){
+		clear_rq_bit(p->effprio);
+	}
+	/* p->run_list.next = NULL; !=0 prevents requeue */
+	p->run_list.next = NULL;
+	p->newprio = NULL;
+	if( !p->effprio) runq.ticks -= p->counter;
+}
+/* Exported for main.c, also used in init code here */
+void __del_from_runqueue(struct task_struct * p)
+{
+	_del_from_runqueue(p);
+}
+static inline struct task_struct * get_next_task(struct task_struct * prev,
+						 int this_cpu)
+{
+	struct list_head *next, *rqptr;
+	struct task_struct *it=0;
+	int *i,c,oldcounter;
+
+ repeat_schedule:
+	rqptr = Rdy_Q_Hed(high_prio);
+	next = rqptr->next;
+	if (unlikely( next == rqptr)){
+		for (i=&rq_bit_map[MAX_PRI/32],high_prio=BF+((MAX_PRI/32)*32);
+		     (*i == 0);high_prio -=32,i--);
+		high_prio -= ffz(~*i);
+		if (unlikely(high_prio < 0)){
+			/*
+			 * No tasks to run; return this cpu's idle task.
+			 * It is not in the ready queue, so there is no
+			 * need to remove it. But first make sure its
+			 * priority keeps it out of the way.
+			 */
+			high_prio = 0;
+			it = idle_task(this_cpu);
+			it->effprio = -1;
+			return it;
+		}
+		goto repeat_schedule;
+	}
+	/*
+	 * If there is only one task on the list, it is a no-brainer.
+	 * But really, this also prevents us from looping on recalculation
+	 * if the one and only task is trying to yield. This sort of
+	 * loop is NOT_FUN. Note: we use likely() to tilt toward
+	 * real-time tasks, even though they are usually unlikely. We
+	 * are, after all, a real time scheduler.
+	 */
+	if ( likely(high_prio || next->next == rqptr)){
+		it = list_entry(next, struct task_struct, run_list);
+ back_from_figure_non_rt_next:
+		_del_from_runqueue(it);
+		return it;
+	}
+	/*
+	 * Here we set up a SCHED_OTHER yield. Note that for other policies
+	 * yield is handled elsewhere. This means we can use == and =
+	 * instead of & and &= to test and clear the flag. If the prev
+	 * task has all the runq.ticks, then we just do the recalculation
+	 * version and let the winner take all (yield fails). Otherwise
+	 * we force the counter to zero for the loop and put it back
+	 * after we have found some other task. We must remember to update
+	 * runq.ticks during all this. Also, we don't give it all back
+	 * if the yielder has more than the next guy.
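+	 *
+	 * [Editor's sketch -- illustrative only, not part of the patch:
+	 *  the give-back rule in isolation, with hypothetical names:
+	 *
+	 *	int yield_restore(int oldcounter, int winner, int *ticks)
+	 *	{
+	 *		int kept = winner < oldcounter ? winner : oldcounter;
+	 *
+	 *		*ticks += kept - oldcounter;
+	 *		return kept;
+	 *	}
+	 *
+	 *  a yielder holding 7 ticks against a winner holding 4 keeps only
+	 *  4, and runq.ticks drops by the 3 ticks forfeited, matching the
+	 *  code below.]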
+	 */
+	oldcounter = 0;
+	if ( unlikely(prev->policy == (SCHED_YIELD | SCHED_OTHER)) ){
+		if ( unlikely(prev->counter == runq.ticks)) {
+			prev->policy = SCHED_OTHER;
+			runq.ticks = 0;
+		}else{
+			oldcounter = prev->counter;
+			prev->counter = 0;
+		}
+	}
+	c = -1000;
+	if (likely(runq.ticks > 0)) {
+		do {
+			int weight;
+			struct task_struct *p =
+				list_entry(next, struct task_struct, run_list);
+			/* if (can_schedule(p, this_cpu))*/ {
+				weight = goodness(p, this_cpu, prev->active_mm);
+				if (weight > c)
+					c = weight, it = p;
+			}
+			next = next->next;
+		} while (next != rqptr);
+		/*
+		 * if we get out of sync with the runq.ticks counter
+		 * force it to 0 and catch it next time around. Note we
+		 * catch a negative counter on entry.
+		 */
+		if ( unlikely(c <= 0 )){
+			runq.ticks = 0;
+		}
+	}else{
+#ifdef CONFIG_SMP
+		/*
+		 * Here we update the tasks that are current on other
+		 * processors
+		 */
+		struct list_head *wkptr,
+				 *cptr = &aligned_data[(this_cpu)].
+						schedule_data.
+						schedule_data_list;
+
+		runq.ticks = 0;
+		list_for_each ( wkptr, &hed_cpu_prio) {
+			struct task_struct *p;
+			if (cptr == wkptr ) continue;
+			p = list_entry(wkptr,
+				       struct schedule_data,
+				       schedule_data_list)->curr;
+			if ( p->effprio == 0){
+				p->counter = (p->counter >> 1) +
+					NICE_TO_TICKS(p->nice);
+				p->counter_recalc++;
+			}
+		}
+#else
+		runq.ticks = 0;
+#endif
+		runq.recalc++;
+		do {
+			int weight;
+			struct task_struct *p =
+				list_entry(next, struct task_struct, run_list);
+			runq.ticks +=
+				p->counter = NICE_TO_TICKS(p->nice);
+			p->counter_recalc++;
+			/* if (can_schedule(p, this_cpu)) */
+			{
+				weight = goodness(p, this_cpu, prev->active_mm);
+				if (weight > c)
+					c = weight, it = p;
+			}
+			next = next->next;
+		} while (next != rqptr);
+	}
+	/* Undo the stuff we did for SCHED_YIELD. We know we did something
+	 * if oldcounter != 0.
+	 */
+	if (unlikely(oldcounter)){
+
+		prev->counter = (it->counter < oldcounter) ?
+						it->counter :
+						oldcounter;
+		runq.ticks += prev->counter-oldcounter;
+		prev->policy &= ~SCHED_YIELD;
+	}
+	goto back_from_figure_non_rt_next;
+
+}
+/* Add to the head of the run queue */
+static inline void add_to_runqueue(struct task_struct * p,int cpu)
+{
+	struct list_head *next;
+	int prio;
+	/* idle tasks don't get put in the list */
+	if (unlikely(p == idle_task(cpu))) return;
+	prio = p->effprio;
+	next = Rdy_Q_Hed(prio);
+	if (list_empty(next)) {	/* an empty queue */
+		set_rq_bit(prio);
+		if (high_prio < prio) {
+			high_prio = prio;
+		}
+	}
+	list_add(&p->run_list,next);
+	p->newprio = newprio_ready_q;
+	if ( likely(!p->effprio )) {
+		int diff,c;
+		if ((diff = runq.recalc - p->counter_recalc) != 0) {
+			p->counter_recalc = runq.recalc;
+			c = NICE_TO_TICKS(p->nice) << 1;
+			p->counter = diff > 8 ? c - 1 :	/* max priority */
+				c + ((p->counter - c) >> diff);
+		}
+		runq.ticks += p->counter;
+	}
+	nr_running++;
+}
+
+/*
+ * This function is only called from schedule() so it need not worry
+ * about updating the counter as it should never be out of date.
+ * If you change this, remember to do the update.
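+ *
+ * [Editor's sketch -- illustrative only, not part of the patch: the
+ *  bit map bookkeeping used by add_to_runqueue() and get_next_task()
+ *  above, recast as a free-standing search. Because the bit for
+ *  priority p is stored flipped (bit BF-(p&0x1f) of word p>>5), the
+ *  lowest set bit in a word is the highest priority; __builtin_ctz()
+ *  stands in for the kernel's ffz(~x), and the explicit w >= 0 test
+ *  replaces the -1 guard word:
+ *
+ *	static int find_high_prio(unsigned int *map)
+ *	{
+ *		int w;
+ *
+ *		for (w = MAX_PRI/32; w >= 0; w--)
+ *			if (map[w])
+ *				return 32*w + 31 - __builtin_ctz(map[w]);
+ *		return -1;
+ *	}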
+ */
+static inline void add_last_runqueue(struct task_struct * p)
+{
+	struct list_head *next = Rdy_Q_Hed(p->effprio);
+
+	if (list_empty(next)) {	/* empty list, set the bit */
+		set_rq_bit(p->effprio);
+		if (p->effprio > high_prio){
+			high_prio = p->effprio;
+		}
+	}
+	list_add_tail(&p->run_list,next);
+	p->newprio = newprio_ready_q;
+	if ( !p->effprio ) runq.ticks += p->counter;
+	nr_running++;
+}
+
+
+static inline void move_first_runqueue(struct task_struct * p)
+{
+	list_del(&p->run_list);
+	list_add_tail(&p->run_list, Rdy_Q_Hed(p->effprio));
+}
+/*
+ * When we have a task in some queue by priority, we need
+ * to provide a way to change that priority. Depending on the
+ * queue, we must do different things. We handle this by putting
+ * a function address in the task_struct (newprio()).
+ *
+ * First, a front end routine to take care of the case where the
+ * task is not in any priority queue. We take the runqueue_lock
+ * here, so the caller must not. Since we may be called
+ * recursively, protect against a deadlock.
+ */
+static struct task_struct *newprio_inuse;
+static int newprio_inuse_count;
+
+void set_newprio(struct task_struct * tptr, int newprio)
+{
+	if ( newprio_inuse != current){
+		spin_lock_irq(&runqueue_lock);
+		newprio_inuse = current;
+	}
+	newprio_inuse_count++;
+	if (! tptr->newprio ) {
+		tptr->effprio = newprio;
+	}else if ( tptr->effprio != newprio) {
+		tptr->newprio(tptr,newprio);
+	}
+	if ( ! --newprio_inuse_count ){
+		spin_unlock_irq(&runqueue_lock);
+		newprio_inuse = 0;
+	}
+}
+
+
+/*
+ * Here are the routines we use for the ready queue and an executing
+ * process. Note that the executing process may fall out of favor
+ * as a result of the change. We do the right thing. Note that newprio
+ * is not cleared, so we test here to see if the task is still running.
+ */
+
+static void newprio_ready_q(struct task_struct * tptr,int newprio)
+{
+	_del_from_runqueue(tptr);
+	tptr->effprio = newprio;
+	add_to_runqueue(tptr,0);
+	reschedule_idle(tptr);
+}
+#ifdef CONFIG_SMP
+static void newprio_executing(struct task_struct *tptr,int newprio)
+{
+	int cpu;
+	struct schedule_data *sched_data;
+	if(!newprio || newprio < tptr->effprio){
+		tptr->need_resched = 1;
+	}
+	cpu = tptr->processor;
+	sched_data = & aligned_data[cpu].schedule_data;
+	tptr->effprio = newprio;
+	if( sched_data->curr != tptr) return;	/* if not expected, out of here */
+	re_queue_cpu(tptr,sched_data);
+	if ((cpu != smp_processor_id()) && tptr->need_resched)
+		smp_send_reschedule(cpu);
+}
+#endif
+
+
+
+/*
+ * Wake up a process. Put it on the ready-queue if it's not
+ * already there. The "current" process is not on the
+ * ready-queue (this makes it much easier to figure out if we
+ * need to preempt, esp. in the real time case). It is possible
+ * to wake the current process: this happens when it is woken
+ * before schedule has had a chance to put it properly to
+ * sleep. If schedule did not turn on interrupts in the middle of
+ * things, this would all be ok; however, it does, so we have the
+ * possibility of being in that window.
+ * The "current" process is never on the
+ * run-queue (except when the actual re-schedule is in
+ * progress), and as such you're allowed to do the simpler
+ * "current->state = TASK_RUNNING" to mark yourself runnable
+ * without the overhead of this.
+ */
+static inline int try_to_wake_up(struct task_struct * p, int synchronous)
+{
+	unsigned long flags;
+	int success = 0;
+
+	/*
+	 * We want the common case to fall straight through, thus the goto.
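+	 *
+	 * [Editor's sketch -- illustrative only, not part of the patch:
+	 *  the window mentioned above, with hypothetical wq/wait names:
+	 *
+	 *	add_wait_queue(&wq, &wait);
+	 *	set_current_state(TASK_INTERRUPTIBLE);
+	 *	if (!condition)		<- a wake_up() may land here
+	 *		schedule();
+	 *
+	 *  a wakeup in that window finds current off the ready queue,
+	 *  marks it TASK_RUNNING and requeues it, which is why the code
+	 *  below tests task_on_runqueue(p) rather than p->state.]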
+	 */
+	spin_lock_irqsave(&runqueue_lock, flags);
+	p->state = TASK_RUNNING;
+	if ( task_on_runqueue(p) )
+		goto out;
+	add_to_runqueue(p,0);
+	if (!synchronous /*|| !(p->cpus_allowed & (1 << smp_processor_id())*/)
+		reschedule_idle(p);
+	success = 1;
+out:
+	spin_unlock_irqrestore(&runqueue_lock, flags);
+	return success;
+}
+
+inline int wake_up_process(struct task_struct * p)
+{
+	return try_to_wake_up(p, 0);
+}
+/*
+ * schedule_tail() is getting called from the fork return path. This
+ * cleans up all remaining scheduler things, without impacting the
+ * common case.
+ */
+static inline void __schedule_tail(struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+
+	/*
+	 * fast path falls through. We have to clear cpus_runnable before
+	 * checking prev->state to avoid a wakeup race. Protect against
+	 * the task exiting early.
+	 */
+	task_lock(prev);
+	task_release_cpu(prev);
+	mb();
+	if (task_on_rq(prev))
+		goto needs_resched;
+
+out_unlock:
+	task_unlock(prev);	/* Synchronise here with release_task() if prev is TASK_ZOMBIE */
+	return;
+
+	/*
+	 * Slow path - we 'push' the previous process and
+	 * reschedule_idle() will attempt to find a new
+	 * processor for it. (but it might preempt the
+	 * current process as well.) We must take the runqueue
+	 * lock and re-check prev->state to be correct. It might
+	 * still happen that this process has a preemption
+	 * 'in progress' already - but this is not a problem and
+	 * might happen in other circumstances as well.
+	 */
+needs_resched:
+	{
+		unsigned long flags;
+
+		/*
+		 * Avoid taking the runqueue lock in cases where
+		 * no preemption-check is necessary:
+		 * Note: the idle task is NEVER on the ready queue, so
+		 * there is no need to check whether prev was idle.
+		 */
+
+		spin_lock_irqsave(&runqueue_lock, flags);
+		if (task_on_rq(prev) /* && !task_has_cpu(prev)*/ )
+			reschedule_idle(prev);
+		spin_unlock_irqrestore(&runqueue_lock, flags);
+		goto out_unlock;
+	}
+#define smp_label_a	_smp_label_a:
+#define smp_label_b	_smp_label_b:
+#else
+	prev->policy &= ~SCHED_YIELD;
+#define smp_label_a
+#define smp_label_b
+#endif /* CONFIG_SMP */
+}
+
+asmlinkage void schedule_tail(struct task_struct *prev)
+{
+	__schedule_tail(prev);
+	preempt_enable();
+}
+
+/*
+ * 'schedule()' is the scheduler function. It's a very simple and nice
+ * scheduler: it's not perfect, but certainly works for most things.
+ *
+ * The goto is "interesting".
+ *
+ * NOTE!! Task 0 is the 'idle' task, which gets called when no other
+ * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
+ * information in task[0] is never used.
+ */
+asmlinkage void schedule(void)
+{
+	struct schedule_data * sched_data;
+	struct task_struct *prev, *next;
+	int this_cpu;
+
+	spin_lock_prefetch(&runqueue_lock);
+ try_try_again:
+
+	preempt_disable();
+
+	if (unlikely(!current->active_mm)) BUG();
+	prev = current;
+	this_cpu = prev->processor;
+
+	if (unlikely(in_interrupt())) {
+		printk("Scheduling in interrupt\n");
+		BUG();
+	}
+
+	release_kernel_lock(prev, this_cpu);
+
+	/*
+	 * 'sched_data' is protected by the fact that we can run
+	 * only one process per CPU.
+	 */
+	sched_data = & aligned_data[this_cpu].schedule_data;
+
+	spin_lock_irq(&runqueue_lock);
+
+#ifdef CONFIG_PREEMPT
+	/*
+	 * Note that this is an '&' NOT an '&&'...
+	 */
+	if (preempt_is_disabled() & PREEMPT_ACTIVE) goto sw_TASK_RUNNING;
+#endif
+	if (prev->state == TASK_INTERRUPTIBLE) {
+		//case TASK_INTERRUPTIBLE:
+		if (likely( ! signal_pending(prev))) {
+			goto sw_default;
+		}
+		prev->state = TASK_RUNNING;
+	}
+
+	if (prev->state != TASK_RUNNING) {
+		goto sw_default;
+	}
+	//case TASK_RUNNING:
+#ifdef CONFIG_PREEMPT
+ sw_TASK_RUNNING:
+#endif
+	/*
+	 * move an exhausted RR process to be last..
+	 * Do the same for yields
+	 */
+	if (!prev->counter && (prev->policy & SCHED_RR))
+		goto move_rr_last;
+	if (prev->policy & SCHED_YIELD)
+		goto move_yield_last;
+	/*
+	 * There is a case where current is already
+	 * in the ready queue. That is where it was
+	 * on the way out, but the wait already
+	 * expired, so wake_up_process has already
+	 * done it. In this case, we don't!!
+	 */
+	if (!task_on_rq(prev))
+		add_to_runqueue(prev,this_cpu);
+	goto move_rr_back;
+	//default:
+ sw_default:
+	prev->sleep_time = jiffies;
+	prev->run_list.next = 0;
+
+ move_rr_back:
+	prev->need_resched = 0;
+	smp_label_a
+	next = get_next_task(prev, this_cpu);
+	smp_label_b
+	next->run_list.next = (struct list_head *)1;
+	sched_data->curr = next;
+	re_queue_cpu(next,sched_data);
+	spin_unlock_irq(&runqueue_lock);
+
+	if (unlikely(prev == next)) {
+		goto same_process;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * maintain the per-process 'last schedule' value.
+	 * (this has to be recalculated even if we reschedule to
+	 * the same process) Currently this is only used on SMP,
+	 * and it's approximate, so we do not have to maintain
+	 * it while holding the runqueue spinlock.
+	 */
+	sched_data->last_schedule = get_cycles();
+
+	/*
+	 * We drop the scheduler lock early (it's a global spinlock),
+	 * thus we have to lock the previous process from getting
+	 * rescheduled during switch_to() (since we are still on his stack).
+	 *
+	 * Here is how we do it. The cpus_runnable flag will be held until
+	 * the task is truly available. On the other hand, this task
+	 * is put in the ready queue during the above runqueue_lock so
+	 * it may be picked up by another cpu. Suppose that cpu is this
+	 * one. Now the prior cpu left the task in the ready queue and
+	 * we have just plucked it from there. No conflict so far, but if
+	 * cpus_runnable is not clear, the other cpu is still in the switch
+	 * code. There are no locks there SAVE THIS ONE!!! Oh woe is me!
+	 * At the same time, under these conditions, i.e. a task is
+	 * coming out of the ready queue before we actually switch, it
+	 * would be good to not switch cpus. So let's define a "wanted"
+	 * bit in the cpus_runnable member. Oops, it is now a cpu bit mask,
+	 * so, since only a few folks look at it, we will fudge it a bit.
+	 * Choose an addition that is more than one bit away from a single
+	 * bit.
+
+	 * We will spin here waiting for cpus_runnable to go to zero. Until
+	 * this happens, we must not change the processor value as
+	 * interrupt code depends on this being right for "current".
+	 */
+#define WANTED 10
+#define TAKEN  20
+	{
+		unsigned long cur_cpus_runnable = next->cpus_runnable;
+
+		atomic_add(WANTED,(atomic_t *)&next->cpus_runnable);
+		/*
+		 * It is either "WANTED+cur_cpus_runnable", which means we
+		 * need to wait, or it is:
+		 * A. The old cpu_id + WANTED or
+		 * B. WANTED - 1, which means it cleared (or was clear).
+		 * C. TAKEN + cur_cpus_runnable
+		 */
+		while ((cur_cpus_runnable != ~0UL) &&
+		       (volatile int)next->cpus_runnable ==
+		       WANTED + cur_cpus_runnable) {
+			unsigned long my_cpu = 1 << this_cpu;
+
+			barrier();
+			/*
+			 * OK, so while we wait, let's look in on prev and
+			 * see if he is wanted.
+			 */
+			if ( (volatile int)prev->cpus_runnable != my_cpu) {
+				/*
+				 * Another cpu wants the task we have yet to
+				 * switch away from. Let's steal it back.
+				 * Once WANTED is set on prev, we can clear it
+				 * either here or in schedule_tail. The other
+				 * cpu can clear it by coming here, where it
+				 * will be known by him as next...
+
+				 * Here, we set it to (TAKEN+my_cpu); in
+				 * schedule_tail it is set to my_cpu.
+				 */
+				spin_lock_irq(&runqueue_lock);
+				if ( (volatile int)prev->cpus_runnable != my_cpu) {
+					spin_unlock_irq(&runqueue_lock);
+					continue;
+				}
+				/*
+				 * Three possibilities on the state of next:
+				 * 1.) cpus_runnable has gone to ~0UL. Means
+				 *     the prior cpu has finished and is not
+				 *     interested. So put back in the ready
+				 *     queue.
+				 * 2.) The other cpu noticed our interest and
+				 *     stole it back (cpus_runnable will be
+				 *     TAKEN + his flag). Do nothing.
+				 * 3.) No change; put back in the ready queue.
+				 * Note, case 3 presents a bit of a race on our
+				 * clearing the WANTED bit. So, we subtract
+				 * and, if the result is negative, put it back
+				 * to ~0UL (the cleared state).
+				 */
+				if ( (volatile int)next->cpus_runnable !=
+				     cur_cpus_runnable + TAKEN) {
+					atomic_add(-WANTED,
+						   (atomic_t *)&next->cpus_runnable);
+					if ((volatile int)next->cpus_runnable < 0) {
+						next->cpus_runnable = ~0UL;
+					}
+					add_to_runqueue(next,this_cpu);
+				}
+				/*
+				 * So much for "next". Now let's take prev.
+				 * Setting cpus_runnable to TAKEN+old will pop
+				 * the waiter out of the wait loop.
+				 * We then wait for him to clear TAKEN to
+				 * complete the handshake. We handshake here
+				 * to keep the other cpu from seeing some later
+				 * state that may be wrong.
+				 */
+				prev->cpus_runnable = TAKEN + my_cpu;
+				next = prev;
+				spin_unlock_irq(&runqueue_lock);
+				while ((volatile int)prev->cpus_runnable ==
+				       TAKEN + my_cpu) {
+					barrier();
+				}
+				spin_lock_irq(&runqueue_lock);
+				goto _smp_label_b;
+			}
+		}
+		/*
+		 * If we popped out of the while because cpus_runnable has
+		 * TAKEN set, it means the prior owner stole back the task.
+		 * Time to rescan the ready queue (after clearing the TAKEN
+		 * bit to complete the handshake). The other possibilities
+		 * are:
+		 * cpus_runnable = WANTED - 1 (was clear when we started)
+		 * cpus_runnable = -1 (was his, but the other cpu finished,
+		 *                     setting -1)
+		 */
+		if ((volatile int)next->cpus_runnable ==
+		    TAKEN + cur_cpus_runnable){
+			atomic_add(-TAKEN,(atomic_t *)&next->cpus_runnable);
+			spin_lock_irq(&runqueue_lock);
+			goto _smp_label_a;
+		}
+	}
+	/*
+	 * Gosh wasn't that fun!
+	 */
+	task_set_cpu(next,this_cpu);
+#endif /* CONFIG_SMP */
+
+	/*
+	 * An interesting problem here. Since we turned on interrupts,
+	 * we could now have a need_resched flag set in prev. Actually
+	 * this can only happen on interrupt, and then only be meaningful
+	 * if it is done by a wakeup() call to reschedule_idle(). This
+	 * is covered, as that code will set the need_resched flag in the
+	 * task found by cpu_curr(), which comes from the cpu structs
+	 * which we have already updated.
+
+	 * The remaining problems come from leftover timeouts against
+	 * prev, but he was the target and he is gone now... unless
+	 * we did not really switch. So in the switch path we will
+	 * clear the need_resched flag, not in the no-switch path.
+	 */
+
+	kstat.context_swtch++;
+	/*
+	 * there are 3 processes which are affected by a context switch:
+	 *
+	 * prev == .... ==> (last => next)
+	 *
+	 * It's the 'much more previous' 'prev' that is on next's stack,
+	 * but prev is set to (the just run) 'last' process by switch_to().
+	 * This might sound slightly confusing but makes tons of sense.
+	 */
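+	/*
+	 * [Editor's aside -- illustrative only, not part of the patch: a
+	 *  decoder for the cpus_runnable encodings used by the hand-off
+	 *  above, assuming a single-bit owner mask "own":
+	 *
+	 *	const char *decode(unsigned long v, unsigned long own)
+	 *	{
+	 *		if (v == ~0UL)		return "free, no cpu";
+	 *		if (v == own)		return "held by owner";
+	 *		if (v == WANTED - 1)	return "was free when WANTED was added";
+	 *		if (v == own + WANTED)	return "owner switching, claimant spinning";
+	 *		if (v == own + TAKEN)	return "owner stole the task back";
+	 *		return "in transition";
+	 *	}
+	 *
+	 *  the WANTED - 1 case arises because ~0UL + WANTED wraps around.]
+	 */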
+	prepare_to_switch();
+	{
+		struct mm_struct *mm = next->mm;
+		struct mm_struct *oldmm = prev->active_mm;
+		if (!mm) {
+			if (next->active_mm) BUG();
+			next->active_mm = oldmm;
+			atomic_inc(&oldmm->mm_count);
+			enter_lazy_tlb(oldmm, next, this_cpu);
+		} else {
+			if (next->active_mm != mm) BUG();
+			switch_mm(oldmm, mm, next, this_cpu);
+		}
+
+		if (!prev->mm) {
+			prev->active_mm = NULL;
+			mmdrop(oldmm);
+		}
+	}
+
+	/*
+	 * This just switches the register state and the
+	 * stack.
+	 */
+	switch_to(prev, next, prev);
+	__schedule_tail(prev);
+	prev->need_resched = 0;
+
+same_process:
+	reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
+	if ( ! current->need_resched)
+		return;
+
+	/* The task managed to get its need_resched flag set already!
+	 */
+	goto try_try_again;
+
+
+ move_rr_last:
+	prev->counter = NICE_TO_TICKS(prev->nice);
+
+ move_yield_last:
+	if (prev->effprio)	/* non-real time tasks get cleared later */
+		prev->policy &= ~SCHED_YIELD;
+	add_last_runqueue(prev);
+	goto move_rr_back;
+
+}
+static inline struct task_struct *find_process_by_pid(pid_t pid);
+
+static int setscheduler(pid_t pid, int policy,
+			struct sched_param *param)
+{
+	struct sched_param lp;
+	struct task_struct *p;
+	int retval;
+
+	retval = -EINVAL;
+	if (!param || pid < 0)
+		goto out_nounlock;
+
+	retval = -EFAULT;
+	if (copy_from_user(&lp, param, sizeof(struct sched_param)))
+		goto out_nounlock;
+
+	/*
+	 * We play safe to avoid deadlocks.
+	 */
+	read_lock_irq(&tasklist_lock);
+	spin_lock(&runqueue_lock);
+
+	p = find_process_by_pid(pid);
+
+	retval = -ESRCH;
+	if (!p)
+		goto out_unlock;
+
+	if (policy < 0)
+		policy = p->policy;
+	else {
+		retval = -EINVAL;
+		if (policy != SCHED_FIFO && policy != SCHED_RR &&
+		    policy != SCHED_OTHER)
+			goto out_unlock;
+	}
+
+	/*
+	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..MAX_PRI; the
+	 * only valid priority for SCHED_OTHER is 0.
+	 */
+	retval = -EINVAL;
+	if (lp.sched_priority < 0 || lp.sched_priority > MAX_PRI)
+		goto out_unlock;
+	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
+		goto out_unlock;
+
+	retval = -EPERM;
+	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+	    !capable(CAP_SYS_NICE))
+		goto out_unlock;
+	if ((current->euid != p->euid) && (current->euid != p->uid) &&
+	    !capable(CAP_SYS_NICE))
+		goto out_unlock;
+
+	retval = 0;
+	p->policy = policy;
+	if ( policy == SCHED_FIFO) {
+		p->counter = -100;	/* we don't count down negative counters */
+	}else{
+		p->counter = NICE_TO_TICKS(p->nice);
+	}
+
+	p->rt_priority = lp.sched_priority;
+
+	spin_unlock_irq(&runqueue_lock);
+	set_newprio(p,lp.sched_priority);
+	goto out_readunlock;
+
+out_unlock:
+	spin_unlock_irq(&runqueue_lock);
+ out_readunlock:
+	read_unlock(&tasklist_lock);
+
+out_nounlock:
+	return retval;
+}
+asmlinkage long sys_sched_yield(void)
+{
+	/*
+	 * Trick. sched_yield() first checks to see if it will be REALLY
+	 * lonely in the ready queue. It just returns if it is the only
+	 * game in town. The multiple ready queues really help here.
+	 * (This test does not have to be atomic.) In threaded
+	 * applications this optimization gets triggered quite often.
+	 */
+	if ( ! list_empty(Rdy_Q_Hed(current->effprio))){
+		/*
+		 * I think this is safe, as only the current task can get
+		 * here and only the current task will be clearing this bit.
+		 */
+		current->policy |= SCHED_YIELD;
+		schedule();
+	}
+	return 0;
+}
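+/*
+ * [Editor's aside -- illustrative only, not part of the patch: how the
+ *  policy/priority rules enforced by setscheduler() above look from
+ *  user space; a minimal, hypothetical test program:
+ *
+ *	#include <sched.h>
+ *	#include <stdio.h>
+ *
+ *	int main(void)
+ *	{
+ *		struct sched_param sp = { .sched_priority = 50 };
+ *
+ *		if (sched_setscheduler(0, SCHED_FIFO, &sp))
+ *			perror("sched_setscheduler");
+ *		return 0;
+ *	}
+ *
+ *  priority 50 is legal for SCHED_FIFO (1..MAX_PRI); a nonzero priority
+ *  with SCHED_OTHER fails with EINVAL, and a call without CAP_SYS_NICE
+ *  fails with EPERM, exactly as coded above.]
+ */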
+/* Seems to be the first place we hear about a given cpu as it comes up.
+ * A new (including the first) cpu is reporting for duty. Since he is
+ * already running we must patch him into the processor queue.
+ * We get here the first time the processor enters the idle code and also
+ * one more time for the boot cpu so... be careful to not redo what is
+ * already done. Also note that the fork that created the task put it
+ * in the ready queue, so we need to take it out, except the initial cpu's
+ * task was not created by a fork. No matter, the removal code works even
+ * then.
+ * We give the idle task priority -1 to keep it out of the way of tasks
+ * that have real work to do.
+ */
+extern unsigned long wait_init_idle;
+
+void __init init_idle(void)
+{
+	struct schedule_data * sched_data;
+	int cpu=smp_processor_id();
+	sched_data = &aligned_data[cpu].schedule_data;
+
+	if (task_on_rq(current)) {
+		del_from_runqueue(current);
+	}
+	sched_data->curr = current;
+	sched_data->last_schedule = get_cycles();
+	current->effprio = current->rt_priority = 0;
+	sched_data->effprio = -1;	/* idle flag */
+	sched_data->cpu = cpu;
+	clear_bit(current->processor, &wait_init_idle);
+#ifdef CONFIG_SMP
+	if ( ! sched_data->schedule_data_list.next ) {
+		list_add_tail(&sched_data->schedule_data_list,&hed_cpu_prio);
+	}
+#endif
+}
+
+extern void init_timervecs (void);
+
+void __init sched_init(void)
+{
+	/*
+	 * We have to do a little magic to get the first
+	 * process right in SMP mode.
+	 */
+	int cpu = smp_processor_id();
+	int nr;
+	int i;
+
+	init_task.processor = cpu;
+	/* Init the ready queue */
+	for (i=0;i<=MAX_PRI ;i++){
+		INIT_LIST_HEAD(Rdy_Q_Hed(i));
+	}
+
+
+	for(nr = 0; nr < PIDHASH_SZ; nr++)
+		pidhash[nr] = NULL;
+	printk("rtsched version " VERSION_DATE "\n");
+
+	init_timervecs();
+
+	init_bh(TIMER_BH, timer_bh);
+	init_bh(TQUEUE_BH, tqueue_bh);
+	init_bh(IMMEDIATE_BH, immediate_bh);
+
+	/*
+	 * The boot idle thread does lazy MMU switching as well:
+	 */
+	atomic_inc(&init_mm.mm_count);
+	enter_lazy_tlb(&init_mm, current, cpu);
+}
diff -urN linux-2.4.17-rc2-virgin/kernel/sched.c linux-2.4.17-rc2-wli2/kernel/sched.c
--- linux-2.4.17-rc2-virgin/kernel/sched.c	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/kernel/sched.c	Tue Dec 18 22:28:42 2001
@@ -92,6 +92,10 @@
 spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
 rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;	/* outer */
 
+#ifdef CONFIG_RTSCHED
+extern struct task_struct *child_reaper;
+#include "rtsched.h"
+#else
 static LIST_HEAD(runqueue_head);
 
 /*
@@ -373,6 +377,7 @@
 {
 	return try_to_wake_up(p, 0);
 }
+#endif /* ifdef CONFIG_RTSCHED */
 
 static void process_timeout(unsigned long __data)
 {
@@ -458,7 +463,7 @@
 out:
 	return timeout < 0 ? 0 : timeout;
 }
-
+#ifndef CONFIG_RTSCHED
 /*
  * schedule_tail() is getting called from the fork return path.
This * cleans up all remaining scheduler things, without impacting the @@ -491,7 +496,7 @@ task_lock(prev); task_release_cpu(prev); mb(); - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto needs_resched; out_unlock: @@ -521,7 +526,7 @@ goto out_unlock; spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev)) + if (task_on_runqueue(prev) && !task_has_cpu(prev)) reschedule_idle(prev); spin_unlock_irqrestore(&runqueue_lock, flags); goto out_unlock; @@ -534,6 +539,7 @@ asmlinkage void schedule_tail(struct task_struct *prev) { __schedule_tail(prev); + preempt_enable(); } /* @@ -556,6 +562,8 @@ spin_lock_prefetch(&runqueue_lock); + preempt_disable(); + if (!current->active_mm) BUG(); need_resched_back: prev = current; @@ -583,6 +591,9 @@ move_last_runqueue(prev); } +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() & PREEMPT_ACTIVE) goto treat_like_run; +#endif switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { @@ -593,6 +604,9 @@ del_from_runqueue(prev); case TASK_RUNNING:; } +#ifdef CONFIG_PREEMPT + treat_like_run: +#endif prev->need_resched = 0; /* @@ -701,8 +715,10 @@ reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; + preempt_enable_no_resched(); return; } +#endif /* ifndef CONFIG_RTSCHED */ /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything @@ -897,7 +913,7 @@ tsk = find_task_by_pid(pid); return tsk; } - +#ifndef CONFIG_RTSCHED static int setscheduler(pid_t pid, int policy, struct sched_param *param) { @@ -967,6 +983,7 @@ out_nounlock: return retval; } +#endif /* ifndef CONFIG_RTSCHED */ asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param) @@ -979,6 +996,34 @@ return setscheduler(pid, -1, param); } +#ifdef CONFIG_PREEMPT + +#ifdef CONFIG_SMP +#define lock_to_this_cpu() \ + unsigned long old_cpus_allowed = current->cpus_allowed; \ + current->cpus_allowed = 1UL << smp_processor_id() +#define restore_cpus_allowed() current->cpus_allowed = old_cpus_allowed +#else +#define lock_to_this_cpu() +#define restore_cpus_allowed() +#endif /* !CONFIG_SMP */ + +asmlinkage void preempt_schedule(void) +{ + while (current->need_resched) { + /* it would be ideal not to lock tasks to their cpu here, + * but only around the data that needs such locking */ + lock_to_this_cpu(); + current->preempt_count += PREEMPT_ACTIVE + 1; + barrier(); + schedule(); + current->preempt_count -= PREEMPT_ACTIVE + 1; + barrier(); + restore_cpus_allowed(); + } +} +#endif /* CONFIG_PREEMPT */ + asmlinkage long sys_sched_getscheduler(pid_t pid) { struct task_struct *p; @@ -1030,6 +1075,7 @@ return retval; } +#ifndef CONFIG_RTSCHED asmlinkage long sys_sched_yield(void) { /* @@ -1070,7 +1116,7 @@ } return 0; } - +#endif /* ifndef CONFIG_RTSCHED */ asmlinkage long sys_sched_get_priority_max(int policy) { int ret = -EINVAL; @@ -1078,7 +1124,7 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: - ret = 99; + ret = MAX_PRI; break; case SCHED_OTHER: ret = 0; @@ -1297,6 +1343,7 @@ atomic_inc(¤t->files->count); } +#ifndef CONFIG_RTSCHED extern unsigned long wait_init_idle; void __init init_idle(void) @@ -1342,3 +1389,4 @@ atomic_inc(&init_mm.mm_count); enter_lazy_tlb(&init_mm, current, cpu); } +#endif /* ifndef CONFIG_RTSCHED */ diff -urN linux-2.4.17-rc2-virgin/kernel/sysctl.c linux-2.4.17-rc2-wli2/kernel/sysctl.c --- linux-2.4.17-rc2-virgin/kernel/sysctl.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/kernel/sysctl.c Tue Dec 18 22:28:42 
2001 @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -256,10 +257,16 @@ {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, #endif +#ifdef CONFIG_KDB + {KERN_KDB, "kdb", &kdb_on, sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif /* CONFIG_KDB */ {0} }; static ctl_table vm_table[] = { + {VM_FREEPG, "freepages", + &freepages, sizeof(freepages_t), 0644, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &bdflush_min, &bdflush_max}, @@ -271,6 +278,8 @@ &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec}, {VM_PAGE_CLUSTER, "page-cluster", &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAX_MAP_COUNT, "max_map_count", + &max_map_count, sizeof(int), 0644, NULL, &proc_dointvec}, {VM_MIN_READAHEAD, "min-readahead", &vm_min_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, {VM_MAX_READAHEAD, "max-readahead", diff -urN linux-2.4.17-rc2-virgin/kernel/timer.c linux-2.4.17-rc2-wli2/kernel/timer.c --- linux-2.4.17-rc2-virgin/kernel/timer.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/kernel/timer.c Tue Dec 18 22:28:42 2001 @@ -583,7 +583,15 @@ update_one_process(p, user_tick, system, cpu); if (p->pid) { +#ifdef CONFIG_RTSCHED + /* SCHED_FIFO and the idle(s) have counters set to -100, + * so we won't count them, seems like a good idea for + * both schedulers, but, being pure... + */ + if (p->counter >= 0 && --p->counter <= 0) { +#else if (--p->counter <= 0) { +#endif p->counter = 0; p->need_resched = 1; } diff -urN linux-2.4.17-rc2-virgin/kernel/user.c linux-2.4.17-rc2-wli2/kernel/user.c --- linux-2.4.17-rc2-virgin/kernel/user.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/kernel/user.c Tue Dec 18 22:28:42 2001 @@ -19,7 +19,14 @@ #define UIDHASH_BITS 8 #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) ^ uid) & UIDHASH_MASK) + +/* + * hash function borrowed from Chuck Lever's paper + * The effects of this replacement have not been measured. 
+ * -- wli + */ +#define __uidhashfn(uid) \ + (((2654435761UL*(uid)) >> (BITS_PER_LONG-UIDHASH_BITS)) & UIDHASH_MASK) #define uidhashentry(uid) (uidhash_table + __uidhashfn(uid)) static kmem_cache_t *uid_cachep; diff -urN linux-2.4.17-rc2-virgin/lib/dec_and_lock.c linux-2.4.17-rc2-wli2/lib/dec_and_lock.c --- linux-2.4.17-rc2-virgin/lib/dec_and_lock.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/lib/dec_and_lock.c Tue Dec 18 22:28:42 2001 @@ -1,5 +1,6 @@ #include #include +#include #include /* diff -urN linux-2.4.17-rc2-virgin/mm/Makefile linux-2.4.17-rc2-wli2/mm/Makefile --- linux-2.4.17-rc2-virgin/mm/Makefile Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/mm/Makefile Tue Dec 18 22:39:09 2001 @@ -9,12 +9,12 @@ O_TARGET := mm.o -export-objs := shmem.o filemap.o +export-objs := shmem.o filemap.o page_alloc.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ - shmem.o + shmem.o rmap.o obj-$(CONFIG_HIGHMEM) += highmem.o diff -urN linux-2.4.17-rc2-virgin/mm/TODO linux-2.4.17-rc2-wli2/mm/TODO --- linux-2.4.17-rc2-virgin/mm/TODO Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/mm/TODO Tue Dec 18 22:28:42 2001 @@ -0,0 +1,31 @@ + VM TODO list + +Forever valid TODO entries: + - keep up with the official kernel + - port over bugfixes + - minimise the diff by keeping code in sync, where possible + +Easy short-term features: + - reclaim swap space from refill_inactive() + - simplify SMP locking + - replace foo()/foo_pgd()/foo_pmd()/foo_pte() stuff with + one single function using a for_each_pte() macro + for_each_pte(ptep, mm, start_address, end_address) + - stronger drop behind / unused object dropping, all the way + to the far end of the inactive list + - per-zone active/inactive list (wli) + - fix page_launder() to not eat horrible amounts of CPU or flush + all pages to disk at once + - better VM balancing, clean vs. dirty ratio + +Long-term features: + - extensive VM statistics + - IO clustering for page_launder() and sync_old_buffers() + - readahead on per-VMA level (+ drop behind?) + - more graceful degradation when the load gets high + - reducing readahead + - unfair pageout so not all apps fall over + - memory objects, using pagecache and tmpfs for storage so + the memory object itself doesn't introduce any new overhead + - using the memory objects, removing page table copying from fork() + - load control able to deal with really extreme loads, swapping diff -urN linux-2.4.17-rc2-virgin/mm/bootmem.c linux-2.4.17-rc2-wli2/mm/bootmem.c --- linux-2.4.17-rc2-virgin/mm/bootmem.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/mm/bootmem.c Tue Dec 18 22:28:42 2001 @@ -3,8 +3,9 @@ * * Copyright (C) 1999 Ingo Molnar * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 + * Segment tree memory reservation system, William Irwin, IBM, Oct 2001 * - * simple boot-time physical memory area allocator and + * Simple boot-time physical memory area allocator and * free memory collector. It's used to deal with reserved * system memory and memory holes as well. */ @@ -17,40 +18,192 @@ #include #include #include -#include +#include /* - * Access to this subsystem has to be serialized externally. 
(this is - * true for the boot process anyway) + * Design notes: + * + * This design was arrived at by considering four principal concerns, + * beyond properly representing discontiguous memory machines: + * + * (1) Machines on which the physical address space is highly fragmented. + * (2) Machines where nodes' memory fragments may be interleaved. + * (3) Machines whose physical address space layouts are irregular. + * (4) Machines requiring heavy boot-time memory reservation activity. + * + * These design concerns led to an implementation which represented + * available physical memory explicitly in terms of intervals to save + * space and also one utilizing an efficient search structure. These + * design concerns may not be universally important; however, small + * benefits should be seen even on low-memory machines, or machines + * without significant boot-time memory reservation activity. + * + * Concern (3) is perhaps the principal concern. In this situation, + * there is very little prior knowledge of memory range to node + * mappings, so perhaps a large portion of the work the bootmem + * allocator is intended to do must be done "up front" when bitmaps + * associated with memory ranges are used to represent availability + * information. While it is possible to use bitmaps for that purpose, + * it is my belief that the reduced space overhead of the segment + * trees and the obliviousness of their storage management with + * respect to the address ranges they represent is advantageous. + * + * In order to motivate how (2) is addressed, the notion of + * "residency" is useful. When a memory range is associated with + * a node, only a certain portion of it is actually available. + * the ratio of available memory to the size of the memory range + * being tracked, sizeof(available memory)/sizeof(memory in map), + * is what I call the residency of the range. When the map of the + * available memory requires a contiguous range of memory that is + * a larger proportion of the range of memory being tracked than + * the residency of that range, then the algorithm can no longer + * properly function. + * So to address that, a representation has been chosen which does + * not grow with the size of the range of memory being represented. + * The residency requirements of the bitmap-based representation + * are 1/(8*sizeof(page)) on byte addressed machines. But the range + * set representation has no specific residency requirements. + * Segment pools need not be drawn from a contiguous range of memory + * larger than the combined size of a header for tracking all the + * segment pools and the size of a single range structure. Dynamic + * addition of segment pools is not implemented here yet. + */ + +/* + * Access to this subsystem has to be serialized externally. (This is + * true for the boot process anyway.) + */ + +/* + * Alignment has to be a power of 2 value. + * These macros abstract out common address calculations for alignments. + */ +#define RND_DN(x,n) ((x) & ~((n)-1)) +#define RND_UP(x,n) RND_DN((x) + (n) - 1, n) +#define DIV_DN(x,n) ((x) / (n)) +#define DIV_UP(x,n) DIV_DN((x) + ((n) - 1), n) + +/* + * The highest and lowest page frame numbers on the system. + * These refer to physical addresses backed by memory regardless + * of runtime availability. 
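+ *
+ * [Editor's aside -- illustrative only, not part of the patch: concrete
+ *  values for the alignment macros defined above, with n a power of 2.
+ *  Taking n == 4096:
+ *
+ *	RND_DN(8191, 4096) == 4096	RND_UP(4097, 4096) == 8192
+ *	DIV_DN(4097, 4096) == 1		DIV_UP(4097, 4096) == 2
+ *
+ *  so DIV_UP gives the number of pages needed to cover 4097 bytes, and
+ *  RND_DN/RND_UP give the enclosing page boundaries.]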
 */
 unsigned long max_low_pfn;
 unsigned long min_low_pfn;
 
-/* return the number of _pages_ that will be allocated for the boot bitmap */
-unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+/*
+ * This is a poor choice of random seeds for deterministic
+ * behavior during debugging. Oddly enough it does not seem
+ * to damage the structure of the trees.
+ */
+static long __initdata random_seed = 1L;
+
+/*
+ * Park-Miller random number generator, using Schrage's
+ * technique for overflow handling: with m = 2^31 - 1 and
+ * a = 16807, q = m/a = 127773 and r = m%a = 2836, so that
+ * a*(seed % q) - r*(seed / q) never overflows 32 bits.
+ */
+static unsigned long __init rand(void)
 {
-	unsigned long mapsize;
+	long a = 16807;
+	long q = 127773;
+	long r = 2836;
+	long k;
+
+	k = random_seed / q;
+	random_seed = a*(random_seed - k*q) - r*k;
+	if (random_seed <= 0)
+		random_seed += 2147483647L;
+	return (unsigned long)random_seed;
+}
 
-	mapsize = (pages+7)/8;
-	mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
-	mapsize >>= PAGE_SHIFT;
+/*
+ * Initialize the segment pool, which occupies node_bootmem_map.
+ * This is the memory from which the tree nodes tracking available
+ * memory are allocated.
+ */
+static void __init segment_pool_init(bootmem_data_t *bdata)
+{
+	unsigned k;
+	segment_buf_t *segment_pool = (segment_buf_t *)bdata->node_bootmem_map;
 
-	return mapsize;
+	for(k = 0; k < NR_SEGMENTS - 1; ++k)
+		segment_pool[k].next = &segment_pool[k+1];
+	segment_pool[NR_SEGMENTS-1].next = NULL;
+	bdata->free_segments = segment_pool;
+}
+
+/*
+ * Allocates a tree node from a node's segment pool, initializing the
+ * whole of the memory block to zeroes.
+ */
+static segment_tree_node_t * __init segment_alloc(bootmem_data_t *bdata)
+{
+	segment_tree_node_t *tmp = (segment_tree_node_t *)bdata->free_segments;
+
+	if(!bdata->free_segments)
+		return NULL;
+
+	bdata->free_segments = bdata->free_segments->next;
+	memset(tmp, 0, sizeof(segment_tree_node_t));
+	return tmp;
+}
+
+/*
+ * Convenience operation to insert a tree node into both
+ * of the segment trees associated with a node. The randomized
+ * priorities are used here.
+ */
+static void __init segment_insert(segment_tree_root_t *root,
+				  segment_tree_node_t *node)
+{
+	node->start.priority = rand();
+	node->length.priority = rand();
+	treap_insert(&root->start_tree, &node->start);
+	treap_insert(&root->length_tree, &node->length);
+}
+
+/*
+ * Returns a segment tree node to the node-local pool of available
+ * tree nodes.
+ */
+static void __init segment_free(bootmem_data_t *bdata,
+				segment_tree_node_t *node)
+{
+	segment_buf_t *tmp;
+
+	if(!node)
+		return;
+
+	tmp = (segment_buf_t *)node;
+	tmp->next = bdata->free_segments;
+	bdata->free_segments = tmp;
+}
+
+/*
+ * Return the number of _pages_ that will be allocated for the bootmem
+ * segment pool. Its sole purpose is to warn callers of the bootmem
+ * interface in advance of its size, so that a suitably large range of
+ * physical memory may be found to hold it.
+ */
+unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+{
+	return DIV_UP(NR_SEGMENTS*sizeof(segment_buf_t),PAGE_SIZE);
 }
 
 /*
  * Called once to set up the allocator itself.
+ * Its responsibilities are to manipulate the bootmem_data_t within
+ * a node, initializing its address range and node-local segment
+ * pool fields. It is supposed to calculate the amount of memory
+ * required for the node_bootmem_map, but this is not possible
+ * without a change of interface.
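+ *
+ * [Editor's aside -- illustrative only, not part of the patch: the
+ *  classic self-test for the minimal-standard generator used by rand()
+ *  above, runnable as a stand-alone program:
+ *
+ *	static long seed = 1;
+ *	static long pm_rand(void)
+ *	{
+ *		const long a = 16807, m = 2147483647, q = 127773, r = 2836;
+ *		long k = seed / q;
+ *		seed = a*(seed - k*q) - r*k;
+ *		if (seed <= 0)
+ *			seed += m;
+ *		return seed;
+ *	}
+ *
+ *  Park and Miller's published check: starting from seed 1, the
+ *  10,000th value is 1043618065.]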
*/ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; - unsigned long mapsize = ((end - start)+7)/8; pgdat->node_next = pgdat_list; pgdat_list = pgdat; - mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL); bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); bdata->node_boot_start = (start << PAGE_SHIFT); bdata->node_low_pfn = end; @@ -59,300 +212,701 @@ * Initially all pages are reserved - setup_arch() has to * register free RAM areas explicitly. */ - memset(bdata->node_bootmem_map, 0xff, mapsize); + bdata->segment_tree.start_tree = NULL; + bdata->segment_tree.length_tree = NULL; + segment_pool_init(bdata); - return mapsize; + return RND_UP(NR_SEGMENTS*sizeof(segment_buf_t), PAGE_SIZE); } /* - * Marks a particular physical memory range as unallocatable. Usable RAM - * might be used for boot-time allocations - or it might get added - * to the free page pool later on. + * reserve_bootmem_core marks a particular segment of physical + * memory as unavailable. Available memory might be used for boot-time + * allocations, or it might be made available again later on. + * + * Its behavior is to mark the specified range of physical memory + * as unavailable, irrespective of alignment constraints (in contrast + * to prior incarnations, which page-aligned the starting and ending + * addresses of the unavailable interval of memory). */ -static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, + unsigned long addr, unsigned long size) { - unsigned long i; + unsigned long start; + unsigned long end; + segment_tree_node_t split_segment, segment; + segment_tree_node_t reserved_left, reserved_right; + segment_tree_node_t *multiple_left, *multiple_right; + treap_node_t *tmp, *parent, *intersect; + /* - * round up, partially reserved pages are considered - * fully reserved. + * Round up, partially reserved pages are considered fully reserved. */ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; - unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; + start = addr; + end = start + size - 1; - if (!size) BUG(); + segment_set_endpoints(&segment, start, end); - if (sidx < 0) - BUG(); - if (eidx < 0) - BUG(); - if (sidx >= eidx) - BUG(); - if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn) - BUG(); - if (end > bdata->node_low_pfn) - BUG(); - for (i = sidx; i < eidx; i++) - if (test_and_set_bit(i, bdata->node_bootmem_map)) - printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); + segment_all_intersect(&bdata->segment_tree.start_tree, + start, end, &intersect); + + /* + * If the set of intersecting intervals is empty, report + * the entire interval as multiply-reserved. Then the + * condition of the loop ensures a proper exit will follow. + */ + if(!intersect) + printk(KERN_WARNING "the interval [%lu, %lu] " + "was multiply reserved (!intersect)\n", + segment_start(&segment), + segment_end(&segment)); + + /* + * For error-checking, this must be called only for a single + * node per reservation. The next step in strict error checking + * would be to track the fragments of the interval to reserve + * that do not lie within any available interval and then report + * them as multiply-reserved. 
+ * + * Unfortunately, error checking that way appears to require + * unbounded allocations in order to maintain the set of multiply + * reserved intervals, so it is not entirely robust. + * + * For the moment, a cruder form of error checking is done: + * if the available interval does not contain the interval + * to be reserved, then the complement of the reserved + * interval with respect to the available interval is reported + * as multiply reserved. This may multiply report multiply + * reserved ranges, but it is still less verbose than the + * mechanism used in the bitmap-based allocator. + */ + + /* + * Destructive post-order traversal of the set of + * intersecting intervals. + */ + tmp = intersect; + treap_find_leftmost_leaf(tmp); + while(tmp) { + segment_tree_node_t *fragment = &split_segment; + segment_tree_node_t *avail = start_segment_treap(tmp); + treap_find_parent_and_remove_child(tmp, parent); + + multiple_left = &reserved_left; + multiple_right = &reserved_right; + + if(!segment_contains(avail, &segment)) { + segment_set_endpoints(multiple_left, + segment_start(&segment), + segment_end(&segment)); + segment_complement(&multiple_left, avail, + &multiple_right); + if(multiple_left) + printk(KERN_WARNING "the interval [%lu, %lu] " + " was multiply reserved (left)\n", + segment_start(multiple_left), + segment_end(multiple_left)); + if(multiple_right) + printk(KERN_WARNING "the interval [%lu, %lu] " + " was multiply reserved (right)\n", + segment_start(multiple_right), + segment_end(multiple_right)); + } + + if(!treap_root_delete(segment_length_link(tmp))) + treap_root_delete(&bdata->segment_tree.length_tree); + + segment_complement(&avail, &segment, &fragment); + + if(!avail) + segment_free(bdata, start_segment_treap(tmp)); + else + segment_insert(&bdata->segment_tree, avail); + + if(fragment) { + + avail = segment_alloc(bdata); + + if(!avail) + BUG(); + + segment_set_endpoints(avail, segment_start(fragment), + segment_end(fragment)); + segment_insert(&bdata->segment_tree, avail); + } + + tmp = parent; + treap_find_leftmost_leaf(tmp); + } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +/* + * free_bootmem_core marks a particular segment of the physical + * address space as available. Its semantics are to make the range + * of addresses available, irrespective of alignment constraints. + */ +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { - unsigned long i; - unsigned long start; + unsigned long start, end; + segment_tree_node_t segment, *avail, intersection, freed; + treap_node_t *tmp, *parent, *intersect = NULL; + + start = addr; + end = start + size - 1; + + segment_set_endpoints(&segment, start, end); + segment_set_endpoints(&freed, start, end); + + segment_all_intersect(&bdata->segment_tree.start_tree, + start ? start - 1 : start, end + 1, &intersect); + /* - * round down end of usable mem, partially free pages are - * considered reserved. + * Error checking here is simple: + * If the available segment and the segment being freed truly + * intersect, their intersection should be reported as multiply + * made available. */ - unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; - - if (!size) BUG(); - if (end > bdata->node_low_pfn) - BUG(); /* - * Round up the beginning of the address. 
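+ *
+ * [Editor's aside -- illustrative only, not part of the patch: a
+ *  concrete run of the merge logic described below. With [100,199]
+ *  and [300,399] available, freeing [200,299] searches for
+ *  intersections with the expanded interval [199,300], finds both
+ *  neighbors, removes them, and unites them with the freed segment:
+ *
+ *	segment_set_endpoints(&segment, 200, 299);
+ *	segment_unite(&segment, avail);		(applied twice)
+ *
+ *  after which a single node [100,399] is inserted, so adjacent free
+ *  intervals always coalesce.]
+ *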
+ * Destructive post-order traversal of the set of intervals + * intersecting with the freed interval expanded by one. This + * provides for merging of available intervals, as all the + * adjacent intervals are united with newly available interval. */ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + tmp = intersect; + treap_find_leftmost_leaf(tmp); + while(tmp) { + + avail = start_segment_treap(tmp); + treap_find_parent_and_remove_child(tmp, parent); + + if(segment_intersect(&freed, avail)) { + segment_intersection(&intersection, &freed, avail); + printk(KERN_WARNING "the interval [%lu, %lu] " + "was multiply made available\n", + segment_start(&intersection), + segment_end(&intersection)); + } - for (i = sidx; i < eidx; i++) { - if (!test_and_clear_bit(i, bdata->node_bootmem_map)) - BUG(); + segment_unite(&segment, avail); + + if(!treap_root_delete(segment_length_link(tmp))) + treap_root_delete(&bdata->segment_tree.length_tree); + + segment_free(bdata, avail); + + tmp = parent; + treap_find_leftmost_leaf(tmp); } + + avail = segment_alloc(bdata); + if(!avail) + BUG(); + + segment_set_endpoints(avail, segment_start(&segment), + segment_end(&segment)); + + segment_insert(&bdata->segment_tree, avail); } /* - * We 'merge' subsequent allocations to save space. We might 'lose' - * some fraction of a page if allocations cannot be satisfied due to - * size constraints on boxes where there is physical RAM space - * fragmentation - in these cases * (mostly large memory boxes) this - * is not a problem. + * The terms are borrowed from linear programming. + * A feasible line segment is one which contains a subinterval + * aligned on the appropriate boundary of sufficient length. + * + * The objective function is the magnitude of the least residue + * of the smallest aligned address within the subinterval minus the goal + * mod the largest page frame number. A conditional is used instead of + * of remainder so as to avoid the overhead of division. * - * On low memory boxes we get it right in 100% of the cases. + * The idea here is to iterate over the feasible set and minimize + * the objective function (by exhaustive search). The search space + * is "thinned" prior to the iteration by using the heuristic that + * the interval must be at least of the length requested, though + * that is not sufficient because of alignment constraints. */ +#define FEASIBLE(seg, len, align) \ +( \ + (segment_end(seg) >= RND_UP(segment_start(seg), align)) \ + && \ + ((segment_end(seg) - RND_UP(segment_start(seg), align)) > (len))\ +) + +#define STARTS_BELOW(seg,goal,align,len) \ + (RND_UP(segment_start(seg), align) <= (goal)) + +#define ENDS_ABOVE(seg, goal, align, len) \ + ((segment_end(seg) > (goal)) && ((segment_end(seg) - (goal)) > (len))) + +#define GOAL_WITHIN(seg,goal,align,len) \ + (STARTS_BELOW(seg,goal,align,len) && ENDS_ABOVE(seg,goal,align,len)) + +#define GOAL_ABOVE(seg, goal, align) \ + ((goal) > segment_end(seg)) + +#define DISTANCE_BELOW(seg, goal, align) \ + (segment_start(seg) - (goal)) + +#define DISTANCE_ABOVE(seg, goal, align) \ + (((ULONG_MAX - (goal)) + 1) + segment_start(seg)) + +#define OBJECTIVE(seg, goal, align, len) \ +( GOAL_WITHIN(seg,goal,align,len) \ + ? 0UL \ + : ( \ + GOAL_ABOVE(seg, goal, align) \ + ? DISTANCE_ABOVE(seg, goal, align) \ + : DISTANCE_BELOW(seg, goal, align) \ + ) \ +) + +#define UNVISITED 0 +#define LEFT_SEARCHED 1 +#define RIGHT_SEARCHED 2 +#define VISITED 3 + /* - * alignment has to be a power of 2 value. 
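+ *
+ * [Editor's aside -- illustrative only, not part of the patch: the
+ *  OBJECTIVE() macro above measures the wrap-around distance
+ *  (start - goal) mod 2^BITS_PER_LONG. On a 32-bit machine with
+ *  goal == 16MB:
+ *
+ *	feasible segment containing the goal	-> 0
+ *	segment starting at 24MB		-> 8MB (DISTANCE_BELOW)
+ *	segment ending below the goal,
+ *	starting at 4MB				-> (ULONG_MAX - 16MB) + 1 + 4MB
+ *						   (DISTANCE_ABOVE)
+ *
+ *  so minimizing it prefers the lowest feasible start at or above the
+ *  goal, wrapping around to the lowest start overall only when no
+ *  interval lies above.]
+ *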
+ * __alloc_bootmem_core attempts to satisfy reservation requests + * of a certain size with alignment constraints, so that the beginning + * of the allocated line segment is as near as possible to the goal + * in the following sense: + * + * The beginning of the allocated line segment is either the lowest + * possible address above the goal, or the lowest possible address + * overall. This actually has a simple notion of distance, namely + * (goal - start) % (MAX_ADDR + 1). The OBJECTIVE macros measures + * this distance, albeit with some arithmetic complications. + * + * The algorithm proceeds as follows: + * (1) Divide the set of available intervals into those which are + * long enough and those which are not long enough, ignoring + * alignment constraints. + * (2) Perform depth-first search over the tree of supposedly + * long enough intervals for the best possible interval. + * + * The FEASIBLE macro is used to determine whether it is truly + * possible to place an aligned interval of sufficient length + * within the interval, and it is needed because the true length + * of the interval is not sufficient to determine that, and + * because it is not truly possible to subdivide the set of available + * intervals according to this criterion with pure tree operations. + * + * As address ranges are the granularity of available interval tracking, + * this should provide optimal merging behavior. */ + static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal) { - unsigned long i, start = 0; + unsigned long length; + segment_tree_node_t left_half, right_half, reserved, *left, *right; + segment_tree_node_t *optimum, *node; + treap_node_t *tmp, *infeasible, *feasible; void *ret; - unsigned long offset, remaining_size; - unsigned long areasize, preferred, incr; - unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >> - PAGE_SHIFT); - if (!size) BUG(); + feasible = infeasible = NULL; - if (align & (align-1)) + if(!align) + align = 1; + + length = size; + if(!length) BUG(); - offset = 0; - if (align && - (bdata->node_boot_start & (align - 1UL)) != 0) - offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; - - /* - * We try to allocate bootmem pages above 'goal' - * first, then we try to allocate lower pages. - */ - if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { - preferred = goal - bdata->node_boot_start; - } else - preferred = 0; - - preferred = ((preferred + align - 1) & ~(align - 1)) >> PAGE_SHIFT; - preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; - incr = align >> PAGE_SHIFT ? 
: 1; - -restart_scan: - for (i = preferred; i < eidx; i += incr) { - unsigned long j; - if (test_bit(i, bdata->node_bootmem_map)) + treap_split(&bdata->segment_tree.length_tree, length, &infeasible, + &feasible); + optimum = NULL; + + tmp = feasible; + while(tmp) { + + if(tmp->marker == UNVISITED) { + if(tmp->left) { + tmp->marker = LEFT_SEARCHED; + tmp = tmp->left; + continue; + } else if(tmp->right) { + tmp->marker = RIGHT_SEARCHED; + tmp = tmp->right; + continue; + } else + tmp->marker = VISITED; + } else if(tmp->marker == LEFT_SEARCHED) { + if(tmp->right) { + tmp->marker = RIGHT_SEARCHED; + tmp = tmp->right; + continue; + } else + tmp->marker = VISITED; + } else if(tmp->marker == RIGHT_SEARCHED) + tmp->marker = VISITED; + else if(tmp->marker == VISITED) { + tmp->marker = UNVISITED; + tmp = tmp->parent; continue; - for (j = i + 1; j < i + areasize; ++j) { - if (j >= eidx) - goto fail_block; - if (test_bit (j, bdata->node_bootmem_map)) - goto fail_block; - } - start = i; - goto found; - fail_block:; + } else + BUG(); + + if(!tmp) + break; + + node = length_segment_treap(tmp); + + if(!optimum && FEASIBLE(node, length, align)) + + optimum = node; + + else if(FEASIBLE(node, length, align) + && (OBJECTIVE(node, goal, align, length) + < OBJECTIVE(optimum, goal, align, length))) + + optimum = node; + } - if (preferred) { - preferred = offset; - goto restart_scan; + + /* + * Restore the set of available intervals keyed by length, + * taking into account the need to remove the optimum from + * the set if it has been determined. + */ + if(!optimum) { + treap_join(&bdata->segment_tree.length_tree, &feasible, + &infeasible); + return NULL; } - return NULL; -found: - if (start >= eidx) - BUG(); + + if(!treap_root_delete(treap_node_link(&optimum->start))) + treap_root_delete(&bdata->segment_tree.start_tree); + + if(!treap_root_delete(treap_node_link(&optimum->length))) + treap_root_delete(&feasible); + + treap_join(&bdata->segment_tree.length_tree, &infeasible, &feasible); /* - * Is the next page of the previous allocation-end the start - * of this allocation's buffer? If yes then we can 'merge' - * the previous partial page with this allocation. - */ - if (align <= PAGE_SIZE - && bdata->last_offset && bdata->last_pos+1 == start) { - offset = (bdata->last_offset+align-1) & ~(align-1); - if (offset > PAGE_SIZE) + * Now the iteration has converged to the optimal feasible interval. + * Within that interval we must now choose a subinterval + * satisfying the alignment constraints and do the appropriate + * splitting of the interval from which it was drawn. 
+ */ + + segment_set_endpoints(&reserved, goal, goal + length - 1); + + if(!segment_contains_point(optimum, goal) + || !segment_contains(optimum, &reserved)) + + segment_set_endpoints(&reserved, + RND_UP(segment_start(optimum), align), + RND_UP(segment_start(optimum),align)+length-1); + + segment_set_endpoints(&left_half, segment_start(optimum), + segment_end(optimum)); + + left = &left_half; + right = &right_half; + segment_complement(&left, &reserved, &right); + + if(!left && !right) + segment_free(bdata, optimum); + + if(left) { + segment_set_endpoints(optimum, segment_start(left), + segment_end(left)); + segment_insert(&bdata->segment_tree, optimum); + } + + if(right) { + segment_tree_node_t *segment = segment_alloc(bdata); + if(!segment) BUG(); - remaining_size = PAGE_SIZE-offset; - if (size < remaining_size) { - areasize = 0; - // last_pos unchanged - bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - } else { - remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - bdata->last_pos = start+areasize-1; - bdata->last_offset = remaining_size; - } - bdata->last_offset &= ~PAGE_MASK; - } else { - bdata->last_pos = start + areasize - 1; - bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); + segment_set_endpoints(segment, segment_start(right), + segment_end(right)); + segment_insert(&bdata->segment_tree, segment); } + /* - * Reserve the area now: + * Convert the physical address to a kernel virtual address, + * zero out the memory within the interval, and return it. */ - for (i = start; i < start+areasize; i++) - if (test_and_set_bit(i, bdata->node_bootmem_map)) - BUG(); + ret = (void *)(phys_to_virt(segment_start(&reserved))); memset(ret, 0, size); + return ret; } +/* + * free_all_bootmem_core's responsibilities are to initialize the + * node_mem_map array of struct page with the availability information + * regarding physical memory, and to make available the memory the + * bootmem allocator itself used for tracking available physical memory. + * Here the prior behavior with respect to page alignment is emulated + * by reducing the granularity of the address ranges to page frames, + * using the conservative approximation of the largest page-aligned + * interval lying within the interval seen to be available, or making + * no memory available if the interval is smaller than a page in length. 
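+ *
+ * For example (assuming PAGE_SIZE == 4096): an available interval
+ * [0x1234, 0x6FFF] is approximated by the page-aligned interval
+ * [0x2000, 0x6FFF], i.e. page frames 2 through 6, while an interval
+ * [0x1234, 0x1FFF], which contains no whole page frame, makes no
+ * memory available.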
+ */
 static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 {
-	struct page *page = pgdat->node_mem_map;
-	bootmem_data_t *bdata = pgdat->bdata;
-	unsigned long i, count, total = 0;
-	unsigned long idx;
+	unsigned long total = 0UL, mapstart, start, end;
+	unsigned long node_start = pgdat->bdata->node_boot_start >> PAGE_SHIFT;
+	struct page *page;
+	treap_node_t *parent, *tmp;

-	if (!bdata->node_bootmem_map) BUG();
+	mapstart = virt_to_phys(pgdat->bdata->node_bootmem_map);

-	count = 0;
-	idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
-	for (i = 0; i < idx; i++, page++) {
-		if (!test_bit(i, bdata->node_bootmem_map)) {
-			count++;
-			ClearPageReserved(page);
-			set_page_count(page, 1);
-			__free_page(page);
-		}
-	}
-	total += count;
+#ifdef DEBUG_BOOTMEM
+
+	printk("Available physical memory:\n");
+
+#endif /* DEBUG_BOOTMEM */
+
+	free_bootmem_core(pgdat->bdata, mapstart,
+			RND_UP(NR_SEGMENTS*sizeof(segment_buf_t), PAGE_SIZE));

	/*
-	 * Now free the allocator bitmap itself, it's not
-	 * needed anymore:
+	 * Destructive post-order traversal of the length tree.
+	 * The tree is never used again, so no attempt is made
+	 * to restore it to working order.
	 */
-	page = virt_to_page(bdata->node_bootmem_map);
-	count = 0;
-	for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
-		count++;
-		ClearPageReserved(page);
-		set_page_count(page, 1);
-		__free_page(page);
+	tmp = pgdat->bdata->segment_tree.length_tree;
+	treap_find_leftmost_leaf(tmp);
+	while(tmp) {
+		segment_tree_node_t *segment = length_segment_treap(tmp);
+
+		/*
+		 * This calculation differs from that in prior
+		 * incarnations in this subsystem, so I describe it
+		 * in some detail here.
+		 *
+		 *******************************************************
+		 *
+		 * We choose start to be the least pfn with
+		 *
+		 *	PAGE_SIZE * start >= segment_start(segment)
+		 *
+		 * so after division and ceiling:
+		 *
+		 *	start = DIV_UP(segment_start(segment), PAGE_SIZE)
+		 *
+		 *******************************************************
+		 *
+		 * Now the last pfn is the greatest pfn such that
+		 *
+		 *	PAGE_SIZE * last + PAGE_SIZE - 1 <= segment_end(segment)
+		 *
+		 * -or-
+		 *
+		 *	PAGE_SIZE * (last + 1) <= segment_end(segment) + 1
+		 *
+		 * giving us after division and flooring:
+		 *
+		 *	last + 1 = DIV_DN(segment_end(segment) + 1, PAGE_SIZE)
+		 *
+		 * or using end as a -strict- upper bound (i.e. end > pfn),
+		 * we have
+		 *
+		 *	end = DIV_DN(segment_end(segment) + 1, PAGE_SIZE)
+		 *
+		 */
+
+		start = DIV_UP(segment_start(segment), PAGE_SIZE);
+		end = DIV_DN(segment_end(segment) + 1, PAGE_SIZE);
+
+#ifdef DEBUG_BOOTMEM
+
+		if(start < end)
+			printk("available segment: [%lu,%lu]\n",
+				start * PAGE_SIZE,
+				end * PAGE_SIZE - 1);
+
+#endif /* DEBUG_BOOTMEM */
+
+		for( page = pgdat->node_mem_map + (start - node_start);
+		     page < pgdat->node_mem_map + (end - node_start);
+		     ++page) {
+
+			ClearPageReserved(page);
+			set_page_count(page, 1);
+			__free_page(page);
+		}
+
+		/*
+		 * In most calculations in this file, closed intervals
+		 * are considered. In this instance, a half-open interval
+		 * is being considered, and so the usual end - start + 1
+		 * calculation does not apply.
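+		 *
+		 * For example, start == 2 with end == 6 denotes page
+		 * frames 2, 3, 4 and 5, contributing end - start == 4
+		 * pages to the total.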
+		 */
+		if(start < end)
+			total += end - start;
+
+		treap_find_parent_and_remove_child(tmp, parent);
+		tmp = parent;
+		treap_find_leftmost_leaf(tmp);
 	}
-	total += count;
-	bdata->node_bootmem_map = NULL;
 	return total;
 }

-unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn)
+/*
+ * Wrappers around the core routines so that they operate on the
+ * per-node memory structures (pg_data_t *pgdat).
+ */
+unsigned long __init init_bootmem_node (pg_data_t *pgdat,
+					unsigned long freepfn,
+					unsigned long startpfn,
+					unsigned long endpfn)
 {
-	return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
+	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }

-void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
+				  unsigned long size)
 {
 	reserve_bootmem_core(pgdat->bdata, physaddr, size);
 }

-void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
+				    unsigned long align, unsigned long goal)
+{
+	void *ptr;
+
+	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+	if(ptr)
+		return ptr;
+
+	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+	panic("Out of memory");
+	return NULL;
+}
+
+void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
+			       unsigned long size)
 {
-	return(free_bootmem_core(pgdat->bdata, physaddr, size));
+	free_bootmem_core(pgdat->bdata, physaddr, size);
 }

 unsigned long __init free_all_bootmem_node (pg_data_t *pgdat)
 {
-	return(free_all_bootmem_core(pgdat));
+	return free_all_bootmem_core(pgdat);
 }

+/*
+ * Non-node-aware wrappers for the core routines. The per-node
+ * structures are hidden by using the global variable contig_page_data.
+ */
 unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
 {
 	max_low_pfn = pages;
 	min_low_pfn = start;
-	return(init_bootmem_core(&contig_page_data, start, 0, pages));
+	return init_bootmem_core(&contig_page_data, start, 0, pages);
 }

-void __init reserve_bootmem (unsigned long addr, unsigned long size)
+/*
+ * In multinode configurations it is not desirable to make memory
+ * available without information about the node assignment of the
+ * memory range, so even though reserve_bootmem() may operate
+ * without node information, this cannot.
+ *
+ * This apparent inconsistency in the interface actually makes
+ * some sense, as, when presented with irregular node-to-memory-range
+ * assignments in firmware tables, the original request to make memory
+ * available will be aware of its node assignment. But an outstanding
+ * issue is that a non-node-aware memory allocation request (via
+ * alloc_bootmem()) will not know to which node to return the memory.
+ *
+ * Resolving that issue would involve tracking dynamic allocations
+ * separately from assertions regarding the presence of physical
+ * memory, which is feasible given a change of interface, or perhaps a
+ * separate tree in each node for memory reserved by dynamic allocations.
+ */
+void __init free_bootmem (unsigned long addr, unsigned long size)
 {
-	reserve_bootmem_core(contig_page_data.bdata, addr, size);
+	free_bootmem_core(contig_page_data.bdata, addr, size);
 }

-void __init free_bootmem (unsigned long addr, unsigned long size)
+/*
+ * reserve_bootmem operates without node information, yet is node
+ * aware.
In situations where it may not be clear to which node a given
+ * physical memory range is assigned, this performs the search
+ * of the nodes on behalf of the caller.
+ */
+void __init reserve_bootmem (unsigned long addr, unsigned long size)
 {
-	return(free_bootmem_core(contig_page_data.bdata, addr, size));
+	unsigned long start, end;
+	unsigned in_any_node = 0;
+	segment_tree_node_t segment, *tree;
+	pg_data_t *pgdat = pgdat_list;
+
+	start = addr;
+	end = start + size - 1;
+
+	segment_set_endpoints(&segment, start, end);
+
+	/*
+	 * For error checking, this must determine the node(s) within
+	 * which an interval to be reserved lies. Otherwise, once the
+	 * error checking is in place, the memory will be reported as
+	 * multiply-reserved on those nodes not containing the memory.
+	 */
+	while(pgdat) {
+		unsigned in_node;
+
+		tree = start_segment_treap(pgdat->bdata->segment_tree.start_tree);
+		in_node = segment_tree_intersects(tree, &segment);
+		in_any_node |= in_node;
+
+		if(in_node)
+			reserve_bootmem_node(pgdat, addr, size);
+
+		pgdat = pgdat->node_next;
+	}
+	if(!in_any_node)
+		printk(KERN_WARNING "the interval [%lu, %lu] "
+			"is not contained within any node\n",
+			segment_start(&segment),
+			segment_end(&segment));
 }

+/*
+ * free_all_bootmem is now a convenience function which iterates over
+ * all the nodes, calling free_all_bootmem_core on each.
+ */
 unsigned long __init free_all_bootmem (void)
 {
-	return(free_all_bootmem_core(&contig_page_data));
+	pg_data_t *pgdat = pgdat_list;
+	unsigned long total = 0UL;
+
+	while(pgdat) {
+		total += free_all_bootmem_core(pgdat);
+		pgdat = pgdat->node_next;
+	}
+
+	return total;
 }

-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+/*
+ * __alloc_bootmem performs a search over all nodes in order to satisfy
+ * an allocation request, for when it is unimportant from which node
+ * the memory used to satisfy an allocation is drawn.
+ */
+void * __init __alloc_bootmem (unsigned long size, unsigned long align,
+			       unsigned long goal)
 {
 	pg_data_t *pgdat = pgdat_list;
 	void *ptr;

 	while (pgdat) {
-		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-						align, goal)))
-			return(ptr);
-		pgdat = pgdat->node_next;
-	}
-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
-	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
-}
+		ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);

-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
-{
-	void *ptr;
+		if(ptr)
+			return ptr;

-	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
-	if (ptr)
-		return (ptr);
+		pgdat = pgdat->node_next;
+	}

-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
 	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
 	panic("Out of memory");
 	return NULL;
 }
-
diff -urN linux-2.4.17-rc2-virgin/mm/filemap.c linux-2.4.17-rc2-wli2/mm/filemap.c
--- linux-2.4.17-rc2-virgin/mm/filemap.c	Tue Dec 18 23:18:03 2001
+++ linux-2.4.17-rc2-wli2/mm/filemap.c	Tue Dec 18 22:28:42 2001
@@ -53,7 +53,7 @@

 EXPORT_SYMBOL(vm_min_readahead);

-spinlock_t pagecache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;

 /*
  * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock
  * with the pagecache_lock held.
@@ -63,7 +63,7 @@ * pagemap_lru_lock -> * pagecache_lock */ -spinlock_t pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; #define CLUSTER_PAGES (1 << page_cluster) #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster) @@ -234,7 +234,7 @@ static void truncate_complete_page(struct page *page) { /* Leave it on the LRU if it gets converted into anonymous buffers */ - if (!page->buffers || do_flushpage(page, 0)) + if (!page->pte_chain && (!page->buffers || do_flushpage(page, 0))) lru_cache_del(page); /* @@ -296,6 +296,7 @@ page_cache_release(page); + /* we hit this with lock depth of 1 or 2 */ if (current->need_resched) { __set_current_state(TASK_RUNNING); schedule(); @@ -406,6 +407,8 @@ } page_cache_release(page); + + debug_lock_break(551); if (current->need_resched) { __set_current_state(TASK_RUNNING); schedule(); @@ -454,6 +457,11 @@ return page; } +static struct page * __find_page(struct address_space * mapping, unsigned long index) +{ + return __find_page_nolock(mapping, index, *page_hash(mapping,index)); +} + /* * By the time this is called, the page is locked and * we don't have to worry about any races any more. @@ -594,12 +602,16 @@ list_del(&page->list); list_add(&page->list, &mapping->locked_pages); - if (!PageDirty(page)) - continue; - page_cache_get(page); spin_unlock(&pagecache_lock); + /* BKL is held ... */ + debug_lock_break(1); + conditional_schedule(); + + if (!PageDirty(page)) + goto clean; + lock_page(page); if (PageDirty(page)) { @@ -607,7 +619,7 @@ writepage(page); } else UnlockPage(page); - +clean: page_cache_release(page); spin_lock(&pagecache_lock); } @@ -623,14 +635,28 @@ */ void filemap_fdatawait(struct address_space * mapping) { + DEFINE_LOCK_COUNT(); + spin_lock(&pagecache_lock); +restart: while (!list_empty(&mapping->locked_pages)) { struct page *page = list_entry(mapping->locked_pages.next, struct page, list); list_del(&page->list); list_add(&page->list, &mapping->clean_pages); - + + if (TEST_LOCK_COUNT(32)) { + RESET_LOCK_COUNT(); + debug_lock_break(2); + if (conditional_schedule_needed()) { + page_cache_get(page); + break_spin_lock_and_resched(&pagecache_lock); + page_cache_release(page); + goto restart; + } + } + if (!PageLocked(page)) continue; @@ -894,6 +920,7 @@ * the hash-list needs a held write-lock. */ repeat: + break_spin_lock(&pagecache_lock); page = __find_page_nolock(mapping, offset, hash); if (page) { page_cache_get(page); @@ -970,7 +997,53 @@ /* - * Same as grab_cache_page, but do not wait if the page is unavailable. + * We combine this with read-ahead to deactivate pages when we + * think there's sequential IO going on. Note that this is + * harmless since we don't actually evict the pages from memory + * but just move them to the inactive list. + * + * TODO: + * - make the readahead code smarter + * - move readahead to the VMA level so we can do the same + * trick with mmap() + * + * Rik van Riel, 2000 + */ +static void drop_behind(struct file * file, unsigned long index) +{ + struct inode *inode = file->f_dentry->d_inode; + struct address_space *mapping = inode->i_mapping; + struct page *page; + unsigned long start; + + /* Nothing to drop-behind if we're on the first page. */ + if (!index) + return; + + if (index > file->f_rawin) + start = index - file->f_rawin; + else + start = 0; + + /* + * Go backwards from index-1 and drop all pages in the + * readahead window. 
Since the readahead window may have + * been increased since the last time we were called, we + * stop when the page isn't there. + */ + spin_lock(&pagemap_lru_lock); + spin_lock(&pagecache_lock); + while (--index >= start) { + page = __find_page(mapping, index); + if (!page || !PageActive(page)) + break; + deactivate_page_nolock(page); + } + spin_unlock(&pagecache_lock); + spin_unlock(&pagemap_lru_lock); +} + +/* Same as grab_cache_page, but do not wait if the page is unavailable. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. @@ -1240,6 +1313,12 @@ if (filp->f_ramax > max_readahead) filp->f_ramax = max_readahead; + /* + * Move the pages that have already been passed + * to the inactive list. + */ + drop_behind(filp, index); + #ifdef PROFILE_READAHEAD profile_readahead((reada_ok == 2), filp); #endif @@ -1248,25 +1327,6 @@ return; } -/* - * Mark a page as having seen activity. - * - * If it was already so marked, move it - * to the active queue and drop the referenced - * bit. Otherwise, just mark it for future - * action.. - */ -void mark_page_accessed(struct page *page) -{ - if (!PageActive(page) && PageReferenced(page)) { - activate_page(page); - ClearPageReferenced(page); - return; - } - - /* Mark the page referenced, AFTER checking for previous usage.. */ - SetPageReferenced(page); -} /* * This is a generic file read routine, and uses the @@ -1375,7 +1435,7 @@ * beginning or we just did an lseek. */ if (!offset || !filp->f_reada) - mark_page_accessed(page); + touch_page(page); /* * Ok, we have the page, and it's up-to-date, so @@ -1492,8 +1552,8 @@ ssize_t retval; int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress; struct kiobuf * iobuf; - struct address_space * mapping = filp->f_dentry->d_inode->i_mapping; - struct inode * inode = mapping->host; + struct inode * inode = filp->f_dentry->d_inode; + struct address_space * mapping = inode->i_mapping; new_iobuf = 0; iobuf = filp->f_iobuf; @@ -1774,7 +1834,7 @@ nr = max; /* And limit it to a sane percentage of the inactive list.. */ - max = nr_inactive_pages / 2; + max = nr_inactive_clean_pages / 2; if (nr > max) nr = max; @@ -1919,7 +1979,7 @@ * Found the page and have a reference on it, need to check sharing * and possibly copy it over to another page.. 
*/ - mark_page_accessed(page); + touch_page(page); flush_page_to_ram(page); return page; @@ -2055,6 +2115,8 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + debug_lock_break(1); + break_spin_lock(&vma->vm_mm->page_table_lock); return error; } @@ -2085,6 +2147,9 @@ address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + + debug_lock_break(1); + break_spin_lock(&vma->vm_mm->page_table_lock); return error; } @@ -2343,7 +2408,7 @@ int error = 0; /* This caps the number of vma's this process can own */ - if (vma->vm_mm->map_count > MAX_MAP_COUNT) + if (vma->vm_mm->map_count > max_map_count) return -ENOMEM; if (start == vma->vm_start) { @@ -2443,7 +2508,7 @@ if (vma->vm_flags & VM_LOCKED) return -EINVAL; - zap_page_range(vma->vm_mm, start, end - start); + zap_page_range(vma->vm_mm, start, end - start, ZPR_PARTITION); return 0; } @@ -2773,7 +2838,7 @@ page = __read_cache_page(mapping, index, filler, data); if (IS_ERR(page)) goto out; - mark_page_accessed(page); + touch_page(page); if (Page_Uptodate(page)) goto out; @@ -2970,6 +3035,7 @@ unsigned long index, offset; long page_fault; char *kaddr; + int deactivate = 1; /* * Try to find the page in the cache. If it isn't there, @@ -2978,8 +3044,10 @@ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ index = pos >> PAGE_CACHE_SHIFT; bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) + if (bytes > count) { bytes = count; + deactivate = 0; + } /* * Bring in the user page that we will copy from _first_. @@ -3023,8 +3091,11 @@ unlock: kunmap(page); /* Mark it unlocked again and drop the page.. */ - SetPageReferenced(page); UnlockPage(page); + if (deactivate) + deactivate_page(page); + else + touch_page(page); page_cache_release(page); if (status < 0) diff -urN linux-2.4.17-rc2-virgin/mm/highmem.c linux-2.4.17-rc2-wli2/mm/highmem.c --- linux-2.4.17-rc2-virgin/mm/highmem.c Tue Dec 18 23:18:03 2001 +++ linux-2.4.17-rc2-wli2/mm/highmem.c Tue Dec 18 22:28:42 2001 @@ -32,7 +32,7 @@ */ static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; -static spinlock_t kmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED; pte_t * pkmap_page_table; diff -urN linux-2.4.17-rc2-virgin/mm/memory.c linux-2.4.17-rc2-wli2/mm/memory.c --- linux-2.4.17-rc2-virgin/mm/memory.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/memory.c Tue Dec 18 22:28:42 2001 @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -101,6 +102,7 @@ } pte = pte_offset(dir, 0); pmd_clear(dir); + pgtable_remove_rmap(pte); pte_free(pte); } @@ -235,9 +237,11 @@ if (pte_none(pte)) goto cont_copy_pte_range_noset; + /* pte contains position in swap, so copy. */ if (!pte_present(pte)) { swap_duplicate(pte_to_swp_entry(pte)); - goto cont_copy_pte_range; + set_pte(dst_pte, pte); + goto cont_copy_pte_range_noset; } ptepage = pte_page(pte); if ((!VALID_PAGE(ptepage)) || @@ -258,6 +262,7 @@ dst->rss++; cont_copy_pte_range: set_pte(dst_pte, pte); + page_add_rmap(ptepage, dst_pte); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) goto out_unlock; @@ -313,8 +318,10 @@ continue; if (pte_present(pte)) { struct page *page = pte_page(pte); - if (VALID_PAGE(page) && !PageReserved(page)) + if (VALID_PAGE(page) && !PageReserved(page)) { freed ++; + page_remove_rmap(page, ptep); + } /* This will eventually call __free_pte on the pte. */ tlb_remove_page(tlb, ptep, address + offset); } else { @@ -355,7 +362,8 @@ /* * remove user pages in a given range. 
*/ -void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) +void do_zap_page_range(struct mm_struct *mm, unsigned long address, + unsigned long size) { mmu_gather_t *tlb; pgd_t * dir; @@ -397,16 +405,17 @@ spin_unlock(&mm->page_table_lock); } + /* * Do a quick page-table lookup for a single page. */ -static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) +static struct page * follow_page(unsigned long address, int write) { pgd_t *pgd; pmd_t *pmd; pte_t *ptep, pte; - pgd = pgd_offset(mm, address); + pgd = pgd_offset(current->mm, address); if (pgd_none(*pgd) || pgd_bad(*pgd)) goto out; @@ -442,74 +451,21 @@ return page; } -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) -{ - int i = 0; - - do { - struct vm_area_struct * vma; - - vma = find_extend_vma(mm, start); - - if ( !vma || - (!force && - ((write && (!(vma->vm_flags & VM_WRITE))) || - (!write && (!(vma->vm_flags & VM_READ))) ) )) { - if (i) return i; - return -EFAULT; - } - - spin_lock(&mm->page_table_lock); - do { - struct page *map; - while (!(map = follow_page(mm, start, write))) { - spin_unlock(&mm->page_table_lock); - switch (handle_mm_fault(mm, vma, start, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - if (i) return i; - return -EFAULT; - default: - if (i) return i; - return -ENOMEM; - } - spin_lock(&mm->page_table_lock); - } - if (pages) { - pages[i] = get_page_map(map); - /* FIXME: call the correct function, - * depending on the type of the found page - */ - if (pages[i]) - page_cache_get(pages[i]); - } - if (vmas) - vmas[i] = vma; - i++; - start += PAGE_SIZE; - len--; - } while(len && start < vma->vm_end); - spin_unlock(&mm->page_table_lock); - } while(len); - return i; -} - /* * Force in an entire range of pages from the current process's user VA, * and pin them in physical memory. */ -#define dprintk(x...) +#define dprintk(x...) int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) { - int pgcount, err; + unsigned long ptr, end; + int err; struct mm_struct * mm; + struct vm_area_struct * vma = 0; + struct page * map; + int i; + int datain = (rw == READ); /* Make sure the iobuf is not already mapped somewhere. 
*/ if (iobuf->nr_pages) @@ -518,37 +474,79 @@ mm = current->mm; dprintk ("map_user_kiobuf: begin\n"); - pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; - /* mapping 0 bytes is not permitted */ - if (!pgcount) BUG(); - err = expand_kiobuf(iobuf, pgcount); + ptr = va & PAGE_MASK; + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); if (err) return err; + down_read(&mm->mmap_sem); + + err = -EFAULT; iobuf->locked = 0; - iobuf->offset = va & (PAGE_SIZE-1); + iobuf->offset = va & ~PAGE_MASK; iobuf->length = len; - /* Try to fault in all of the necessary pages */ - down_read(&mm->mmap_sem); - /* rw==READ means read from disk, write into memory area */ - err = get_user_pages(current, mm, va, pgcount, - (rw==READ), 0, iobuf->maplist, NULL); - up_read(&mm->mmap_sem); - if (err < 0) { - unmap_kiobuf(iobuf); - dprintk ("map_user_kiobuf: end %d\n", err); - return err; - } - iobuf->nr_pages = err; - while (pgcount--) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(iobuf->maplist[pgcount]); + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { + vma = find_vma(current->mm, ptr); + if (!vma) + goto out_unlock; + if (vma->vm_start > ptr) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_unlock; + if (expand_stack(vma, ptr)) + goto out_unlock; + } + if (((datain) && (!(vma->vm_flags & VM_WRITE))) || + (!(vma->vm_flags & VM_READ))) { + err = -EACCES; + goto out_unlock; + } + } + spin_lock(&mm->page_table_lock); + while (!(map = follow_page(ptr, datain))) { + int ret; + + spin_unlock(&mm->page_table_lock); + ret = handle_mm_fault(current->mm, vma, ptr, datain); + if (ret <= 0) { + if (!ret) + goto out_unlock; + else { + err = -ENOMEM; + goto out_unlock; + } + } + spin_lock(&mm->page_table_lock); + } + map = get_page_map(map); + if (map) { + flush_dcache_page(map); + page_cache_get(map); + } else + printk (KERN_INFO "Mapped page missing [%d]\n", i); + spin_unlock(&mm->page_table_lock); + iobuf->maplist[i] = map; + iobuf->nr_pages = ++i; + + ptr += PAGE_SIZE; } + + up_read(&mm->mmap_sem); dprintk ("map_user_kiobuf: end OK\n"); return 0; + + out_unlock: + up_read(&mm->mmap_sem); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf: end %d\n", err); + return err; } /* @@ -598,9 +596,6 @@ if (map) { if (iobuf->locked) UnlockPage(map); - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ page_cache_release(map); } } @@ -609,6 +604,20 @@ iobuf->locked = 0; } +void zap_page_range(struct mm_struct *mm, unsigned long address, + unsigned long size, int actions) +{ + while (size) { + unsigned long chunk = size; + + if (actions & ZPR_PARTITION && chunk > ZPR_MAX_BYTES) + chunk = ZPR_MAX_BYTES; + do_zap_page_range(mm, address, chunk); + + address += chunk; + size -= chunk; + } +} /* * Lock down all of the pages of a kiovec for IO. 
@@ -718,11 +727,15 @@ return 0; } -static inline void zeromap_pte_range(pte_t * pte, unsigned long address, - unsigned long size, pgprot_t prot) +static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte, + unsigned long address, unsigned long size, + pgprot_t prot) { unsigned long end; + debug_lock_break(1); + break_spin_lock(&mm->page_table_lock); + address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -750,7 +763,7 @@ pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; - zeromap_pte_range(pte, address, end - address, prot); + zeromap_pte_range(mm, pte, address, end - address, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -953,7 +966,9 @@ if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) ++mm->rss; + page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); + page_add_rmap(new_page, page_table); lru_cache_add(new_page); /* Free the old page.. */ @@ -984,7 +999,7 @@ /* mapping wholly truncated? */ if (mpnt->vm_pgoff >= pgoff) { - zap_page_range(mm, start, len); + zap_page_range(mm, start, len, ZPR_NORMAL); continue; } @@ -997,7 +1012,7 @@ /* Ok, partially affected.. */ start += diff << PAGE_SHIFT; len = (len - diff) << PAGE_SHIFT; - zap_page_range(mm, start, len); + zap_page_range(mm, start, len, ZPR_NORMAL); } while ((mpnt = mpnt->vm_next_share) != NULL); } @@ -1035,10 +1050,16 @@ do_expand: limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; + if (limit != RLIM_INFINITY) { + if (inode->i_size >= limit) { + send_sig(SIGXFSZ, current, 0); + goto out; + } + if (offset > limit) { + send_sig(SIGXFSZ, current, 0); + offset = limit; + } + } inode->i_size = offset; out_truncate: @@ -1047,11 +1068,8 @@ inode->i_op->truncate(inode); unlock_kernel(); } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); out: - return -EFBIG; + return 0; } /* @@ -1114,8 +1132,6 @@ ret = 2; } - mark_page_accessed(page); - lock_page(page); /* @@ -1145,6 +1161,7 @@ flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(page_table, pte); + page_add_rmap(page, page_table); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); @@ -1160,14 +1177,13 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) { pte_t entry; + struct page * page = ZERO_PAGE(addr); /* Read-only mapping of ZERO_PAGE. */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); /* ..except if it's a write access */ if (write_access) { - struct page *page; - /* Allocate our own private page. 
*/ spin_unlock(&mm->page_table_lock); @@ -1186,10 +1202,10 @@ flush_page_to_ram(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add(page); - mark_page_accessed(page); } set_pte(page_table, entry); + page_add_rmap(page, page_table); /* ignores ZERO_PAGE */ /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); @@ -1234,11 +1250,9 @@ */ if (write_access && !(vma->vm_flags & VM_SHARED)) { struct page * page = alloc_page(GFP_HIGHUSER); - if (!page) { - page_cache_release(new_page); + if (!page) return -1; - } - copy_user_highpage(page, new_page, address); + copy_highpage(page, new_page); page_cache_release(new_page); lru_cache_add(page); new_page = page; @@ -1264,6 +1278,7 @@ if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); + page_add_rmap(new_page, page_table); } else { /* One of our sibling threads was faster, back out. */ page_cache_release(new_page); @@ -1421,25 +1436,30 @@ goto out; } } + pgtable_add_rmap(new, mm, address); pmd_populate(mm, pmd, new); } out: return pte_offset(pmd, address); } +/* + * Simplistic page force-in.. + */ int make_pages_present(unsigned long addr, unsigned long end) { - int ret, len, write; + int write; + struct mm_struct *mm = current->mm; struct vm_area_struct * vma; - vma = find_vma(current->mm, addr); + vma = find_vma(mm, addr); write = (vma->vm_flags & VM_WRITE) != 0; if (addr >= end) BUG(); - if (end > vma->vm_end) - BUG(); - len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; - ret = get_user_pages(current, current->mm, addr, - len, write, 0, NULL, NULL); - return ret == len ? 0 : -1; + do { + if (handle_mm_fault(mm, vma, addr, write) < 0) + return -1; + addr += PAGE_SIZE; + } while (addr < end); + return 0; } diff -urN linux-2.4.17-rc2-virgin/mm/mmap.c linux-2.4.17-rc2-wli2/mm/mmap.c --- linux-2.4.17-rc2-virgin/mm/mmap.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/mmap.c Tue Dec 18 22:28:42 2001 @@ -45,6 +45,7 @@ }; int sysctl_overcommit_memory; +int max_map_count = DEFAULT_MAX_MAP_COUNT; /* Check that a process has enough memory to allocate a * new virtual mapping. @@ -413,7 +414,7 @@ return -EINVAL; /* Too many mappings? */ - if (mm->map_count > MAX_MAP_COUNT) + if (mm->map_count > max_map_count) return -ENOMEM; /* Obtain the address to map to. we verify (or select) it and ensure @@ -569,7 +570,7 @@ fput(file); /* Undo any partial mapping done by a device driver. */ - zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start); + zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, ZPR_NORMAL); free_vma: kmem_cache_free(vm_area_cachep, vma); return error; @@ -919,7 +920,7 @@ /* If we'll make "hole", check the vm areas limit */ if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len) - && mm->map_count >= MAX_MAP_COUNT) + && mm->map_count >= max_map_count) return -ENOMEM; /* @@ -967,7 +968,7 @@ remove_shared_vm_struct(mpnt); mm->map_count--; - zap_page_range(mm, st, size); + zap_page_range(mm, st, size, ZPR_PARTITION); /* * Fix the mapping, and free the old area if it wasn't reused. 
@@ -1040,7 +1041,7 @@
 			> current->rlim[RLIMIT_AS].rlim_cur)
 		return -ENOMEM;

-	if (mm->map_count > MAX_MAP_COUNT)
+	if (mm->map_count > max_map_count)
 		return -ENOMEM;

 	if (!vm_enough_memory(len >> PAGE_SHIFT))
@@ -1127,7 +1128,7 @@
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mm, st, size);
+		zap_page_range(mm, start, size, ZPR_PARTITION);
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
diff -urN linux-2.4.17-rc2-virgin/mm/mremap.c linux-2.4.17-rc2-wli2/mm/mremap.c
--- linux-2.4.17-rc2-virgin/mm/mremap.c	Tue Dec 18 23:18:04 2001
+++ linux-2.4.17-rc2-wli2/mm/mremap.c	Tue Dec 18 22:28:42 2001
@@ -61,8 +61,14 @@
 {
 	int error = 0;
 	pte_t pte;
+	struct page * page = NULL;
+
+	if (pte_present(*src))
+		page = pte_page(*src);

 	if (!pte_none(*src)) {
+		if (page)
+			page_remove_rmap(page, src);
 		pte = ptep_get_and_clear(src);
 		if (!dst) {
 			/* No dest? We must put it back. */
@@ -70,6 +76,8 @@
 			error++;
 		}
 		set_pte(dst, pte);
+		if (page)
+			page_add_rmap(page, dst);
 	}
 	return error;
 }
@@ -118,7 +126,7 @@
 	flush_cache_range(mm, new_addr, new_addr + len);
 	while ((offset += PAGE_SIZE) < len)
 		move_one_page(mm, new_addr + offset, old_addr + offset);
-	zap_page_range(mm, new_addr, len);
+	zap_page_range(mm, new_addr, len, ZPR_NORMAL);
 	return -1;
 }
diff -urN linux-2.4.17-rc2-virgin/mm/page_alloc.c linux-2.4.17-rc2-wli2/mm/page_alloc.c
--- linux-2.4.17-rc2-virgin/mm/page_alloc.c	Tue Dec 18 23:18:04 2001
+++ linux-2.4.17-rc2-wli2/mm/page_alloc.c	Tue Dec 18 22:40:05 2001
@@ -18,14 +18,27 @@
 #include
 #include
 #include
+#include

 int nr_swap_pages;
 int nr_active_pages;
-int nr_inactive_pages;
-struct list_head inactive_list;
+int nr_inactive_dirty_pages;
+int nr_inactive_clean_pages;
+struct list_head inactive_dirty_list;
 struct list_head active_list;
 pg_data_t *pgdat_list;
+
+/*
+ * The zone_table array is used to look up the address of the
+ * struct zone corresponding to a given zone number (ZONE_DMA,
+ * ZONE_NORMAL, or ZONE_HIGHMEM). Specifically, struct page uses
+ * a bitfield within ->flags to store the zone to which a page
+ * belongs, and so lookups in this table are essential.
+ */
+zone_t *zone_table[MAX_NR_ZONES];
+EXPORT_SYMBOL(zone_table);
+
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
 static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
 static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
@@ -54,7 +67,12 @@
 /*
  * Temporary debugging check.
  */
-#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))
+#define BAD_RANGE(zone, page)						\
+(									\
+	(((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size))	\
+	|| (((page) - mem_map) < (zone)->zone_start_mapnr)		\
+	|| ((zone) != PageZone(page))					\
+)

 /*
  * Buddy system. Hairy.
You really aren't expected to understand this @@ -80,17 +98,18 @@ BUG(); if (PageLocked(page)) BUG(); - if (PageLRU(page)) - BUG(); if (PageActive(page)) BUG(); + if (PageInactiveDirty(page)) + BUG(); + if (PageInactiveClean(page)) + BUG(); + if (page->pte_chain) + BUG(); page->flags &= ~((1<flags & PF_FREE_PAGES) - goto local_freelist; - back_local_freelist: - - zone = page->zone; + page->age = PAGE_AGE_START; + + zone = PageZone(page); mask = (~0UL) << order; base = zone->zone_mem_map; @@ -134,17 +153,6 @@ memlist_add_head(&(base + page_idx)->list, &area->free_list); spin_unlock_irqrestore(&zone->lock, flags); - return; - - local_freelist: - if (current->nr_local_pages) - goto back_local_freelist; - if (in_interrupt()) - goto back_local_freelist; - - list_add(&page->list, ¤t->local_pages); - page->index = order; - current->nr_local_pages++; } #define MARK_USED(index, order, area) \ @@ -203,10 +211,7 @@ set_page_count(page, 1); if (BAD_RANGE(zone,page)) BUG(); - if (PageLRU(page)) - BUG(); - if (PageActive(page)) - BUG(); + DEBUG_LRU_PAGE(page); return page; } curr_order++; @@ -225,78 +230,87 @@ } #endif -static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *)); -static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) +/* + * If we are able to directly reclaim pages, we move pages from the + * inactive_clean list onto the free list until the zone has enough + * free pages or until the inactive_clean pages are exhausted. + * If we cannot do the work ourselves, call kswapd. + */ +static void FASTCALL(fixup_freespace(zone_t * zone, int direct_reclaim)); +static void fixup_freespace(zone_t * zone, int direct_reclaim) +{ + if (direct_reclaim) { + struct page * page; + do { + if ((page = reclaim_page(zone))) + __free_pages_ok(page, 0); + } while (page && zone->free_pages <= zone->pages_min); + } else + wakeup_kswapd(); +} + +#define PAGES_MIN 0 +#define PAGES_LOW 1 +#define PAGES_HIGH 2 + +/* + * This function does the dirty work for __alloc_pages + * and is separated out to keep the code size smaller. 
+ * (suggested by Davem at 1:30 AM, typed by Rik at 6 AM)
+ */
+static struct page * __alloc_pages_limit(zonelist_t *zonelist,
+			unsigned long order, int limit, int direct_reclaim)
 {
-	struct page * page = NULL;
-	int __freed = 0;
+	zone_t **zone = zonelist->zones;
+	unsigned long water_mark = 0;

-	if (!(gfp_mask & __GFP_WAIT))
-		goto out;
-	if (in_interrupt())
-		BUG();
-
-	current->allocation_order = order;
-	current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
-
-	__freed = try_to_free_pages(classzone, gfp_mask, order);
-
-	current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
-
-	if (current->nr_local_pages) {
-		struct list_head * entry, * local_pages;
-		struct page * tmp;
-		int nr_pages;
-
-		local_pages = &current->local_pages;
-
-		if (likely(__freed)) {
-			/* pick from the last inserted so we're lifo */
-			entry = local_pages->next;
-			do {
-				tmp = list_entry(entry, struct page, list);
-				if (tmp->index == order && memclass(tmp->zone, classzone)) {
-					list_del(entry);
-					current->nr_local_pages--;
-					set_page_count(tmp, 1);
-					page = tmp;
-
-					if (page->buffers)
-						BUG();
-					if (page->mapping)
-						BUG();
-					if (!VALID_PAGE(page))
-						BUG();
-					if (PageSwapCache(page))
-						BUG();
-					if (PageLocked(page))
-						BUG();
-					if (PageLRU(page))
-						BUG();
-					if (PageActive(page))
-						BUG();
-					if (PageDirty(page))
-						BUG();
+	for (;;) {
+		zone_t *z = *(zone++);

-					break;
-				}
-			} while ((entry = entry->next) != local_pages);
+		if (!z)
+			break;
+		if (!z->size)
+			BUG();
+
+		/*
+		 * We allocate if the number of free + inactive_clean
+		 * pages is above the watermark.
+		 */
+		switch (limit) {
+			default:
+			case PAGES_MIN:
+				water_mark += z->pages_min;
+				break;
+			case PAGES_LOW:
+				water_mark += z->pages_low;
+				break;
+			case PAGES_HIGH:
+				water_mark += z->pages_high;
+		}

-			nr_pages = current->nr_local_pages;
-			/* free in reverse order so that the global order will be lifo */
-			while ((entry = local_pages->prev) != local_pages) {
-				list_del(entry);
-				tmp = list_entry(entry, struct page, list);
-				__free_pages_ok(tmp, tmp->index);
-				if (!nr_pages--)
-					BUG();
+		if (z->free_pages + z->inactive_clean_pages >= water_mark) {
+			struct page *page = NULL;
+			/* If possible, reclaim a page directly. */
+			if (direct_reclaim)
+				page = reclaim_page(z);
+			/* If that fails, fall back to rmqueue. */
+			if (!page)
+				page = rmqueue(z, order);
+			if (page)
+				return page;
+		} else if (water_mark > z->need_balance) {
+			/* Set kswapd's free+clean target for the zone.
+			 * We could do this in the init code, but this way
+			 * we support arbitrary fallback between zones.
+			 *
+			 * XXX: how about DISCONTIGMEM boxes ?
+			 */
+			z->need_balance = water_mark;
 		}
-		current->nr_local_pages = 0;
 	}
- out:
-	*freed = __freed;
-	return page;
+
+	/* Found nothing. */
+	return NULL;
 }

 /*
@@ -304,100 +318,239 @@
  */
 struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
 {
-	unsigned long min;
-	zone_t **zone, * classzone;
+	zone_t **zone;
+	int min, direct_reclaim = 0;
 	struct page * page;
-	int freed;

+	/*
+	 * (If anyone calls gfp from interrupts nonatomically then it
+	 * will sooner or later be tripped up by a schedule().)
+	 *
+	 * We are falling back to lower-level zones if allocation
+	 * in a higher zone fails.
+	 */
+
+	/*
+	 * Can we take pages directly from the inactive_clean
+	 * list?
+	 */
+	if (order == 0 && (gfp_mask & __GFP_WAIT))
+		direct_reclaim = 1;
+
+try_again:
+	/*
+	 * First, see if we have any zones with lots of free memory.
+	 *
+	 * We allocate free memory first because it doesn't contain
+	 * any data ... DUH!
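+	 *
+	 * Roughly, the fallback order below is: plain rmqueue() from
+	 * zones with free pages above pages_low, then
+	 * __alloc_pages_limit() with the PAGES_HIGH, PAGES_LOW and
+	 * finally PAGES_MIN watermarks, and only then the slow paths:
+	 * freeing pages ourselves, defragmenting, or dipping into the
+	 * last reserves.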
+	 */
 	zone = zonelist->zones;
-	classzone = *zone;
 	min = 1UL << order;
 	for (;;) {
 		zone_t *z = *(zone++);

 		if (!z)
 			break;
+		if (!z->size)
+			BUG();

 		min += z->pages_low;
 		if (z->free_pages > min) {
 			page = rmqueue(z, order);
 			if (page)
 				return page;
-		}
+		} else if (z->free_pages < z->pages_min)
+			fixup_freespace(z, direct_reclaim);
 	}

-	classzone->need_balance = 1;
-	mb();
-	if (waitqueue_active(&kswapd_wait))
-		wake_up_interruptible(&kswapd_wait);
+	/*
+	 * Try to allocate a page from a zone with a HIGH
+	 * amount of free + inactive_clean pages.
+	 *
+	 * If there is a lot of activity, inactive_target
+	 * will be high and we'll have a good chance of
+	 * finding a page using the HIGH limit.
+	 */
+	page = __alloc_pages_limit(zonelist, order, PAGES_HIGH, direct_reclaim);
+	if (page)
+		return page;
+
+	wakeup_kswapd();

+	/*
+	 * Then try to allocate a page from a zone with more
+	 * than zone->pages_low free + inactive_clean pages.
+	 *
+	 * When the working set is very large and VM activity
+	 * is low, we're most likely to have our allocation
+	 * succeed here.
+	 */
+	page = __alloc_pages_limit(zonelist, order, PAGES_LOW, direct_reclaim);
+	if (page)
+		return page;
+
+	/*
+	 * OK, none of the zones on our zonelist has lots
+	 * of pages free.
+	 *
+	 * We wake up kswapd, in the hope that kswapd will
+	 * resolve this situation before memory gets tight.
+	 *
+	 * We'll also help a bit trying to free pages; this
+	 * way statistics will make sure really fast allocators
+	 * are slowed down more than slow allocators, and other
+	 * programs in the system shouldn't be impacted as much
+	 * by the hogs.
+	 */
+	if ((gfp_mask & __GFP_WAIT) && !(current->flags & (PF_MEMALLOC | PF_MEMDIE)))
+		try_to_free_pages(gfp_mask);
+
+	/*
+	 * After waking up kswapd, we try to allocate a page
+	 * from any zone which isn't critical yet.
+	 *
+	 * Kswapd should, in most situations, bring the situation
+	 * back to normal in no time.
+	 */
+	page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim);
+	if (page)
+		return page;
+
+	/*
+	 * Damn, we didn't succeed.
+	 */
+	if (!(current->flags & PF_MEMALLOC)) {
+		/*
+		 * Are we dealing with a higher order allocation?
+		 *
+		 * Try to defragment some memory.
+		 */
+		if (order > 0 && (gfp_mask & __GFP_WAIT))
+			goto defragment;
+
+		/*
+		 * When we arrive here, we are really tight on memory.
+		 * Since kswapd didn't succeed in freeing pages for us,
+		 * we try to help it.
+		 *
+		 * Single page allocs loop until the allocation succeeds.
+		 * Multi-page allocs can fail due to memory fragmentation;
+		 * in that case we bail out to prevent infinite loops and
+		 * hanging device drivers ...
+		 *
+		 * Another issue is GFP_NOFS allocations; because they
+		 * do not have __GFP_FS set it's possible we cannot make
+		 * any progress freeing pages, in that case it's better
+		 * to give up than to deadlock the kernel looping here.
+		 *
+		 * NFS: we must yield the CPU (to rpciod) to avoid deadlock.
+		 */
+		if (gfp_mask & __GFP_WAIT) {
+			__set_current_state(TASK_RUNNING);
+			current->policy |= SCHED_YIELD;
+			schedule();
+			if (!order || free_shortage()) {
+				int progress = try_to_free_pages(gfp_mask);
+				if (progress || (gfp_mask & __GFP_FS))
+					goto try_again;
+				/*
+				 * Fail in case no progress was made and the
+				 * allocation may not be able to block on IO.
+				 */
+				return NULL;
+			}
+		}
+	}
+	/*
+	 * Final phase: allocate anything we can!
+	 *
+	 * Higher order allocations, GFP_ATOMIC allocations and
+	 * recursive allocations (PF_MEMALLOC) end up here.
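+	 *
+	 * (A hypothetical example: a GFP_ATOMIC allocation from a
+	 * network interrupt cannot sleep in the paths above, so when
+	 * free memory is very low it falls through to this loop and
+	 * may allocate below pages_min, down toward the pages_min / 4
+	 * floor computed below.)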
+	 *
+	 * Only recursive allocations can use the very last pages
+	 * in the system, otherwise it would be just too easy to
+	 * deadlock the system...
+	 */
 	zone = zonelist->zones;
 	min = 1UL << order;
 	for (;;) {
-		unsigned long local_min;
 		zone_t *z = *(zone++);
+		struct page * page = NULL;

 		if (!z)
 			break;
-		local_min = z->pages_min;
-		if (!(gfp_mask & __GFP_WAIT))
-			local_min >>= 2;
-		min += local_min;
-		if (z->free_pages > min) {
+		/*
+		 * SUBTLE: direct_reclaim is only possible if the task
+		 * becomes PF_MEMALLOC while looping above. This will
+		 * happen when the OOM killer selects this task for
+		 * instant execution...
+		 */
+		if (direct_reclaim) {
+			page = reclaim_page(z);
+			if (page)
+				return page;
+		}
+
+		/* XXX: is pages_min/4 a good amount to reserve for this? */
+		min += z->pages_min / 4;
+		if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) {
 			page = rmqueue(z, order);
 			if (page)
 				return page;
 		}
 	}
+	goto out_failed;

-	/* here we're in the low on memory slow path */
-rebalance:
-	if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
+	/*
+	 * Naive "defragmentation" for higher-order allocations. First we
+	 * free the inactive_clean pages to see if we can satisfy the
+	 * allocation, then we try to free some dirty pages and try
+	 * once more.
+	 *
+	 * We might want to turn this into something which defragments
+	 * memory based on physical page, simply by looking for unmapped
+	 * pages next to pages on the free list...
+	 */
+defragment:
+	{
+		int freed = 0;
 		zone = zonelist->zones;
+defragment_again:
 		for (;;) {
 			zone_t *z = *(zone++);
 			if (!z)
 				break;
-
-			page = rmqueue(z, order);
-			if (page)
-				return page;
+			if (!z->size)
+				continue;
+			while (z->inactive_clean_pages) {
+				struct page * page;
+				/* Move one page to the free list. */
+				page = reclaim_page(z);
+				if (!page)
+					break;
+				__free_page(page);
+				/* Try if the allocation succeeds. */
+				page = rmqueue(z, order);
+				if (page)
+					return page;
+			}
 		}
-		return NULL;
-	}

-	/* Atomic allocations - we can't balance anything */
-	if (!(gfp_mask & __GFP_WAIT))
-		return NULL;
-
-	page = balance_classzone(classzone, gfp_mask, order, &freed);
-	if (page)
-		return page;
-
-	zone = zonelist->zones;
-	min = 1UL << order;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
-
-		min += z->pages_min;
-		if (z->free_pages > min) {
-			page = rmqueue(z, order);
-			if (page)
-				return page;
+		/* XXX: do real defragmentation instead of calling launder ? */
+		if (!freed) {
+			freed = 1;
+			current->flags |= PF_MEMALLOC;
+			try_to_free_pages(gfp_mask);
+			current->flags &= ~PF_MEMALLOC;
+			goto defragment_again;
 		}
 	}

-	/* Don't let big-order allocations loop */
-	if (order > 3)
-		return NULL;
-
-	/* Yield for kswapd, and try again */
-	current->policy |= SCHED_YIELD;
-	__set_current_state(TASK_RUNNING);
-	schedule();
-	goto rebalance;
+
+out_failed:
+	/* No luck..
*/ +// printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order); + return NULL; } /* @@ -429,7 +582,8 @@ void page_cache_release(struct page *page) { if (!PageReserved(page) && put_page_testzero(page)) { - if (PageLRU(page)) + if (PageActive(page) || PageInactiveDirty(page) || + PageInactiveClean(page)) lru_cache_del(page); __free_pages_ok(page, 0); } @@ -537,10 +691,18 @@ tmpdat = tmpdat->node_next; } - printk("( Active: %d, inactive: %d, free: %d )\n", - nr_active_pages, - nr_inactive_pages, - nr_free_pages()); + printk("Free pages: %6dkB (%6dkB HighMem)\n", + nr_free_pages() << (PAGE_SHIFT-10), + nr_free_highpages() << (PAGE_SHIFT-10)); + + printk("( Active: %d, inactive_dirty: %d, inactive_clean: %d, free: %d (%d %d %d) )\n", + nr_active_pages, + nr_inactive_dirty_pages, + nr_inactive_clean_pages, + nr_free_pages(), + freepages.min, + freepages.low, + freepages.high); for (type = 0; type < MAX_NR_ZONES; type++) { struct list_head *head, *curr; @@ -660,7 +822,7 @@ printk("On node %d totalpages: %lu\n", nid, realtotalpages); INIT_LIST_HEAD(&active_list); - INIT_LIST_HEAD(&inactive_list); + INIT_LIST_HEAD(&inactive_dirty_list); /* * Some architectures (with lots of mem and discontinous memory @@ -699,6 +861,7 @@ unsigned long mask; unsigned long size, realsize; + zone_table[j] = zone; realsize = size = zones_size[j]; if (zholes_size) realsize -= zholes_size[j]; @@ -709,7 +872,10 @@ zone->lock = SPIN_LOCK_UNLOCKED; zone->zone_pgdat = pgdat; zone->free_pages = 0; + zone->inactive_clean_pages = 0; + zone->inactive_dirty_pages = 0; zone->need_balance = 0; + INIT_LIST_HEAD(&zone->inactive_clean_list); if (!size) continue; @@ -723,7 +889,20 @@ zone->pages_min = mask; zone->pages_low = mask*2; zone->pages_high = mask*3; - + /* + * Add these free targets to the global free target; + * we have to be SURE that freepages.high is higher + * than SUM [zone->pages_min] for all zones, otherwise + * we may have bad bad problems. + * + * This means we cannot make the freepages array writable + * in /proc, but have to add a separate extra_free_target + * for people who require it to catch load spikes in eg. + * gigabit ethernet routing... + */ + freepages.min += mask; + freepages.low += mask*2; + freepages.high += mask*3; zone->zone_mem_map = mem_map + offset; zone->zone_start_mapnr = offset; zone->zone_start_paddr = zone_start_paddr; @@ -733,9 +912,10 @@ for (i = 0; i < size; i++) { struct page *page = mem_map + offset + i; - page->zone = zone; + SetPageZone(page, j); + if (j != ZONE_HIGHMEM) - page->virtual = __va(zone_start_paddr); + SetPageVirtual(page, __va(zone_start_paddr)); zone_start_paddr += PAGE_SIZE; } diff -urN linux-2.4.17-rc2-virgin/mm/rmap.c linux-2.4.17-rc2-wli2/mm/rmap.c --- linux-2.4.17-rc2-virgin/mm/rmap.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.17-rc2-wli2/mm/rmap.c Tue Dec 18 22:28:42 2001 @@ -0,0 +1,354 @@ +/* + * mm/rmap.c - physical to virtual reverse mappings + * + * Copyright 2001, Rik van Riel + * Released under the General Public License (GPL). + * + * + * Simple, low overhead pte-based reverse mapping scheme. + * This is kept modular because we may want to experiment + * with object-based reverse mapping schemes. Please try + * to keep this thing as modular as possible. 
+ */ + +/* + * Locking: + * - the page->pte_chain is protected by the pagemap_lru_lock, + * we probably want to change this to a per-page lock in the + * future + * - because swapout locking is opposite to the locking order + * in the page fault path, the swapout path uses trylocks + * on the mm->page_table_lock + */ +#include +#include +#include + +#include +#include +#include + +#ifdef DEBUG +/* #define DEBUG */ +#undef DEBUG +#endif + +/* + * Shared pages have a chain of pte_chain structures, used to locate + * all the mappings to this page. We only need a pointer to the pte + * here, the page struct for the page table page contains the process + * it belongs to and the offset within that process. + * + * A singly linked list should be fine for most, if not all, workloads. + * On fork-after-exec the mapping we'll be removing will still be near + * the start of the list, on mixed application systems the short-lived + * processes will have their mappings near the start of the list and + * in systems with long-lived applications the relative overhead of + * exit() will be lower since the applications are long-lived. + */ +struct pte_chain { + struct pte_chain * next; + pte_t * ptep; +}; + +static struct pte_chain * pte_chain_freelist; +static inline struct pte_chain * pte_chain_alloc(void); +static inline void pte_chain_free(struct pte_chain *, struct pte_chain *, struct page *); +static void alloc_new_pte_chains(void); + +/** + * page_referenced - test if the page was referenced + * @page: the page to test + * + * Quick test_and_clear_referenced for all mappings to a page, + * returns the number of processes which referenced the page. + * Caller needs to hold the pagemap_lru_lock. + */ +int FASTCALL(page_referenced(struct page *)); +int page_referenced(struct page * page) +{ + struct pte_chain * pc; + int referenced = 0; + + if (PageTestandClearReferenced(page)) + referenced++; + + /* Check all the page tables mapping this page. */ + for (pc = page->pte_chain; pc; pc = pc->next) { + if (ptep_test_and_clear_young(pc->ptep)) + referenced++; + } + + return referenced; +} + +/** + * page_add_rmap - add reverse mapping entry to a page + * @page: the page to add the mapping to + * @ptep: the page table entry mapping this page + * + * Add a new pte reverse mapping to a page. + * The caller needs to hold the mm->page_table_lock. + */ +void FASTCALL(page_add_rmap(struct page *, pte_t *)); +void page_add_rmap(struct page * page, pte_t * ptep) +{ + struct pte_chain * pte_chain, * pc; + struct page * pte_page = virt_to_page(ptep); + + if (!page || !ptep) + BUG(); + if (!pte_present(*ptep)) + BUG(); + if (!pte_page->mapping) + BUG(); + if (!VALID_PAGE(page) || PageReserved(page)) + return; + + spin_lock(&pagemap_lru_lock); +#ifdef DEBUG + for (pc = page->pte_chain; pc; pc = pc->next) { + if (pc->ptep == ptep) + BUG(); + } +#endif + pte_chain = pte_chain_alloc(); + + /* Hook up the pte_chain to the page. */ + pte_chain->ptep = ptep; + pte_chain->next = page->pte_chain; + page->pte_chain = pte_chain; + + spin_unlock(&pagemap_lru_lock); +} + +/** + * page_remove_rmap - take down reverse mapping to a page + * @page: page to remove mapping from + * @ptep: page table entry to remove + * + * Removes the reverse mapping from the pte_chain of the page, + * after that the caller can clear the page table entry and free + * the page. + * Caller needs to hold the mm->page_table_lock. 
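+ *
+ * A typical call site (as in the mm/memory.c changes elsewhere in
+ * this patch) pairs the pte update with the rmap update:
+ *
+ *	set_pte(page_table, entry);
+ *	page_add_rmap(page, page_table);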
+ */ +void FASTCALL(page_remove_rmap(struct page *, pte_t *)); +void page_remove_rmap(struct page * page, pte_t * ptep) +{ + struct pte_chain * pc, * prev_pc = NULL; + + if (!page || !ptep) + BUG(); + if (!VALID_PAGE(page) || PageReserved(page)) + return; + + spin_lock(&pagemap_lru_lock); + for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) { + if (pc->ptep == ptep) { + pte_chain_free(pc, prev_pc, page); + goto out; + } + } +#ifdef DEBUG + /* Not found. This should NEVER happen! */ + printk("page_remove_rmap: pte_chain %p not present...\n", ptep); + printk("page_remove_rmap: only found: "); + for (pc = page->pte_chain; pc; pc = pc->next) + printk("%p ", pc->ptep); + printk("\n"); + /* panic("page_remove_rmap: giving up.\n"); */ +#endif + +out: + spin_unlock(&pagemap_lru_lock); + return; + +} + +/** + * try_to_unmap_one - worker function for try_to_unmap + * @page: page to unmap + * @ptep: page table entry to unmap from page + * + * Internal helper function for try_to_unmap, called for each page + * table entry mapping a page. Because locking order here is opposite + * to the locking order used by the page fault path, we use trylocks. + * Locking: + * pagemap_lru_lock page_launder() + * page lock page_launder(), trylock + * mm->page_table_lock try_to_unmap_one(), trylock + */ +int FASTCALL(try_to_unmap_one(struct page *, pte_t *)); +int try_to_unmap_one(struct page * page, pte_t * ptep) +{ + unsigned long address = ptep_to_address(ptep); + struct mm_struct * mm = ptep_to_mm(ptep); + struct vm_area_struct * vma; + pte_t pte; + int ret; + + if (!mm) + BUG(); + + /* + * We need the page_table_lock to protect us from page faults, + * munmap, fork, etc... + */ + if (!spin_trylock(&mm->page_table_lock)) + return SWAP_AGAIN; + + /* During mremap, it's possible pages are not in a VMA. */ + vma = find_vma(mm, address); + if (!vma) { + ret = SWAP_FAIL; + goto out_unlock; + } + + /* The page is mlock()d, we cannot swap it out. */ + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + goto out_unlock; + } + + /* Nuke the page table entry. */ + pte = ptep_get_and_clear(ptep); + flush_tlb_page(vma, address); + flush_cache_page(vma, address); + + /* Store the swap location in the pte. See handle_pte_fault() ... */ + if (PageSwapCache(page)) { + swp_entry_t entry; + entry.val = page->index; + swap_duplicate(entry); + set_pte(ptep, swp_entry_to_pte(entry)); + } + + /* Move the dirty bit to the physical page now the pte is gone. */ + if (pte_dirty(pte)) + set_page_dirty(page); + + mm->rss--; + page_cache_release(page); + ret = SWAP_SUCCESS; + +out_unlock: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/** + * try_to_unmap - try to remove all page table mappings to a page + * @page: the page to get unmapped + * + * Tries to remove all the page table entries which are mapping this + * page, used in the pageout path. Caller must hold pagemap_lru_lock + * and the page lock. Return values are: + * + * SWAP_SUCCESS - we succeeded in removing all mappings + * SWAP_AGAIN - we missed a trylock, try again later + * SWAP_FAIL - the page is unswappable + * SWAP_ERROR - an error occurred + */ +int FASTCALL(try_to_unmap(struct page *)); +int try_to_unmap(struct page * page) +{ + struct pte_chain * pc, * next_pc, * prev_pc = NULL; + int ret = SWAP_SUCCESS; + + /* This page should not be on the pageout lists. */ + if (!VALID_PAGE(page) || PageReserved(page)) + BUG(); + if (!PageLocked(page)) + BUG(); + /* We need backing store to swap out a page. 
+	 */
+	if (!page->mapping)
+		BUG();
+
+	for (pc = page->pte_chain; pc; pc = next_pc) {
+		next_pc = pc->next;
+		switch (try_to_unmap_one(page, pc->ptep)) {
+		case SWAP_SUCCESS:
+			/* Free the pte_chain struct. */
+			pte_chain_free(pc, prev_pc, page);
+			break;
+		case SWAP_AGAIN:
+			/* Skip this pte, remembering status. */
+			prev_pc = pc;
+			ret = SWAP_AGAIN;
+			continue;
+		case SWAP_FAIL:
+			return SWAP_FAIL;
+		case SWAP_ERROR:
+			return SWAP_ERROR;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * pte_chain_free - free pte_chain structure
+ * @pte_chain: pte_chain struct to free
+ * @prev_pte_chain: previous pte_chain on the list (may be NULL)
+ * @page: page this pte_chain hangs off (may be NULL)
+ *
+ * This function unlinks pte_chain from the singly linked list it
+ * may be on and adds the pte_chain to the free list. May also be
+ * called for new pte_chain structures which aren't on any list yet.
+ * Caller needs to hold the pagemap_lru_lock.
+ */
+static inline void pte_chain_free(struct pte_chain * pte_chain, struct pte_chain * prev_pte_chain, struct page * page)
+{
+	if (prev_pte_chain)
+		prev_pte_chain->next = pte_chain->next;
+	else if (page)
+		page->pte_chain = pte_chain->next;
+
+	pte_chain->ptep = NULL;
+	pte_chain->next = pte_chain_freelist;
+	pte_chain_freelist = pte_chain;
+}
+
+/**
+ * pte_chain_alloc - allocate a pte_chain struct
+ *
+ * Returns a pointer to a fresh pte_chain structure. Allocates new
+ * pte_chain structures as required.
+ * Caller needs to hold the pagemap_lru_lock.
+ */
+static inline struct pte_chain * pte_chain_alloc(void)
+{
+	struct pte_chain * pte_chain;
+
+	/* Allocate new pte_chain structs as needed. */
+	if (!pte_chain_freelist)
+		alloc_new_pte_chains();
+
+	/* Grab the first pte_chain from the freelist. */
+	pte_chain = pte_chain_freelist;
+	pte_chain_freelist = pte_chain->next;
+	pte_chain->next = NULL;
+
+	return pte_chain;
+}
+
+/**
+ * alloc_new_pte_chains - convert a free page to pte_chain structures
+ *
+ * Grabs a free page and converts it to pte_chain structures. We really
+ * should pre-allocate these earlier in the pagefault path or come up
+ * with some other trick.
+ */
+static void alloc_new_pte_chains(void)
+{
+	struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
+	int i = PAGE_SIZE / sizeof(struct pte_chain);
+
+	if (pte_chain) {
+		for (; i-- > 0; pte_chain++)
+			pte_chain_free(pte_chain, NULL, NULL);
+	} else {
+		/* Yeah yeah, I'll fix the pte_chain allocation ...
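+		 * One plausible fix (a sketch only, names hypothetical)
+		 * would be a dedicated slab cache created at boot, e.g.
+		 *
+		 *	pte_chain_cache = kmem_cache_create("pte_chain",
+		 *			sizeof(struct pte_chain), 0, 0,
+		 *			NULL, NULL);
+		 *
+		 * plus a reserve refilled before taking the locks.
+		 * Until something like that happens: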
*/ + panic("Fix pte_chain allocation, you lazy bastard!\n"); + } +} diff -urN linux-2.4.17-rc2-virgin/mm/shmem.c linux-2.4.17-rc2-wli2/mm/shmem.c --- linux-2.4.17-rc2-virgin/mm/shmem.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/shmem.c Tue Dec 18 22:28:42 2001 @@ -1193,7 +1193,7 @@ follow_link: shmem_follow_link, }; -static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes) +static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes) { char *this_char, *value, *rest; @@ -1205,7 +1205,7 @@ *value++ = 0; } else { printk(KERN_ERR - "tmpfs: No value for mount option '%s'\n", + "shmem_parse_options: No value for option '%s'\n", this_char); return 1; } @@ -1230,20 +1230,8 @@ *mode = simple_strtoul(value,&rest,8); if (*rest) goto bad_val; - } else if (!strcmp(this_char,"uid")) { - if (!uid) - continue; - *uid = simple_strtoul(value,&rest,0); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"gid")) { - if (!gid) - continue; - *gid = simple_strtoul(value,&rest,0); - if (*rest) - goto bad_val; } else { - printk(KERN_ERR "tmpfs: Bad mount option %s\n", + printk(KERN_ERR "shmem_parse_options: Bad option %s\n", this_char); return 1; } @@ -1251,7 +1239,7 @@ return 0; bad_val: - printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", + printk(KERN_ERR "shmem_parse_options: Bad value '%s' for option '%s'\n", value, this_char); return 1; @@ -1263,7 +1251,7 @@ unsigned long max_blocks = sbinfo->max_blocks; unsigned long max_inodes = sbinfo->max_inodes; - if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes)) + if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes)) return -EINVAL; return shmem_set_size(sbinfo, max_blocks, max_inodes); } @@ -1280,8 +1268,6 @@ struct dentry * root; unsigned long blocks, inodes; int mode = S_IRWXUGO | S_ISVTX; - uid_t uid = current->fsuid; - gid_t gid = current->fsgid; struct shmem_sb_info *sbinfo = SHMEM_SB(sb); struct sysinfo si; @@ -1293,8 +1279,10 @@ blocks = inodes = si.totalram / 2; #ifdef CONFIG_TMPFS - if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes)) + if (shmem_parse_options (data, &mode, &blocks, &inodes)) { + printk(KERN_ERR "tmpfs invalid option\n"); return NULL; + } #endif spin_lock_init (&sbinfo->stat_lock); @@ -1311,8 +1299,6 @@ if (!inode) return NULL; - inode->i_uid = uid; - inode->i_gid = gid; root = d_alloc_root(inode); if (!root) { iput(inode); diff -urN linux-2.4.17-rc2-virgin/mm/slab.c linux-2.4.17-rc2-wli2/mm/slab.c --- linux-2.4.17-rc2-virgin/mm/slab.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/slab.c Thu Dec 20 16:59:45 2001 @@ -49,7 +49,9 @@ * constructors and destructors are called without any locking. * Several members in kmem_cache_t and slab_t never change, they * are accessed without any locking. - * The per-cpu arrays are never accessed from the wrong cpu, no locking. + * The per-cpu arrays are never accessed from the wrong cpu, no locking, + * they are however called with local interrupts disabled so no + * preempt_disable needed. * The non-constant members are protected with a per-cache irq spinlock. 
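+ * Under CONFIG_PREEMPT the interrupt disabling is also what keeps the
+ * per-cpu array access cpu-local: preemption cannot happen with local
+ * interrupts off, so a sequence of the form
+ *	local_irq_save(flags);
+ *	cc = cc_data(cachep);
+ *	... use cc ...
+ *	local_irq_restore(flags);
+ * is safe without a surrounding preempt_disable()/preempt_enable().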
* * Further notes from the original documentation: @@ -109,11 +111,9 @@ #if DEBUG # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ - SLAB_NO_REAP | SLAB_CACHE_DMA | \ - SLAB_MUST_HWCACHE_ALIGN) + SLAB_NO_REAP | SLAB_CACHE_DMA) #else -# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ - SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN) +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA) #endif /* @@ -651,7 +651,7 @@ flags &= ~SLAB_POISON; } #if FORCED_DEBUG - if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN)) + if (size < (PAGE_SIZE>>3)) /* * do not red zone large object, causes severe * fragmentation. @@ -1282,9 +1282,10 @@ }) #ifdef CONFIG_SMP -void* kmem_cache_alloc_batch(kmem_cache_t* cachep, cpucache_t* cc, int flags) +void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags) { int batchcount = cachep->batchcount; + cpucache_t* cc = cc_data(cachep); spin_lock(&cachep->spinlock); while (batchcount--) { @@ -1333,7 +1334,7 @@ objp = cc_entry(cc)[--cc->avail]; } else { STATS_INC_ALLOCMISS(cachep); - objp = kmem_cache_alloc_batch(cachep,cc,flags); + objp = kmem_cache_alloc_batch(cachep,flags); if (!objp) goto alloc_new_slab_nolock; } @@ -1534,15 +1535,9 @@ */ void * kmalloc (size_t size, int flags) { - cache_sizes_t *csizep = cache_sizes; - - for (; csizep->cs_size; csizep++) { - if (size > csizep->cs_size) - continue; - return __kmem_cache_alloc(flags & GFP_DMA ? - csizep->cs_dmacachep : csizep->cs_cachep, flags); - } - return NULL; + kmem_cache_t *cp = kmem_find_general_cachep (size, flags); + + return cp == NULL ? NULL : __kmem_cache_alloc(cp, flags); } /** @@ -1590,18 +1585,66 @@ kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) { - cache_sizes_t *csizep = cache_sizes; + int idx; - /* This function could be moved to the header file, and - * made inline so consumers can quickly determine what - * cache pointer they require. - */ - for ( ; csizep->cs_size; csizep++) { - if (size > csizep->cs_size) - continue; + switch (size) { +#if PAGE_SIZE == 4096 + case 0 ... 32: + idx = 0; + break; + case 33 ... 64: + idx = 1; + break; +#else + case 0 ... 64: + idx = 1; + break; +#endif + case 65 ... 128: + idx = 2; + break; + case 129 ... 256: + idx = 3; + break; + case 257 ...512: + idx = 4; + break; + case 513 ... 1024: + idx = 5; + break; + case 1025 ... 2048: + idx = 6; + break; + case 2049 ... 4096: + idx = 7; + break; + case 4097 ... 8192: + idx = 8; + break; + case 8193 ... 16384: + idx = 9; + break; + case 16385 ... 32768: + idx = 10; + break; + case 32769 ... 65536: + idx = 11; + break; + case 65537 ... 131072: + idx = 12; + break; + default: + idx = -1; break; } - return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep; + + if (idx == -1) + return NULL; + +#if PAGE_SIZE != 4096 + idx = idx - 1; +#endif + return (gfpflags & GFP_DMA) ? 
cache_sizes [idx].cs_dmacachep : cache_sizes [idx].cs_cachep; } #ifdef CONFIG_SMP @@ -1921,13 +1964,12 @@ #endif #ifdef CONFIG_SMP { - cpucache_t *cc = cc_data(cachep); unsigned int batchcount = cachep->batchcount; unsigned int limit; - if (cc) - limit = cc->limit; - else + if (cc_data(cachep)) + limit = cc_data(cachep)->limit; + else limit = 0; len += sprintf(page+len, " : %4u %4u", limit, batchcount); diff -urN linux-2.4.17-rc2-virgin/mm/swap.c linux-2.4.17-rc2-wli2/mm/swap.c --- linux-2.4.17-rc2-virgin/mm/swap.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/swap.c Tue Dec 18 22:28:42 2001 @@ -24,6 +24,20 @@ #include /* for copy_to/from_user */ #include +/* + * We identify three levels of free memory. We never let free mem + * fall below the freepages.min except for atomic allocations. We + * start background swapping if we fall below freepages.high free + * pages, and we begin intensive swapping below freepages.low. + * + * Actual initialization is done in mm/page_alloc.c + */ +freepages_t freepages = { + 0, /* freepages.min */ + 0, /* freepages.low */ + 0 /* freepages.high */ +}; + /* How many pages do we try to swap or page in/out together? */ int page_cluster; @@ -33,17 +47,59 @@ 8, /* do swap I/O in clusters of this size */ }; +/** + * (de)activate_page - move pages from/to active and inactive lists + * @page: the page we want to move + * @nolock - are we already holding the pagemap_lru_lock? + * + * Deactivate_page will move an active page to the right + * inactive list, while activate_page will move a page back + * from one of the inactive lists to the active list. If + * called on a page which is not on any of the lists, the + * page is left alone. + */ +void FASTCALL(deactivate_page_nolock(struct page *)); +void deactivate_page_nolock(struct page * page) +{ + /* + * Don't touch it if it's not on the active list. + * (some pages aren't on any list at all) + */ + ClearPageReferenced(page); + if (PageActive(page)) { + page->age = 0; + del_page_from_active_list(page); + add_page_to_inactive_dirty_list(page); + } +} + +void FASTCALL(deactivate_page(struct page *)); +void deactivate_page(struct page * page) +{ + spin_lock(&pagemap_lru_lock); + deactivate_page_nolock(page); + spin_unlock(&pagemap_lru_lock); +} + /* * Move an inactive page to the active list. */ -static inline void activate_page_nolock(struct page * page) +void FASTCALL(activate_page_nolock(struct page *)); +void activate_page_nolock(struct page * page) { - if (PageLRU(page) && !PageActive(page)) { - del_page_from_inactive_list(page); + if (PageInactiveDirty(page)) { + del_page_from_inactive_dirty_list(page); + add_page_to_active_list(page); + } else if (PageInactiveClean(page)) { + del_page_from_inactive_clean_list(page); add_page_to_active_list(page); } + + /* Make sure the page gets a fair chance at staying active. */ + page->age = max((int)page->age, PAGE_AGE_START); } +void FASTCALL(activate_page(struct page *)); void activate_page(struct page * page) { spin_lock(&pagemap_lru_lock); @@ -55,11 +111,12 @@ * lru_cache_add: add a page to the page lists * @page: the page to add */ +void FASTCALL(lru_cache_add(struct page *)); void lru_cache_add(struct page * page) { - if (!TestSetPageLRU(page)) { + if (!PageLRU(page)) { spin_lock(&pagemap_lru_lock); - add_page_to_inactive_list(page); + add_page_to_active_list(page); spin_unlock(&pagemap_lru_lock); } } @@ -71,14 +128,15 @@ * This function is for when the caller already holds * the pagemap_lru_lock. 
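+ * With the three-list VM in this patch a page lives on exactly one
+ * of the page lists, moving roughly like this over its lifetime:
+ *
+ *	active <--> inactive_dirty --> inactive_clean --> free
+ *
+ * (refill_inactive() deactivates, page_launder() cleans pages and
+ * moves them to the inactive_clean list, reclaim_page() frees them,
+ * and activate_page() promotes pages back on reference.)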
*/ +void FASTCALL(__lru_cache_del(struct page *)); void __lru_cache_del(struct page * page) { - if (TestClearPageLRU(page)) { - if (PageActive(page)) { - del_page_from_active_list(page); - } else { - del_page_from_inactive_list(page); - } + if (PageActive(page)) { + del_page_from_active_list(page); + } else if (PageInactiveDirty(page)) { + del_page_from_inactive_dirty_list(page); + } else if (PageInactiveClean(page)) { + del_page_from_inactive_clean_list(page); } } @@ -86,6 +144,7 @@ * lru_cache_del: remove a page from the page lists * @page: the page to remove */ +void FASTCALL(lru_cache_del(struct page *)); void lru_cache_del(struct page * page) { spin_lock(&pagemap_lru_lock); diff -urN linux-2.4.17-rc2-virgin/mm/swap_state.c linux-2.4.17-rc2-wli2/mm/swap_state.c --- linux-2.4.17-rc2-virgin/mm/swap_state.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/swap_state.c Tue Dec 18 22:28:42 2001 @@ -89,6 +89,40 @@ return 0; } +/** + * add_to_swap - allocate swap space for a page + * @page: page we want to move to swap + * + * Allocate swap space for the page and add the page to the + * swap cache. Caller needs to hold the page lock. + */ +int add_to_swap(struct page * page) +{ + swp_entry_t entry; + + if (!PageLocked(page)) + BUG(); + + for (;;) { + entry = get_swap_page(); + if (!entry.val) + return 0; + /* + * Add it to the swap cache and mark it dirty + * (adding to the page cache will clear the dirty + * and uptodate bits, so we need to do it again) + */ + if (add_to_swap_cache(page, entry) == 0) { + SetPageUptodate(page); + set_page_dirty(page); + swap_free(entry); + return 1; + } + /* Raced with "speculative" read_swap_cache_async */ + swap_free(entry); + } +} + /* * This must be called only on pages that have * been verified to be in the swap cache. diff -urN linux-2.4.17-rc2-virgin/mm/swapfile.c linux-2.4.17-rc2-wli2/mm/swapfile.c --- linux-2.4.17-rc2-virgin/mm/swapfile.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/swapfile.c Tue Dec 18 22:28:42 2001 @@ -374,6 +374,7 @@ return; get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); + page_add_rmap(page, dir); swap_free(entry); ++vma->vm_mm->rss; } @@ -696,6 +697,7 @@ * interactive performance. Interruptible check on * signal_pending() would be nice, but changes the spec? */ + debug_lock_break(551); if (current->need_resched) schedule(); } @@ -1124,6 +1126,13 @@ if (swap_info[i].flags != SWP_USED) continue; for (j = 0; j < swap_info[i].max; ++j) { + if (conditional_schedule_needed()) { + debug_lock_break(551); + swap_list_unlock(); + debug_lock_break(551); + unconditional_schedule(); + swap_list_lock(); + } switch (swap_info[i].swap_map[j]) { case 0: case SWAP_MAP_BAD: diff -urN linux-2.4.17-rc2-virgin/mm/vmalloc.c linux-2.4.17-rc2-wli2/mm/vmalloc.c --- linux-2.4.17-rc2-virgin/mm/vmalloc.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/vmalloc.c Tue Dec 18 22:28:47 2001 @@ -6,7 +6,6 @@ * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian , May 2000 */ -#include #include #include #include @@ -19,6 +18,54 @@ rwlock_t vmlist_lock = RW_LOCK_UNLOCKED; struct vm_struct * vmlist; +#if defined(CONFIG_KDB) +/* kdb_vmlist_check + * Check to determine if an address is within a vmalloced range. + * Parameters: + * starta -- Starting address of region to check + * enda -- Ending address of region to check + * Returns: + * 0 -- [starta,enda] not within a vmalloc area + * 1 -- [starta,enda] within a vmalloc area + * Locking: + * None. + * Remarks: + * Shouldn't acquire locks. 
Always called with all interrupts + * disabled and other cpus halted. Yet, if a breakpoint or fault + * occurs while the vmlist is in an indeterminate state, this + * function could fail. + */ +int +kdb_vmlist_check(unsigned long starta, unsigned long enda) +{ + struct vm_struct *vp; + + if (vmlist) { + for(vp=vmlist; vp; vp = vp->next) { + unsigned long end = (unsigned long)vp->addr + vp->size; + + end -= PAGE_SIZE; /* Unbias for guard page */ + + if ((starta >= (unsigned long)vp->addr) + && (starta < end) + && (enda < end)) { + return 1; + } + } + } + else { + /* early kdb, no vmlist yet */ + extern char _text, _end; + if (starta >= (unsigned long) &_text && + enda < (unsigned long) &_end && + starta <= enda) + return 1; + } + return 0; +} +#endif + + static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size) { pte_t * pte; @@ -274,43 +321,6 @@ if (count == 0) goto finished; *buf = *addr; - buf++; - addr++; - count--; - } while (--n > 0); - } -finished: - read_unlock(&vmlist_lock); - return buf - buf_start; -} - -long vwrite(char *buf, char *addr, unsigned long count) -{ - struct vm_struct *tmp; - char *vaddr, *buf_start = buf; - unsigned long n; - - /* Don't allow overflow */ - if ((unsigned long) addr + count < count) - count = -(unsigned long) addr; - - read_lock(&vmlist_lock); - for (tmp = vmlist; tmp; tmp = tmp->next) { - vaddr = (char *) tmp->addr; - if (addr >= vaddr + tmp->size - PAGE_SIZE) - continue; - while (addr < vaddr) { - if (count == 0) - goto finished; - buf++; - addr++; - count--; - } - n = vaddr + tmp->size - PAGE_SIZE - addr; - do { - if (count == 0) - goto finished; - *addr = *buf; buf++; addr++; count--; diff -urN linux-2.4.17-rc2-virgin/mm/vmscan.c linux-2.4.17-rc2-wli2/mm/vmscan.c --- linux-2.4.17-rc2-virgin/mm/vmscan.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/mm/vmscan.c Tue Dec 18 22:40:58 2001 @@ -32,349 +32,267 @@ */ #define DEF_PRIORITY (6) -/* - * The swap-out function returns 1 if it successfully - * scanned all the pages it was asked to (`count'). - * It returns zero if it couldn't do anything, - * - * rss may decrease because pages are shared, but this - * doesn't count as having freed a page. - */ +int vm_static_inactive_target; -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone) +static inline void age_page_up(struct page *page) { - pte_t pte; - swp_entry_t entry; + page->age = min((int) (page->age + PAGE_AGE_ADV), PAGE_AGE_MAX); +} - /* Don't look at this pte if it's been accessed recently. */ - if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) { - mark_page_accessed(page); - return 0; - } +static inline void age_page_down(struct page *page) +{ + page->age -= min(PAGE_AGE_DECL, (int)page->age); +} - /* Don't bother unmapping pages that are active */ - if (PageActive(page)) - return 0; +/* + * Estimate whether a zone has enough inactive or free pages.. + */ +static unsigned int zone_inactive_plenty(zone_t *zone) +{ + unsigned int inactive; - /* Don't bother replenishing zones not under pressure.. 
*/ - if (!memclass(page->zone, classzone)) + if (!zone->size) return 0; + + inactive = zone->inactive_dirty_pages; + inactive += zone->inactive_clean_pages; + inactive += zone->free_pages; - if (TryLockPage(page)) - return 0; + return (inactive > (zone->size * 2 / 5)); +} - /* From this point on, the odds are that we're going to - * nuke this pte, so read and clear the pte. This hook - * is needed on CPUs which update the accessed and dirty - * bits in hardware. - */ - flush_cache_page(vma, address); - pte = ptep_get_and_clear(page_table); - flush_tlb_page(vma, address); +#define FREE_PLENTY_FACTOR 4 +static unsigned int zone_free_plenty(zone_t *zone) +{ + unsigned int free, target; - if (pte_dirty(pte)) - set_page_dirty(page); + target = max((int) zone->pages_high, zone->need_balance); - /* - * Is the page already in the swap cache? If so, then - * we can just drop our reference to it without doing - * any IO - it's already up-to-date on disk. - */ - if (PageSwapCache(page)) { - entry.val = page->index; - swap_duplicate(entry); -set_swap_pte: - set_pte(page_table, swp_entry_to_pte(entry)); -drop_pte: - mm->rss--; - UnlockPage(page); - { - int freeable = page_count(page) - !!page->buffers <= 2; - page_cache_release(page); - return freeable; - } - } + free = zone->free_pages; + free += zone->inactive_clean_pages; - /* - * Is it a clean page? Then it must be recoverable - * by just paging it in again, and we can just drop - * it.. or if it's dirty but has backing store, - * just mark the page dirty and drop it. - * - * However, this won't actually free any real - * memory, as the page will just be in the page cache - * somewhere, and as such we should just continue - * our scan. - * - * Basically, this just makes it possible for us to do - * some real work in the future in "refill_inactive()". - */ - if (page->mapping) - goto drop_pte; - if (!PageDirty(page)) - goto drop_pte; + return free > target * FREE_PLENTY_FACTOR; +} - /* - * Anonymous buffercache pages can be left behind by - * concurrent truncate and pagefault. - */ - if (page->buffers) - goto preserve; +static unsigned int free_plenty(void) +{ + unsigned int free; - /* - * This is a dirty, swappable page. First of all, - * get a suitable swap entry for it, and make sure - * we have the swap cache set up to associate the - * page with that swap entry. - */ - for (;;) { - entry = get_swap_page(); - if (!entry.val) - break; - /* Add it to the swap cache and mark it dirty - * (adding to the page cache will clear the dirty - * and uptodate bits, so we need to do it again) - */ - if (add_to_swap_cache(page, entry) == 0) { - SetPageUptodate(page); - set_page_dirty(page); - goto set_swap_pte; - } - /* Raced with "speculative" read_swap_cache_async */ - swap_free(entry); - } + free = nr_free_pages(); + free += nr_inactive_clean_pages; - /* No swap space left */ -preserve: - set_pte(page_table, pte); - UnlockPage(page); - return 0; + return free > freepages.high * FREE_PLENTY_FACTOR; } -/* mm->page_table_lock is held. 
mmap_sem is not held */ -static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) +static inline int page_mapping_inuse(struct page * page) { - pte_t * pte; - unsigned long pmd_end; + struct address_space * mapping = page->mapping; - if (pmd_none(*dir)) - return count; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return count; - } - - pte = pte_offset(dir, address); - - pmd_end = (address + PMD_SIZE) & PMD_MASK; - if (end > pmd_end) - end = pmd_end; + /* Page is in somebody's page tables. */ + if (page->pte_chain) + return 1; - do { - if (pte_present(*pte)) { - struct page *page = pte_page(*pte); - - if (VALID_PAGE(page) && !PageReserved(page)) { - count -= try_to_swap_out(mm, vma, address, pte, page, classzone); - if (!count) { - address += PAGE_SIZE; - break; - } - } - } - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); - mm->swap_address = address; - return count; -} - -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) -{ - pmd_t * pmd; - unsigned long pgd_end; - - if (pgd_none(*dir)) - return count; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return count; - } + /* XXX: does this happen ? */ + if (!mapping) + return 0; - pmd = pmd_offset(dir, address); + /* File is mmaped by somebody. */ + if (mapping->i_mmap || mapping->i_mmap_shared) + return 1; - pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK; - if (pgd_end && (end > pgd_end)) - end = pgd_end; - - do { - count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); - if (!count) - break; - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return count; -} - -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count, zone_t * classzone) -{ - pgd_t *pgdir; - unsigned long end; - - /* Don't swap out areas which are reserved */ - if (vma->vm_flags & VM_RESERVED) - return count; - - pgdir = pgd_offset(mm, address); - - end = vma->vm_end; - if (address >= end) - BUG(); - do { - count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); - if (!count) - break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgdir++; - } while (address && (address < end)); - return count; + return 0; } -/* Placeholder for swap_out(): may be updated by fork.c:mmput() */ -struct mm_struct *swap_mm = &init_mm; - -/* - * Returns remaining count of pages to be swapped out by followup call. +/** + * reclaim_page - reclaims one page from the inactive_clean list + * @zone: reclaim a page from this zone + * + * The pages on the inactive_clean can be instantly reclaimed. + * The tests look impressive, but most of the time we'll grab + * the first page of the list and exit successfully. */ -static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone) +struct page * reclaim_page(zone_t * zone) { - unsigned long address; - struct vm_area_struct* vma; + struct page * page = NULL; + struct list_head * page_lru; + swp_entry_t entry = {0}; + int maxscan; /* - * Find the proper vm-area after freezing the vma chain - * and ptes. 
+	 * We need to hold the pagecache_lock around all tests to make sure
+	 * reclaim_page() cannot race with find_get_page() and friends.
+	 */
-	spin_lock(&mm->page_table_lock);
-	address = mm->swap_address;
-	if (address == TASK_SIZE || swap_mm != mm) {
-		/* We raced: don't count this mm but try again */
-		++*mmcounter;
-		goto out_unlock;
-	}
-	vma = find_vma(mm, address);
-	if (vma) {
-		if (address < vma->vm_start)
-			address = vma->vm_start;
-
-		for (;;) {
-			count = swap_out_vma(mm, vma, address, count, classzone);
-			vma = vma->vm_next;
-			if (!vma)
-				break;
-			if (!count)
-				goto out_unlock;
-			address = vma->vm_start;
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&pagecache_lock);
+	maxscan = zone->inactive_clean_pages;
+	while ((page_lru = zone->inactive_clean_list.prev) !=
+			&zone->inactive_clean_list && maxscan--) {
+		page = list_entry(page_lru, struct page, lru);
+
+		/* Wrong page on list?! (list corruption, should not happen) */
+		if (unlikely(!PageInactiveClean(page))) {
+			printk("VM: reclaim_page, wrong page on list.\n");
+			list_del(page_lru);
+			PageZone(page)->inactive_clean_pages--;
+			continue;
+		}
-	}
-	/* Indicate that we reached the end of address space */
-	mm->swap_address = TASK_SIZE;
-out_unlock:
-	spin_unlock(&mm->page_table_lock);
-	return count;
-}
+		/* Page is being freed */
+		if (unlikely(page_count(page) == 0)) {
+			list_del(page_lru);
+			list_add(page_lru, &zone->inactive_clean_list);
+			continue;
+		}
-static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone));
-static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)
-{
-	int counter, nr_pages = SWAP_CLUSTER_MAX;
-	struct mm_struct *mm;
+		/* Page cannot be reclaimed ? Move to inactive_dirty list. */
-	counter = mmlist_nr;
-	do {
-		if (unlikely(current->need_resched)) {
-			__set_current_state(TASK_RUNNING);
-			schedule();
+		if (unlikely(page->pte_chain || page->buffers ||
+				PageReferenced(page) || PageDirty(page) ||
+				page_count(page) > 1 || TryLockPage(page))) {
+			del_page_from_inactive_clean_list(page);
+			add_page_to_inactive_dirty_list(page);
+			continue;
+		}
-		spin_lock(&mmlist_lock);
-		mm = swap_mm;
-		while (mm->swap_address == TASK_SIZE || mm == &init_mm) {
-			mm->swap_address = 0;
-			mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
-			if (mm == swap_mm)
-				goto empty;
-			swap_mm = mm;
+		/* OK, remove the page from the caches. */
+		if (PageSwapCache(page)) {
+			entry.val = page->index;
+			__delete_from_swap_cache(page);
+			goto found_page;
+		}
-		/* Make sure the mm doesn't disappear when we drop the lock.. */
-		atomic_inc(&mm->mm_users);
-		spin_unlock(&mmlist_lock);
+		if (page->mapping) {
+			__remove_inode_page(page);
+			goto found_page;
+		}
-
+		/* We should never ever get here.
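+		 * Every page on the inactive_clean list should be in the
+		 * swap cache or backed by an inode mapping, so one of the
+		 * two cases above should have taken it.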
+		 */
+		printk(KERN_ERR "VM: reclaim_page, found unknown page\n");
+		list_del(page_lru);
+		zone->inactive_clean_pages--;
+		UnlockPage(page);
+	}
+	spin_unlock(&pagecache_lock);
+	spin_unlock(&pagemap_lru_lock);
+	return NULL;
-	nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone);
+found_page:
+	del_page_from_inactive_clean_list(page);
+	spin_unlock(&pagecache_lock);
+	spin_unlock(&pagemap_lru_lock);
+	if (entry.val)
+		swap_free(entry);
+	UnlockPage(page);
+	page->age = PAGE_AGE_START;
+	if (page_count(page) != 1)
+		printk("VM: reclaim_page, found page with count %d!\n",
+			page_count(page));
+	return page;
+}
-	mmput(mm);
+static inline int page_dirty(struct page *page)
+{
+	struct buffer_head *tmp, *bh;
-	if (!nr_pages)
-		return 1;
-	} while (--counter >= 0);
+	if (PageDirty(page))
+		return 1;
-	return 0;
+	if (page->mapping && !page->buffers)
+		return 0;
+
+	tmp = bh = page->buffers;
+
+	do {
+		if (tmp->b_state & ((1<<BH_Dirty) | (1<<BH_Lock)))
+			return 1;
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
-empty:
-	spin_unlock(&mmlist_lock);
 	return 0;
 }
-static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority));
-static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
+/**
+ * page_launder - clean dirty inactive pages, move to inactive_clean list
+ * @gfp_mask: what operations we are allowed to do
+ * @sync: are we allowed to do synchronous IO in emergencies ?
+ *
+ * This function is called when we are low on free / inactive_clean
+ * pages; its purpose is to refill the free/clean list as efficiently
+ * as possible.
+ *
+ * This means we do writes asynchronously as long as possible and will
+ * only sleep on IO when we don't have another option. Since writeouts
+ * cause disk seeks and make read IO slower, we skip writes altogether
+ * when the amount of dirty pages is small.
+ *
+ * This code is heavily inspired by the FreeBSD source code. Thanks
+ * go out to Matthew Dillon.
+ */
+#define CAN_DO_FS ((gfp_mask & __GFP_FS) && should_write)
+#define WRITE_LOW_WATER 5
+#define WRITE_HIGH_WATER 10
+int page_launder(int gfp_mask)
 {
+	int maxscan, cleaned_pages;
 	struct list_head * entry;
-	int max_scan = nr_inactive_pages / priority;
-	int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10);
+	cleaned_pages = 0;
+
+	/* The main launder loop. */
 	spin_lock(&pagemap_lru_lock);
-	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
+	maxscan = nr_inactive_dirty_pages;
+	while (--maxscan >= 0 && (entry = inactive_dirty_list.prev) != &inactive_dirty_list) {
 		struct page * page;
-		if (unlikely(current->need_resched)) {
-			spin_unlock(&pagemap_lru_lock);
-			__set_current_state(TASK_RUNNING);
-			schedule();
-			spin_lock(&pagemap_lru_lock);
-			continue;
-		}
 		page = list_entry(entry, struct page, lru);
-		if (unlikely(!PageLRU(page)))
-			BUG();
-		if (unlikely(PageActive(page)))
-			BUG();
 		list_del(entry);
-		list_add(entry, &inactive_list);
+		list_add(entry, &inactive_dirty_list);
+
+		/* Wrong page on list?! (list corruption, should not happen) */
+		if (!PageInactiveDirty(page)) {
+			printk("VM: page_launder, wrong page on list.\n");
+			list_del(entry);
+			nr_inactive_dirty_pages--;
+			PageZone(page)->inactive_dirty_pages--;
+			continue;
+		}
 		/*
-		 * Zero page counts can happen because we unlink the pages
-		 * _after_ decrementing the usage count..
+		 * The page is in active use or really unfreeable. Move to
+		 * the active list and adjust the page age if needed.
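+		 * page_referenced() tests (and clears) both the software
+		 * referenced bit and the accessed bit of every mapping
+		 * pte found through page->pte_chain, so use through any
+		 * mapping keeps the page active here.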
 		 */
-		if (unlikely(!page_count(page)))
+		if ((page_referenced(page) || page->age) &&
+				page_mapping_inuse(page)) {
+			del_page_from_inactive_dirty_list(page);
+			add_page_to_active_list(page);
+			page->age = max((int)page->age, PAGE_AGE_START);
 			continue;
+		}

-		if (!memclass(page->zone, classzone))
+		/*
+		 * The page is still in the page tables of some process,
+		 * move it to the active list but leave page age at 0;
+		 * either swap_out() will make it freeable soon or it is
+		 * mlock()ed...
+		 *
+		 * The !PageLocked() test is to protect us from ourselves,
+		 * see the code around the writepage() call.
+		 */
+		if ((page_count(page) > (1 + !!page->buffers)) &&
+				!PageLocked(page)) {
+			del_page_from_inactive_dirty_list(page);
+			add_page_to_active_list(page);
 			continue;
+		}

-		/* Racy check to avoid trylocking when not worthwhile */
-		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
-			goto page_mapped;
+		/*
+		 * If this zone has plenty of pages free, don't spend time
+		 * on cleaning it but only move clean pages out of the way
+		 * so we won't have to scan those again.
+		 */
+		if (zone_free_plenty(PageZone(page)) || page_count(page) == 0) {
+			continue;
+		}

 		/*
 		 * The page is locked. IO in progress?
@@ -391,12 +309,49 @@
 			continue;
 		}

-		if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) {
+		/*
+		 * Anonymous process memory without backing store. Try to
+		 * allocate it some swap space here.
+		 *
+		 * XXX: implement swap clustering ?
+		 */
+		if (page->pte_chain && !page->mapping && !page->buffers) {
+			page_cache_get(page);
+			spin_unlock(&pagemap_lru_lock);
+			if (!add_to_swap(page)) {
+				activate_page(page);
+				UnlockPage(page);
+				page_cache_release(page);
+				spin_lock(&pagemap_lru_lock);
+				continue;
+			}
+			page_cache_release(page);
+			spin_lock(&pagemap_lru_lock);
+		}
+
+		/*
+		 * The page is mapped into the page tables of one or more
+		 * processes. Try to unmap it here.
+		 */
+		if (page->pte_chain) {
+			switch (try_to_unmap(page)) {
+			case SWAP_ERROR:
+			case SWAP_FAIL:
+				goto page_active;
+			case SWAP_AGAIN:
+				UnlockPage(page);
+				continue;
+			case SWAP_SUCCESS:
+				; /* try to free the page below */
+			}
+		}
+
+		if (PageDirty(page) && page->mapping) {
 			/*
 			 * It is not critical here to write it only if
 			 * the page is unmapped because any direct writer
 			 * like O_DIRECT would set the PG_dirty bitflag
-			 * on the phisical page after having successfully
+			 * on the physical page after having successfully
 			 * pinned it and after the I/O to the page is finished,
 			 * so the direct writes to the page cannot get lost.
 			 */
@@ -425,7 +380,7 @@
 		if (page->buffers) {
 			spin_unlock(&pagemap_lru_lock);

-			/* avoid to free a locked page */
+			/* To avoid freeing our page before we're done. */
 			page_cache_get(page);

 			if (try_to_release_page(page, gfp_mask)) {
@@ -443,14 +398,14 @@
 				/* effectively free the page here */
 				page_cache_release(page);

-				if (--nr_pages)
-					continue;
-				break;
+				cleaned_pages++;
+				continue;
 			} else {
 				/*
-				 * The page is still in pagecache so undo the stuff
-				 * before the try_to_release_page since we've not
-				 * finished and we can now try the next step.
+				 * We freed the buffers but may have
+				 * slept; undo the stuff we did before
+				 * try_to_release_page and fall through
+				 * to the next step.
 				 */
 				page_cache_release(page);
@@ -466,224 +421,279 @@
 			}
 		}

-		spin_lock(&pagecache_lock);
 		/*
-		 * this is the non-racy check for busy page.
+		 * If the page is really freeable now, move it to the
+		 * inactive_clean list.
+		 *
+		 * We re-test everything since the page could have been
+		 * used by somebody else while we waited on IO above.
+ * This test is not safe from races, but only the one + * in reclaim_page() needs to be. */ - if (!page->mapping || !is_page_cache_freeable(page)) { - spin_unlock(&pagecache_lock); + if (page->mapping && !PageDirty(page) && !page->pte_chain && + page_count(page) == 1) { + del_page_from_inactive_dirty_list(page); + add_page_to_inactive_clean_list(page); UnlockPage(page); -page_mapped: - if (--max_mapped >= 0) - continue; - + cleaned_pages++; + } else { /* - * Alert! We've found too many mapped pages on the - * inactive list, so we start swapping out now! + * OK, we don't know what to do with the page. + * It's no use keeping it here, so we move it to + * the active list. */ - spin_unlock(&pagemap_lru_lock); - swap_out(priority, gfp_mask, classzone); - return nr_pages; - } - - /* - * It is critical to check PageDirty _after_ we made sure - * the page is freeable* so not in use by anybody. - */ - if (PageDirty(page)) { - spin_unlock(&pagecache_lock); +page_active: + del_page_from_inactive_dirty_list(page); + add_page_to_active_list(page); UnlockPage(page); - continue; } - - /* point of no return */ - if (likely(!PageSwapCache(page))) { - __remove_inode_page(page); - spin_unlock(&pagecache_lock); - } else { - swp_entry_t swap; - swap.val = page->index; - __delete_from_swap_cache(page); - spin_unlock(&pagecache_lock); - swap_free(swap); - } - - __lru_cache_del(page); - UnlockPage(page); - - /* effectively free the page here */ - page_cache_release(page); - - if (--nr_pages) - continue; - break; } spin_unlock(&pagemap_lru_lock); - return nr_pages; + /* Return the number of pages moved to the inactive_clean list. */ + return cleaned_pages; } -/* - * This moves pages from the active list to - * the inactive list. +/** + * refill_inactive - scan the active list and find pages to deactivate + * @priority: how much are we allowed to scan * - * We move them the other way when we see the - * reference bit on the page. + * This function will scan a portion of the active list to find + * unused pages, those pages will then be moved to the inactive list. */ -static void refill_inactive(int nr_pages) +int refill_inactive(int priority) { - struct list_head * entry; + struct list_head * page_lru; + struct page * page; + int maxscan = nr_active_pages >> priority; + int nr_deactivated = 0; + /* Take the lock while messing with the list... */ spin_lock(&pagemap_lru_lock); - entry = active_list.prev; - while (nr_pages-- && entry != &active_list) { - struct page * page; + while (maxscan-- > 0 && (page_lru = active_list.prev) != &active_list) { + page = list_entry(page_lru, struct page, lru); - page = list_entry(entry, struct page, lru); - entry = entry->prev; - if (PageTestandClearReferenced(page)) { - list_del(&page->lru); - list_add(&page->lru, &active_list); + /* Wrong page on list?! (list corruption, should not happen) */ + if (unlikely(!PageActive(page))) { + printk("VM: refill_inactive, wrong page on list.\n"); + list_del(page_lru); + nr_active_pages--; continue; } - del_page_from_active_list(page); - add_page_to_inactive_list(page); - SetPageReferenced(page); + /* + * Do aging on the pages. Every time a page is referenced, + * page->age gets incremented. If it wasn't referenced, we + * decrement page->age. 
The page gets moved to the inactive
+		 * list when one of the following is true:
+		 * - the page age reaches 0
+		 * - the object the page belongs to isn't in active use
+		 * - the object the page belongs to is hogging the cache
+		 */
+		if (PageTestandClearReferenced(page)) {
+			age_page_up(page);
+		} else {
+			age_page_down(page);
+		}
+
+		/*
+		 * Don't deactivate pages from zones which have
+		 * plenty of inactive pages.
+		 */
+		if (unlikely(zone_inactive_plenty(PageZone(page)) &&
+				zone_free_plenty(PageZone(page)))) {
+			goto skip_page;
+		}
+
+		/*
+		 * If the page age is 'hot' AND the object the page
+		 * is in is still in use, we keep the page. Otherwise
+		 * we move it to the inactive_dirty list.
+		 */
+		if (page->age && page_mapping_inuse(page)) {
+skip_page:
+			list_del(page_lru);
+			list_add(page_lru, &active_list);
+		} else {
+			deactivate_page_nolock(page);
+			nr_deactivated++;
+		}
+
+		/* Low latency reschedule point. */
+		if (unlikely(current->need_resched)) {
+			spin_unlock(&pagemap_lru_lock);
+			__set_current_state(TASK_RUNNING);
+			schedule();
+			if (!inactive_shortage())
+				return 1;
+			spin_lock(&pagemap_lru_lock);
+		}
 	}
 	spin_unlock(&pagemap_lru_lock);
+
+	return nr_deactivated;
 }
-static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
-static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
+/*
+ * Check if there are zones with a severe shortage of free pages,
+ * or if all zones have a minor shortage.
+ */
+int free_shortage(void)
 {
-	int chunk_size = nr_pages;
-	unsigned long ratio;
+	pg_data_t *pgdat;
+	unsigned int global_free = 0;
+	unsigned int global_target = freepages.high;
-	nr_pages -= kmem_cache_reap(gfp_mask);
-	if (nr_pages <= 0)
-		return 0;
+	/* Are we low on free pages anywhere? */
+	pgdat = pgdat_list;
+	do {
+		int i;
+		for(i = 0; i < MAX_NR_ZONES; i++) {
+			zone_t *zone = pgdat->node_zones + i;
+			unsigned int free;
-	nr_pages = chunk_size;
-	/* try to keep the active list 2/3 of the size of the cache */
-	ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2);
-	refill_inactive(ratio);
+			if (!zone->size)
+				continue;
-	nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority);
-	if (nr_pages <= 0)
-		return 0;
+			free = zone->free_pages;
+			free += zone->inactive_clean_pages;
-	shrink_dcache_memory(priority, gfp_mask);
-	shrink_icache_memory(priority, gfp_mask);
-#ifdef CONFIG_QUOTA
-	shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
-#endif
+			/* Local shortage? */
+			if (free < zone->pages_low)
+				return 1;
-	return nr_pages;
+			global_free += free;
+		}
+		pgdat = pgdat->node_next;
+	} while (pgdat);
+
+	/* Global shortage? */
+	return global_free < global_target;
 }
-int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
+static inline unsigned int inactive_target(void)
 {
-	int priority = DEF_PRIORITY;
-	int nr_pages = SWAP_CLUSTER_MAX;
+	unsigned int mem;
-	do {
-		nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
-		if (nr_pages <= 0)
-			return 1;
-	} while (--priority);
+	mem = nr_active_pages;
+	mem += nr_inactive_dirty_pages;
+	mem += nr_inactive_clean_pages;
-	/*
-	 * Hmm.. Cache shrink failed - time to kill something?
-	 * Mhwahahhaha! This is the part I really like. Giggle.
-	 */
-	out_of_memory();
-	return 0;
+	return mem / 4;
 }
-DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
-
-static int check_classzone_need_balance(zone_t * classzone)
+/*
+ * Are we low on inactive pages globally or in any zone?
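+ *
+ * The global target scales with the working set: with, say, 600 MB
+ * active, 150 MB inactive_dirty and 50 MB inactive_clean pages,
+ * inactive_target() yields (600+150+50)/4 = 200 MB, and a shortage
+ * is reported once inactive + free pages drop below freepages.high
+ * plus that target.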
+ */ +int inactive_shortage(void) { - zone_t * first_classzone; + pg_data_t *pgdat; + unsigned int global_target = freepages.high + inactive_target(); + unsigned int global_inactive = 0; - first_classzone = classzone->zone_pgdat->node_zones; - while (classzone >= first_classzone) { - if (classzone->free_pages > classzone->pages_high) - return 0; - classzone--; - } - return 1; -} + pgdat = pgdat_list; + do { + int i; + for(i = 0; i < MAX_NR_ZONES; i++) { + zone_t *zone = pgdat->node_zones + i; + unsigned int inactive, target; -static int kswapd_balance_pgdat(pg_data_t * pgdat) -{ - int need_more_balance = 0, i; - zone_t * zone; + if (!zone->size) + continue; - for (i = pgdat->nr_zones-1; i >= 0; i--) { - zone = pgdat->node_zones + i; - if (unlikely(current->need_resched)) - schedule(); - if (!zone->need_balance) - continue; - if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) { - zone->need_balance = 0; - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - continue; + inactive = zone->inactive_dirty_pages; + inactive += zone->inactive_clean_pages; + inactive += zone->free_pages; + + target = max((int) zone->pages_high, zone->need_balance); + /* Local shortage? */ + if (inactive < target) + return 1; + + global_inactive += inactive; } - if (check_classzone_need_balance(zone)) - need_more_balance = 1; - else - zone->need_balance = 0; - } + pgdat = pgdat->node_next; + } while (pgdat); - return need_more_balance; + /* Global shortage? */ + return global_inactive < global_target; } -static void kswapd_balance(void) +/* + * Worker function for kswapd and try_to_free_pages, we get + * called whenever there is a shortage of free/inactive_clean + * pages. + * + * This function will also move pages to the inactive list, + * if needed. + */ +static int do_try_to_free_pages(unsigned int gfp_mask) { - int need_more_balance; - pg_data_t * pgdat; + int ret = 0; - do { - need_more_balance = 0; - pgdat = pgdat_list; - do - need_more_balance |= kswapd_balance_pgdat(pgdat); - while ((pgdat = pgdat->node_next)); - } while (need_more_balance); -} + /* + * Eat memory from filesystem page cache, buffer cache, + * dentry, inode and filesystem quota caches. + */ + ret += page_launder(gfp_mask); + shrink_dcache_memory(DEF_PRIORITY, gfp_mask); + shrink_icache_memory(1, gfp_mask); +#ifdef CONFIG_QUOTA + shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); +#endif -static int kswapd_can_sleep_pgdat(pg_data_t * pgdat) -{ - zone_t * zone; - int i; + /* + * If needed, we move pages from the active list + * to the inactive list. + */ + if (inactive_shortage() || free_shortage()) + ret += refill_inactive(0); - for (i = pgdat->nr_zones-1; i >= 0; i--) { - zone = pgdat->node_zones + i; - if (!zone->need_balance) - continue; - return 0; - } + /* + * Reclaim unused slab cache memory. + */ + kmem_cache_reap(gfp_mask); - return 1; + /* + * Hmm.. Cache shrink failed - time to kill something? + * Mhwahahhaha! This is the part I really like. Giggle. + */ + if (!ret) + out_of_memory(); + + return ret; } -static int kswapd_can_sleep(void) -{ - pg_data_t * pgdat; +DECLARE_WAIT_QUEUE_HEAD(kswapd_wait); - pgdat = pgdat_list; +/* + * Move some pages from the inactive_clean lists to the free + * lists so atomic allocations have pages to work from. + * + * We refill the freelist in a bump from pages_min to pages_low + * in order to give the buddy allocator something to play with. 
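+ *
+ * E.g. a zone with pages_min == 255 and pages_low == 510 that has
+ * fallen to 200 free pages will have up to 310 pages pulled off its
+ * inactive_clean list and handed back to the buddy allocator in one
+ * pass.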
+ */ +static void refill_freelist(void) +{ + pg_data_t * pgdat = pgdat_list; + int i; do { - if (kswapd_can_sleep_pgdat(pgdat)) - continue; - return 0; - } while ((pgdat = pgdat->node_next)); + for(i = 0; i < MAX_NR_ZONES; i++) { + zone_t *zone = pgdat->node_zones + i; + if (!zone->size || zone->free_pages >= zone->pages_min) + continue; - return 1; + while (zone->free_pages < zone->pages_low) { + struct page * page; + page = reclaim_page(zone); + if (!page) + break; + __free_page(page); + } + } + pgdat = pgdat->node_next; + } while (pgdat); } /* @@ -702,7 +712,6 @@ int kswapd(void *unused) { struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); daemonize(); strcpy(tsk->comm, "kswapd"); @@ -726,24 +735,65 @@ * Kswapd main loop. */ for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kswapd_wait, &wait); + static long recalc = 0; - mb(); - if (kswapd_can_sleep()) - schedule(); + /* + * We try to rebalance the VM either when we are short + * on free pages or when we have a shortage of inactive + * pages and are getting low on free pages. + */ + if (free_shortage() || (inactive_shortage() && !free_plenty())) + do_try_to_free_pages(GFP_KSWAPD); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&kswapd_wait, &wait); + refill_freelist(); - /* - * If we actually get into a low-memory situation, - * the processes needing more memory will wake us - * up on a more timely basis. + /* Once a second ... */ + if (time_after(jiffies, recalc + HZ)) { + recalc = jiffies; + + /* Do background page aging. */ + refill_inactive(DEF_PRIORITY); + } + + /* + * We go to sleep if either the free page shortage + * or the inactive page shortage is gone. We do this + * because: + * 1) we need no more free pages or + * 2) the inactive pages need to be flushed to disk, + * it wouldn't help to eat CPU time now ... + * + * We go to sleep for one second, but if it's needed + * we'll be woken up earlier... */ - kswapd_balance(); - run_task_queue(&tq_disk); + if (!free_shortage() || !inactive_shortage()) { + interruptible_sleep_on_timeout(&kswapd_wait, HZ); + } + } +} + +void wakeup_kswapd(void) +{ + if (waitqueue_active(&kswapd_wait)) + wake_up_interruptible(&kswapd_wait); +} + +/* + * Called by non-kswapd processes when they want more + * memory but are unable to sleep on kswapd because + * they might be holding some IO locks ... + */ +int try_to_free_pages(unsigned int gfp_mask) +{ + int ret = 1; + + if (gfp_mask & __GFP_WAIT) { + current->flags |= PF_MEMALLOC; + ret = do_try_to_free_pages(gfp_mask); + current->flags &= ~PF_MEMALLOC; } + + return ret; } static int __init kswapd_init(void) diff -urN linux-2.4.17-rc2-virgin/net/socket.c linux-2.4.17-rc2-wli2/net/socket.c --- linux-2.4.17-rc2-virgin/net/socket.c Tue Dec 18 23:18:04 2001 +++ linux-2.4.17-rc2-wli2/net/socket.c Tue Dec 18 22:28:47 2001 @@ -133,7 +133,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;