Index: linux/Documentation/RCU/proc.txt =================================================================== --- /dev/null +++ linux/Documentation/RCU/proc.txt @@ -0,0 +1,207 @@ +/proc Filesystem Entries for RCU + + +CONFIG_RCU_STATS + +The CONFIG_RCU_STATS config option is available only in conjunction with +CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs, +rcuptrs, rcugp, and rcustats. + +/proc/rcuctrs + + CPU last cur + 0 1 1 + 1 1 1 + 2 1 1 + 3 0 2 + ggp = 230725 + +This displays the number of processes that started RCU read-side critical +sections on each CPU. In absence of preemption, the "last" and "cur" +counts for a given CPU will always sum to one. Therefore, in the example +output above, each CPU has started one RCU read-side critical section +that was later preempted. The "last" column counts RCU read-side critical +sections that started prior to the last counter flip, while the "cur" +column counts critical sections that started after the last counter flip. + +The "ggp" count is a count of the number of counter flips since boot. +Since this is shown as an odd number, the "cur" counts are stored in +the zero-th element of each of the per-CPU arrays, and the "last" counts +are stored in the first element of each of the per-CPU arrays. + + +/proc/rcuptrs + + nl=c04c7160/c04c7960 nt=c04c72d0 + wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170 + +This displays the head and tail of each of CONFIG_PREEMPT_RCU's three +callback lists. This will soon change to display this on a per-CPU +basis, since each CPU will soon have its own set of callback lists. +In the example above, the "next" list header is located at hex address +0xc04c7160, the first element on the list at hex address 0xc04c7960, +and the last element on the list at hex address 0xc04c72d0. The "wl=" +and "wt=" output is similar for the "wait" list, and the "dl=" and "dt=" +output for the "done" list. The "done" list is normally emptied very +quickly after being filled, so will usually be empty as shown above. +Note that the tail pointer points into the list header in this case. + +Callbacks are placed in the "next" list by call_rcu(), moved to the +"wait" list after the next counter flip, and moved to the "done" list +on the counter flip after that. Once on the "done" list, the callbacks +are invoked. + + +/proc/rcugp + + oldggp=241419 newggp=241421 + +This entry invokes synchronize_rcu() and prints out the number of counter +flips since boot before and after the synchronize_rcu(). These two +numbers will always differ by at least two. Unless RCU is broken. ;-) + + +/proc/rcustats + + ggp=242416 lgp=242416 sr=0 rcc=396233 + na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920 + rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746 + +The quantities printed are as follows: + +o "ggp=": The number of flips since boot. + +o "lgp=": The number of flips sensed by the local structure since + boot. This will soon be per-CPU. + +o "sr=": The number of explicit call to synchronize_rcu(). + Except that this is currently broken, so always reads as zero. + It is likely to be removed... + +o "rcc=": The number of calls to rcu_check_callbacks(). + +o "na=": The number of callbacks that call_rcu() has registered + since boot. + +o "nl=": The number of callbacks currently on the "next" list. + +o "wa=": The number of callbacks that have moved to the "wait" + list since boot. + +o "wl=": The number of callbacks currently on the "wait" list. + +o "da=": The number of callbacks that have been moved to the + "done" list since boot. + +o "dl=": The number of callbacks currently on the "done" list. + +o "dr=": The number of callbacks that have been removed from the + "done" list since boot. + +o "di=": The number of callbacks that have been invoked after being + removed from the "done" list. + +o "rtf1=": The number of attempts to flip the counters. + +o "rtf2=": The number of attempts to flip the counters that successfully + acquired the fliplock. + +o "rtf3=": The number of successful counter flips. + +o "rtfe1=": The number of attempts to flip the counters that failed + due to the lock being held by someone else. + +o "rtfe2=": The number of attempts to flip the counters that were + abandoned due to someone else doing the job for us. + +o "rtfe3=": The number of attempts to flip the counters that failed + due to some task still being in an RCU read-side critical section + starting from before the last successful counter flip. + + +CONFIG_RCU_TORTURE_TEST + +The CONFIG_RCU_TORTURE_TEST config option is available for all RCU +implementations. It makes three /proc entries available, namely: rcutw, +rcutr, and rcuts. + + +/proc/rcutw + +Reading this entry starts a new torture test, or ends an earlier one +if one is already in progress (in other words, there can be only one +writer at a time). This sleeps uninterruptibly, so be sure to run +it in the background. One could argue that it would be good to have +multiple writers, but Linux uses RCU heavily enough that you will get +write-side contention whether you want it or not. If you want additional +write-side contention, repeatedly create and destroy several large file +trees in parallel. Or use some other RCU-protected update. + + +/proc/rcutr + +Reading this entry starts a new torture reader, which runs until sent +a signal (e.g., control-C). If testing an RCU implementation with +preemptible read-side critical sections, make sure to spawn at least +two /proc/rcutr instances for each CPU. + + +/proc/rcuts + +Displays the current state of the torture test: + + ggp = 20961 + rtc: c04496f4 ver: 8734 tfle: 0 rta: 8734 rtaf: 0 rtf: 8715 + Reader Pipe: 88024120 63914 0 0 0 0 0 0 0 0 0 + Reader Batch: 88024097 63937 0 0 0 0 0 0 0 0 + Free-Block Circulation: 8733 8731 8729 8727 8725 8723 8721 8719 8717 8715 0 + +The entries are as follows: + +o "ggp": The number of counter flips (or batches) since boot. + +o "rtc": The hexadecimal address of the structure currently visible + to readers. + +o "ver": The number of times since boot that the rcutw writer task + has changed the structure visible to readers. + +o "tfle": If non-zero, indicates that the "torture freelist" + containing structure to be placed into the "rtc" area is empty. + This condition is important, since it can fool you into thinking + that RCU is working when it is not. :-/ + +o "rta": Number of structures allocated from the torture freelist. + +o "rtaf": Number of allocations from the torture freelist that have + failed due to the list being empty. + +o "rtf": Number of frees into the torture freelist. + +o "Reader Pipe": Histogram of "ages" of structures seen by readers. + If any entries past the first two are non-zero, RCU is broken. + And /proc/rcuts prints "!!!" to make sure you notice. The age + of a newly allocated structure is zero, it becomes one when + removed from reader visibility, and is incremented once per + grace period subsequently -- and is freed after passing through + (RCU_TORTURE_PIPE_LEN-2) grace periods. + + The output displayed above was taken from a correctly working + RCU. If you want to see what it looks like when broken, break + it yourself. ;-) + +o "Reader Batch": Another histogram of "ages" of structures seen + by readers, but in terms of counter flips (or batches) rather + than in terms of grace periods. The legal number of non-zero + entries is again two. The reason for this separate view is + that it is easier to get the third entry to show up in the + "Reader Batch" list than in the "Reader Pipe" list. + +o "Free-Block Circulation": Shows the number of torture structures + that have reached a given point in the pipeline. The first element + should closely correspond to the number of structures allocated, + the second to the number that have been removed from reader view, + and all but the last remaining to the corresponding number of + passes through a grace period. The last entry should be zero, + as it is only incremented if a torture structure's counter + somehow gets incremented farther than it should. Index: linux/MAINTAINERS =================================================================== --- linux.orig/MAINTAINERS +++ linux/MAINTAINERS @@ -960,6 +960,12 @@ L: linux-nvidia@lists.surfsouth.com W: http://drama.obuda.kando.hu/~fero/cgi-bin/hgafb.shtml S: Maintained +High-Res-Timers (HRT) extension to Posix Clocks & Timers +P: George Anzinger +M: george@mvista.com +L: linux-net@vger.kernel.org +S: Supported + HIGH-SPEED SCC DRIVER FOR AX.25 P: Klaus Kudielka M: klaus.kudielka@ieee.org Index: linux/Makefile =================================================================== --- linux.orig/Makefile +++ linux/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 13 -EXTRAVERSION =-rc7 +EXTRAVERSION =-rc7-rt3 NAME=Woozy Numbat # *DOCUMENTATION* @@ -517,10 +517,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops) CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps) -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_DEBUG_INFO Index: linux/arch/arm/mach-pxa/corgi_ssp.c =================================================================== --- linux.orig/arch/arm/mach-pxa/corgi_ssp.c +++ linux/arch/arm/mach-pxa/corgi_ssp.c @@ -22,7 +22,7 @@ #include #include -static spinlock_t corgi_ssp_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(corgi_ssp_lock); static struct ssp_dev corgi_ssp_dev; static struct ssp_state corgi_ssp_state; Index: linux/arch/i386/Kconfig =================================================================== --- linux.orig/arch/i386/Kconfig +++ linux/arch/i386/Kconfig @@ -368,16 +368,6 @@ config X86_L1_CACHE_SHIFT default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 default "6" if MK7 || MK8 || MPENTIUMM -config RWSEM_GENERIC_SPINLOCK - bool - depends on M386 - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -434,7 +424,7 @@ config X86_USE_PPRO_CHECKSUM config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on (MCYRIXIII || MK7) && !PREEMPT_RT default y config X86_OOSTORE @@ -442,6 +432,17 @@ config X86_OOSTORE depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR default y +choice + prompt "Clock & Timer Selection" + default LEGACY_TIMER + help + You may have either HPET, High Resolution, or Legacy timer support. + +config LEGACY_TIMER + bool "Legacy Timer Support" + help + This chooses the legacy 8254 (PIT) for timer support. + config HPET_TIMER bool "HPET Timer Support" help @@ -451,7 +452,112 @@ config HPET_TIMER activated if the platform and the BIOS support this feature. Otherwise the 8254 will be used for timing services. - Choose N to continue using the legacy 8254 timer. +config HIGH_RES_TIMERS + bool "High Resolution Timer Support" + help + This option enables high resolution POSIX clocks and timers with + resolution of at least 1 microsecond. High resolution timers are not + free, a small overhead is incurred each time a timer that does not + expire on a 1/HZ tick boundary is used. If no such timers are used + the overhead is nil. + + The POSIX clocks CLOCK_REALTIME and CLOCK_MONOTONIC are available by + default. This option enables two additional clocks, CLOCK_REALTIME_HR + and CLOCK_MONOTONIC_HR. Note that this option does not change the + resolution of CLOCK_REALTIME or CLOCK_MONOTONIC, which remain at 1/HZ + resolution. + +endchoice + +choice + prompt "High Resolution Timer clock source" + depends on HIGH_RES_TIMERS + default HIGH_RES_TIMER_TSC + help + This option allows you to choose the wall clock timer for your + system. With high resolution timers on the x86 platforms it + is best to keep the interrupt-generating timer separate from + the time-keeping timer. On x86 platforms there are two + possible sources implemented for the wall clock. These are: + + + ACPI power management (pm) timer ~280 nanoseconds + TSC (Time Stamp Counter) 1/CPU clock + + The PIT is always used to generate clock interrupts, but in + SMP systems the APIC timers are used to drive the timer list + code. This means that in SMP systems the PIT will not be + programmed to generate arch_cycle events and can give + reasonable service as the clock interrupt. In non-SMP (UP) + systems it will be programmed to interrupt when the next timer + is to expire or on the next 1/HZ tick. + + The TSC runs at the CPU clock rate (i.e. its resolution is + 1/CPU clock) and it has a very low access time. However, it + is subject, in some (incorrect) processors, to throttling to + cool the CPU, and to other slowdowns during power management. + If your system has power managment code active these changes + are tracked by the TSC timer code. If your CPU is correct and + does not change the TSC frequency for throttling or power + management outside of the power managment kernel code, this is + the best clock timer. + + The ACPI pm timer is available on systems with Advanced + Configuration and Power Interface support. The pm timer is + available on these systems even if you don't use or enable + ACPI in the software or the BIOS (but see Default ACPI pm + timer address). The timer has a resolution of about 280 + nanoseconds, however, the access time is a bit higher than + that of the TSC. Since it is part of ACPI it is intended to + keep track of time while the system is under power management, + thus it is not subject to the power management problems of the + TSC. + + If you enable the ACPI pm timer and it cannot be found, it is + possible that your BIOS is not producing the ACPI table or + that your machine does not support ACPI. In the former case, + see "Default ACPI pm timer address". If the timer is not + found the boot will fail when trying to calibrate the 'delay' + loop. + +# config HIGH_RES_TIMER_ACPI_PM +# bool "ACPI-pm-timer" + +config HIGH_RES_TIMER_TSC + bool "Time-stamp-counter/TSC" + depends on X86_TSC + +endchoice + +config HIGH_RES_RESOLUTION + int "High Resolution Timer resolution (nanoseconds)" + depends on HIGH_RES_TIMERS + default 1000 + help + This sets the resolution in nanoseconds of the CLOCK_REALTIME_HR and + CLOCK_MONOTONIC_HR timers. Too fine a resolution (small a number) + will usually not be observable due to normal system latencies. For an + 800 MHz processor about 10,000 (10 microseconds) is recommended as a + finest resolution. If you don't need that sort of resolution, + larger values may generate less overhead. + +config HIGH_RES_TIMER_ACPI_PM_ADD + int "Default ACPI pm timer address" + depends on HIGH_RES_TIMER_ACPI_PM + default 0 + help + This option is available for use on systems where the BIOS + does not generate the ACPI tables if ACPI is not enabled. For + example some BIOSes will not generate the ACPI tables if APM + is enabled. The ACPI pm timer is still available but cannot + be found by the software. This option allows you to supply + the needed address. When the high resolution timers code + finds a valid ACPI pm timer address it reports it in the boot + messages log (look for lines that begin with + "High-res-timers:"). You can turn on the ACPI support in the + BIOS, boot the system and find this value. You can then enter + it at configure time. Both the report and the entry are in + decimal. config HPET_EMULATE_RTC bool @@ -513,6 +619,20 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -548,6 +668,16 @@ config X86_IO_APIC depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) default y +config X86_IOAPIC_FAST + bool "enhanced IO-APIC support" + depends on X86_IO_APIC + default y + help + this option will activate further optimizations in the IO-APIC + code. NOTE: this is experimental code, and disabled by default. + Symptoms of non-working systems are boot-time lockups, stray or + screaming interrupts and other interrupt related weirdnesses. + config X86_VISWS_APIC bool depends on X86_VISWS @@ -913,7 +1043,7 @@ config BOOT_IOREMAP config REGPARM bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && !MCOUNT default n help Compile the kernel with -mregparm=3. This uses a different ABI Index: linux/arch/i386/Kconfig.debug =================================================================== --- linux.orig/arch/i386/Kconfig.debug +++ linux/arch/i386/Kconfig.debug @@ -18,6 +18,7 @@ config EARLY_PRINTK config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y help This option will cause messages to be printed if free stack space drops below a certain limit. @@ -35,6 +36,7 @@ config KPROBES config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. Index: linux/arch/i386/boot/compressed/misc.c =================================================================== --- linux.orig/arch/i386/boot/compressed/misc.c +++ linux/arch/i386/boot/compressed/misc.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ static long free_mem_end_ptr; #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; Index: linux/arch/i386/kernel/Makefile =================================================================== --- linux.orig/arch/i386/kernel/Makefile +++ linux/arch/i386/kernel/Makefile @@ -4,11 +4,12 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ doublefault.o quirks.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ @@ -20,6 +21,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o Index: linux/arch/i386/kernel/apic.c =================================================================== --- linux.orig/arch/i386/kernel/apic.c +++ linux/arch/i386/kernel/apic.c @@ -36,11 +36,18 @@ #include #include #include +#include #include #include "io_ports.h" +#ifndef CONFIG_HIGH_RES_TIMERS +#define compute_latch(a) +#else +extern void apic_timer_ipi_interrupt(struct pt_regs regs); +#endif + /* * Knob to control our willingness to enable the local APIC. */ @@ -76,6 +83,13 @@ void __init apic_intr_init(void) { #ifdef CONFIG_SMP smp_intr_init(); +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * We let the PIT timer interrupt trigger all CPU's + * for profiling and update_system_time + */ + set_intr_gate(LOCAL_TIMER_IPI_VECTOR, apic_timer_ipi_interrupt); +#endif #endif /* self generated IPI for local APIC timer */ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -90,12 +104,11 @@ void __init apic_intr_init(void) #endif } -/* Using APIC to generate smp_local_timer_interrupt? */ -int using_apic_timer = 0; - +#ifndef CONFIG_HIGH_RES_TIMERS static DEFINE_PER_CPU(int, prof_multiplier) = 1; static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; static DEFINE_PER_CPU(int, prof_counter) = 1; +#endif static int enabled_via_apicbase; @@ -566,9 +579,9 @@ void lapic_shutdown(void) if (!cpu_has_apic || !enabled_via_apicbase) return; - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_PM @@ -612,9 +625,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -626,7 +639,7 @@ static int lapic_resume(struct sys_devic if (!apic_pm_state.active) return 0; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Make sure the APICBASE points to the right address @@ -657,7 +670,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -850,10 +863,10 @@ fake_ioapic_page: ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); + set_fixmap_nocache(idx, ioapic_phys); + printk(KERN_DEBUG "faked IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); idx++; } } @@ -926,13 +939,24 @@ void (*wait_timer_tick)(void) __devinitd */ #define APIC_DIVISOR 16 - +/* + * For high res timers we want a single shot timer. + * This means, for profiling, that we must load it each + * interrupt, but it works best for timers as a one shot and + * it is little overhead for the profiling which, we hope is + * not done that often, nor on production machines. + */ static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + lvtt_value = +#ifndef CONFIG_HIGH_RES_TIMERS + APIC_LVT_TIMER_PERIODIC | +#endif + LOCAL_TIMER_VECTOR; + if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); @@ -952,7 +976,7 @@ static void __devinit setup_APIC_timer(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Wait for IRQ0's slice: @@ -961,7 +985,7 @@ static void __devinit setup_APIC_timer(u __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -1050,7 +1074,7 @@ void __init setup_boot_APIC_clock(void) apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -1058,7 +1082,9 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + compute_latch(calibration_result / APIC_DIVISOR); + + raw_local_irq_enable(); } void __devinit setup_secondary_APIC_clock(void) @@ -1086,6 +1112,87 @@ void enable_APIC_timer(void) } } + +/* High resolution timer specific quirks */ +#ifdef CONFIG_HIGH_RES_TIMERS + +#ifdef CONFIG_SMP +/* + * We use the lapic timer for hrt and therefor discard + * the variable profiling + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * This code ONLY takes IPI interrupts from the PIT interrupt handler + */ +fastcall notrace void smp_apic_timer_ipi_interrupt(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * the NMI deadlock-detector uses this. + */ + per_cpu(irq_stat, cpu).apic_timer_irqs++; + + trace_special(regs->eip, 0, 0); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + irq_enter(); + // we profile via the NMI +#if 0 + profile_tick(CPU_PROFILING, regs); +#endif + update_process_times(user_mode(regs)); + irq_exit(); + +} +#endif + +void reload_apic_timer(unsigned int clocks) +{ + apic_write_around(APIC_TMICT, clocks); +} + +/* + * We use the local apic timer for high resolution timer interrups + * This moves profiling and update process times into the PIT. + * + * We lose the variable multiplier of profiling, .... + * +*/ +inline void smp_local_timer_interrupt(struct pt_regs * regs) +{ + /* + * This is ugly, but we _must_ protect the hr interrupt code + * against corruption of jiffies because the !using_apic_timer + * path in do_timer_interrupt calls the hr interrupt code with + * xtime lock held too. We dont know at compile time whether + * we can use the local apic or not, so we have to do the + * locking here. + */ + write_seqlock(&xtime_lock); + if (highres_active) { + do_hr_timer_int(); + } + write_sequnlock(&xtime_lock); +} + +#else /* !HIGH_RES_TIMERS */ + +#ifdef CONFIG_SMP /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -1113,8 +1220,7 @@ int setup_profiling_timer(unsigned int m return 0; } - -#undef APIC_DIVISOR +#endif /* * Local timer interrupt handler. It does both profiling and @@ -1130,7 +1236,11 @@ inline void smp_local_timer_interrupt(st { int cpu = smp_processor_id(); + // we profile via the NMI +#if 0 profile_tick(CPU_PROFILING, regs); +#endif + if (--per_cpu(prof_counter, cpu) <= 0) { /* * The multiplier may have changed since the last time we got @@ -1149,7 +1259,6 @@ inline void smp_local_timer_interrupt(st per_cpu(prof_old_multiplier, cpu) = per_cpu(prof_counter, cpu); } - #ifdef CONFIG_SMP update_process_times(user_mode_vm(regs)); #endif @@ -1167,6 +1276,8 @@ inline void smp_local_timer_interrupt(st */ } +#endif /* !HIGH_RES_TIMERS */ + /* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1176,7 +1287,7 @@ inline void smp_local_timer_interrupt(st * interrupt as well. Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); @@ -1185,6 +1296,8 @@ fastcall void smp_apic_timer_interrupt(s */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. Index: linux/arch/i386/kernel/apm.c =================================================================== --- linux.orig/arch/i386/kernel/apm.c +++ linux/arch/i386/kernel/apm.c @@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum */ #define APM_DO_CLI \ if (apm_info.allow_ints) \ - local_irq_enable(); \ + raw_local_irq_enable(); \ else \ - local_irq_disable(); + raw_local_irq_disable(); #ifdef APM_ZERO_SEGS # define APM_DECL_SEGS \ @@ -604,12 +604,12 @@ static u8 apm_bios_call(u32 func, u32 eb save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -647,12 +647,12 @@ static u8 apm_bios_call_simple(u32 func, save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -1194,7 +1194,7 @@ static int suspend(int vetoable) } device_suspend(PMSG_SUSPEND); - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ @@ -1210,14 +1210,14 @@ static int suspend(int vetoable) */ spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); ignore_normal_resume = 1; restore_processor_state(); - local_irq_disable(); + raw_local_irq_disable(); write_seqlock(&xtime_lock); spin_lock(&i8253_lock); reinit_timer(); @@ -1232,7 +1232,7 @@ static int suspend(int vetoable) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); @@ -1251,22 +1251,22 @@ static void standby(void) { int err; - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ write_seqlock(&xtime_lock); /* If needed, notify drivers here */ get_time_diff(); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) apm_error("standby", err); - local_irq_disable(); + raw_local_irq_disable(); device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); } static apm_event_t get_event(void) Index: linux/arch/i386/kernel/cpu/mtrr/cyrix.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/cyrix.c +++ linux/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ /* Save flags and disable interrupts */ - local_irq_save(flags); + raw_local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); + raw_local_irq_restore(flags); shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; Index: linux/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/generic.c +++ linux/arch/i386/kernel/cpu/mtrr/generic.c @@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32 static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -296,14 +296,14 @@ static void generic_set_all(void) unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); /* Actually set the state */ mask = set_mtrr_state(deftype_lo,deftype_hi); post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { @@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in vr = &mtrr_state.var_ranges[reg]; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); if (size == 0) { @@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in } post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) Index: linux/arch/i386/kernel/cpu/mtrr/main.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/main.c +++ linux/arch/i386/kernel/cpu/mtrr/main.c @@ -146,7 +146,7 @@ static void ipi_handler(void *info) struct set_mtrr_data *data = info; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); atomic_dec(&data->count); while(!atomic_read(&data->gate)) @@ -164,7 +164,7 @@ static void ipi_handler(void *info) cpu_relax(); atomic_dec(&data->count); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u if (smp_call_function(ipi_handler, &data, 1, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); - local_irq_save(flags); + raw_local_irq_save(flags); while(atomic_read(&data.count)) cpu_relax(); @@ -259,7 +259,7 @@ static void set_mtrr(unsigned int reg, u while(atomic_read(&data.count)) cpu_relax(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -687,11 +687,11 @@ void mtrr_ap_init(void) * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to * prevent mtrr entry changes */ - local_irq_save(flags); + raw_local_irq_save(flags); mtrr_if->set_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init mtrr_init_finialize(void) Index: linux/arch/i386/kernel/cpu/mtrr/state.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/state.c +++ linux/arch/i386/kernel/cpu/mtrr/state.c @@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt unsigned int cr0; /* Disable interrupts locally */ - local_irq_save(ctxt->flags); + raw_local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { @@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); + raw_local_irq_restore(ctxt->flags); } Index: linux/arch/i386/kernel/entry.S =================================================================== --- linux.orig/arch/i386/kernel/entry.S +++ linux/arch/i386/kernel/entry.S @@ -76,10 +76,10 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +# define preempt_stop cli #else -#define preempt_stop -#define resume_kernel restore_nocheck +# define preempt_stop +# define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -160,14 +160,17 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all + jz restore_nocheck + cli call preempt_schedule_irq jmp need_resched #endif @@ -200,6 +203,11 @@ sysenter_past_esp: pushl %eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -213,6 +221,11 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -225,6 +238,11 @@ sysenter_past_esp: ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -254,6 +272,17 @@ restore_all: cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE) + pushl %eax +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + call trace_irqs_on +#endif +#ifdef CONFIG_LATENCY_TRACE + call sys_ret +#endif + popl %eax +#endif +restore_nocheck_nmi: RESTORE_REGS addl $4, %esp 1: iret @@ -297,18 +326,19 @@ ldt_ss: # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testb $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %cl jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + call __schedule + # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testb $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %cl jnz work_resched work_notifysig: # deal with pending signals and @@ -348,6 +378,11 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + pushl %eax + call trace_irqs_on + popl %eax +#endif sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -409,9 +444,16 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel; +#else +# define TRACE_IRQS_OFF +#endif + ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -420,6 +462,7 @@ common_interrupt: ENTRY(name) \ pushl $nr-256; \ SAVE_ALL \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; @@ -549,7 +592,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) Index: linux/arch/i386/kernel/i386_ksyms.c =================================================================== --- linux.orig/arch/i386/kernel/i386_ksyms.c +++ linux/arch/i386/kernel/i386_ksyms.c @@ -6,10 +6,12 @@ /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES) extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); Index: linux/arch/i386/kernel/i8259.c =================================================================== --- linux.orig/arch/i386/kernel/i8259.c +++ linux/arch/i386/kernel/i8259.c @@ -38,7 +38,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -369,7 +369,7 @@ static irqreturn_t math_error_irq(int cp * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL }; void __init init_ISA_irqs (void) { Index: linux/arch/i386/kernel/init_task.c =================================================================== --- linux.orig/arch/i386/kernel/init_task.c +++ linux/arch/i386/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/i386/kernel/io_apic.c =================================================================== --- linux.orig/arch/i386/kernel/io_apic.c +++ linux/arch/i386/kernel/io_apic.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * Is the SiS APIC rmw bug present ? @@ -55,11 +56,6 @@ static DEFINE_SPINLOCK(ioapic_lock); int sis_apic_bug = -1; /* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ @@ -128,19 +124,131 @@ static void __init replace_pin_at_irq(un } } +#ifdef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_CACHE +#endif + + + +struct ioapic_data_struct { + struct sys_device dev; + int nr_registers; // # of IRQ routing registers + volatile unsigned int *base; + struct IO_APIC_route_entry *entry; +#ifdef IOAPIC_CACHE + unsigned int reg_set; + u32 cached_val[0]; +#endif +}; + +static struct ioapic_data_struct *ioapic_data[MAX_IO_APICS]; + + +static inline unsigned int __raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + return ioapic->base[4]; +} + + +# ifdef IOAPIC_CACHE +static void __init ioapic_cache_init(struct ioapic_data_struct *ioapic) +{ + int reg; + for (reg = 0; reg < (0x10 + 2 * ioapic->nr_registers); reg++) + ioapic->cached_val[reg] = __raw_io_apic_read(ioapic, reg); +} +# endif + + +static unsigned int raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ + unsigned int val = __raw_io_apic_read(ioapic, reg); + +# ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; +# endif + return val; +} + +static unsigned int io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + if (likely(!sis_apic_bug)) { + ioapic->reg_set = -1; + return ioapic->cached_val[reg]; + } +# endif + return raw_io_apic_read(ioapic, reg); +} + +static void io_apic_write(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +# ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + ioapic->base[4] = val; +} + + +/* + * Some systems need a POST flush or else level-triggered interrupts + * generate lots of spurious interrupts due to the POST-ed write not + * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC. + * + * It seems most systems need this - disable the optimization for now. + */ +#ifndef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_POSTFLUSH +#endif + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index regiser + */ +static void io_apic_modify(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; + if (ioapic->reg_set != reg || sis_apic_bug) { + ioapic->reg_set = reg; +#else + if (unlikely(sis_apic_bug)) { +#endif + ioapic->base[0] = reg; + } + ioapic->base[4] = val; +#ifndef IOAPIC_POSTFLUSH + if (unlikely(sis_apic_bug)) +#endif + /* + * Force POST flush by reading: + */ + val = ioapic->base[4]; +} + static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; + unsigned int pin, val; + struct ioapic_data_struct *ioapic; for (;;) { pin = entry->pin; if (pin == -1) break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); + ioapic = ioapic_data[entry->apic]; + val = io_apic_read(ioapic, 0x10 + pin*2); + val &= ~disable; + val |= enable; + io_apic_modify(ioapic, 0x10 + pin*2, val); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -148,29 +256,17 @@ static void __modify_IO_APIC_irq (unsign } /* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) +static inline void __mask_IO_APIC_irq (unsigned int irq) { __modify_IO_APIC_irq(irq, 0x00010000, 0); } /* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) +static inline void __unmask_IO_APIC_irq (unsigned int irq) { __modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -189,15 +285,15 @@ static void unmask_IO_APIC_irq (unsigned spin_unlock_irqrestore(&ioapic_lock, flags); } -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +static void clear_IO_APIC_pin(struct ioapic_data_struct *ioapic, unsigned int pin) { struct IO_APIC_route_entry entry; unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int*)&entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * pin); + *(((int*)&entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); if (entry.delivery_mode == dest_SMI) return; @@ -208,8 +304,8 @@ static void clear_IO_APIC_pin(unsigned i memset(&entry, 0, sizeof(entry)); entry.mask = 1; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + io_apic_write(ioapic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(ioapic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -217,9 +313,14 @@ static void clear_IO_APIC (void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); + for (apic = 0; apic < nr_ioapics; apic++) { + struct ioapic_data_struct *ioapic = ioapic_data[apic]; +#ifdef IOAPIC_CACHE + ioapic->reg_set = -1; +#endif + for (pin = 0; pin < ioapic->nr_registers; pin++) + clear_IO_APIC_pin(ioapic, pin); + } } static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) @@ -237,7 +338,7 @@ static void set_ioapic_affinity_irq(unsi pin = entry->pin; if (pin == -1) break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); + io_apic_write(ioapic_data[entry->apic], 0x10 + 1 + pin*2, apicid_value); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -828,7 +929,7 @@ void __init setup_ioapic_dest(void) return; for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + for (pin = 0; pin < ioapic_data[ioapic]->nr_registers; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; @@ -1071,7 +1172,7 @@ static int pin_2_irq(int idx, int apic, */ i = irq = 0; while (i < apic) - irq += nr_ioapic_registers[i++]; + irq += ioapic_data[i++]->nr_registers; irq += pin; /* @@ -1114,7 +1215,7 @@ static inline int IO_APIC_irq_trigger(in int apic, idx, pin; for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapic_data[apic]->nr_registers; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) return irq_trigger(idx); @@ -1186,11 +1287,13 @@ static void __init setup_IO_APIC_irqs(vo struct IO_APIC_route_entry entry; int apic, pin, idx, irq, first_notcon = 1, vector; unsigned long flags; + struct ioapic_data_struct *ioapic; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + ioapic = ioapic_data[apic]; + for (pin = 0; pin < ioapic->nr_registers; pin++) { /* * add it to the IO-APIC irq-routing table: @@ -1247,8 +1350,8 @@ static void __init setup_IO_APIC_irqs(vo disable_8259A_irq(irq); } spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1294,8 +1397,8 @@ static void __init setup_ExtINT_IRQ0_pin * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1305,7 +1408,7 @@ static inline void UNEXPECTED_IO_APIC(vo { } -void __init print_IO_APIC(void) +void /*__init*/ print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1313,6 +1416,7 @@ void __init print_IO_APIC(void) union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; + struct ioapic_data_struct *ioapic; if (apic_verbosity == APIC_QUIET) return; @@ -1320,7 +1424,7 @@ void __init print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mpc_apicid, ioapic_data[i]->nr_registers); /* * We are a bit conservative about what we expect. We have to @@ -1329,14 +1433,14 @@ void __init print_IO_APIC(void) printk(KERN_INFO "testing the IO APIC.......................\n"); for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); + reg_00.raw = io_apic_read(ioapic, 0); + reg_01.raw = io_apic_read(ioapic, 1); if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); + reg_02.raw = io_apic_read(ioapic, 2); if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + reg_03.raw = io_apic_read(ioapic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); @@ -1407,8 +1511,8 @@ void __init print_IO_APIC(void) struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(ioapic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(ioapic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1454,7 +1558,7 @@ void __init print_IO_APIC(void) return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1601,9 +1705,7 @@ void /*__init*/ print_PIC(void) static void __init enable_IO_APIC(void) { - union IO_APIC_reg_01 reg_01; int i; - unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { irq_2_pin[i].pin = -1; @@ -1614,16 +1716,6 @@ static void __init enable_IO_APIC(void) pirq_entries[i] = -1; /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (i = 0; i < nr_ioapics; i++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; - } - - /* * Do not trust the IO-APIC being empty at bootup */ clear_IO_APIC(); @@ -1666,8 +1758,7 @@ void disable_IO_APIC(void) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } disconnect_bsp_APIC(pin != -1); @@ -1689,6 +1780,7 @@ static void __init setup_ioapic_ids_from int i; unsigned char old_id; unsigned long flags; + struct ioapic_data_struct *ioapic; /* * Don't check I/O APIC IDs for xAPIC systems. They have @@ -1706,10 +1798,10 @@ static void __init setup_ioapic_ids_from * Set the IOAPIC ID to the value stored in the MPC table. */ for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic].mpc_apicid; @@ -1770,14 +1862,14 @@ static void __init setup_ioapic_ids_from reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(ioapic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) printk("could not set ID!\n"); @@ -1801,7 +1893,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1865,9 +1957,11 @@ static unsigned int startup_edge_ioapic_ static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); +#if 0 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); +#endif ack_APIC_irq(); } @@ -1892,6 +1986,30 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +#ifdef CONFIG_PREEMPT_HARDIRQS + +/* + * in the PREEMPT_HARDIRQS case we dont want to keep the local + * APIC unacked, because the prevents further interrupts from + * being handled - and with IRQ threads being delayed arbitrarily, + * that's unacceptable. So we first mask the IRQ, then ack it. + * The hardirq thread will then unmask it. + */ +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +#else + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + +#endif + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1926,8 +2044,10 @@ static void end_level_ioapic_irq (unsign if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } @@ -1954,6 +2074,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1993,25 +2120,25 @@ static void set_ioapic_affinity_vector ( * races. */ static struct hw_interrupt_type ioapic_edge_type = { - .typename = "IO-APIC-edge", + .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, .enable = enable_edge_ioapic, .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity, }; static struct hw_interrupt_type ioapic_level_type = { - .typename = "IO-APIC-level", + .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, .enable = enable_level_ioapic, .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity, }; static inline void init_IO_APIC_traps(void) @@ -2075,13 +2202,13 @@ static void ack_lapic_irq (unsigned int static void end_lapic_irq (unsigned int i) { /* nothing */ } static struct hw_interrupt_type lapic_irq_type = { - .typename = "local-APIC-edge", - .startup = NULL, /* startup_irq() not used for IRQ0 */ - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ - .enable = enable_lapic_irq, - .disable = disable_lapic_irq, - .ack = ack_lapic_irq, - .end = end_lapic_irq + .typename = "local-APIC-edge", + .startup = NULL, /* startup_irq() not used for IRQ0 */ + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ + .enable = enable_lapic_irq, + .disable = disable_lapic_irq, + .ack = ack_lapic_irq, + .end = end_lapic_irq }; static void setup_nmi (void) @@ -2109,22 +2236,23 @@ static void setup_nmi (void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static void __init unlock_ExtINT_logic(void) { int pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; pin = find_isa_irq_pin(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(ioapic0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(ioapic0, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2137,8 +2265,8 @@ static inline void unlock_ExtINT_logic(v entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2156,11 +2284,11 @@ static inline void unlock_ExtINT_logic(v CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2170,10 +2298,11 @@ static inline void unlock_ExtINT_logic(v * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. */ -static inline void check_timer(void) +static void __init check_timer(void) { int pin1, pin2; int vector; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; /* * get/set the timer IRQ vector: @@ -2191,7 +2320,10 @@ static inline void check_timer(void) */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; +#ifdef CONFIG_PREEMPT_RT + if (nmi_watchdog) +#endif + timer_ack = 1; enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); @@ -2212,7 +2344,7 @@ static inline void check_timer(void) } return; } - clear_IO_APIC_pin(0, pin1); + clear_IO_APIC_pin(ioapic0, pin1); printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); } @@ -2237,7 +2369,7 @@ static inline void check_timer(void) /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(ioapic0, pin2); } printk(" failed.\n"); @@ -2278,6 +2410,46 @@ static inline void check_timer(void) "report. Then try booting with the 'noapic' option"); } +void __init setup_IO_APIC_early(int _ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int size, nr_ioapic_registers; + volatile int *ioapic; + if (ioapic_data[_ioapic]) { + printk("been in %s before !!!!!\n", __FUNCTION__); + return; + } + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + _ioapic, mp_ioapics[_ioapic].mpc_apicaddr); + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(FIX_IO_APIC_BASE_0 + _ioapic), mp_ioapics[_ioapic].mpc_apicaddr); + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + ioapic = IO_APIC_BASE(_ioapic); + spin_lock_irqsave(&ioapic_lock, flags); + ioapic[0] = 1; + reg_01.raw = ioapic[4]; + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers = reg_01.bits.entries+1; + + /* + * Initialsize ioapic_data struct: + */ + size = sizeof(struct ioapic_data_struct); +#ifdef IOAPIC_CACHE + size += 0x10 * sizeof(u32) + nr_ioapic_registers * sizeof(struct IO_APIC_route_entry); +#endif + ioapic_data[_ioapic] = alloc_bootmem(size); + memset(ioapic_data[_ioapic], 0, size); + ioapic_data[_ioapic]->nr_registers = nr_ioapic_registers; + ioapic_data[_ioapic]->base = ioapic; +#ifdef IOAPIC_CACHE + ioapic_cache_init(ioapic_data[_ioapic]); +#endif +} + /* * * IRQ's that are handled by the PIC in the MPS IOAPIC case. @@ -2325,25 +2497,22 @@ static int __init io_apic_bug_finalize(v late_initcall(io_apic_bug_finalize); -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; int i; + struct ioapic_data_struct *ioapic; - data = container_of(dev, struct sysfs_ioapic_data, dev); + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + *(((int *)entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * i); + *(((int *)entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * i); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2353,23 +2522,25 @@ static int ioapic_suspend(struct sys_dev static int ioapic_resume(struct sys_device *dev) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); + struct ioapic_data_struct *ioapic; + + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); + reg_00.raw = io_apic_read(ioapic, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; - io_apic_write(dev->id, 0, reg_00.raw); + io_apic_write(ioapic, 0, reg_00.raw); } - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + io_apic_write(ioapic, 0x11+2*i, *(((int *)entry)+1)); + io_apic_write(ioapic, 0x10+2*i, *(((int *)entry)+0)); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2392,21 +2563,20 @@ static int __init ioapic_init_sysfs(void return error; for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { + size = ioapic_data[i]->nr_registers * sizeof(struct IO_APIC_route_entry); + ioapic_data[i]->entry = kmalloc(size, GFP_KERNEL); + if (!ioapic_data[i]->entry) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); - dev = &mp_ioapic_data[i]->dev; + memset(ioapic_data[i]->entry, 0, size); + dev = &ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; error = sysdev_register(dev); if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; + kfree(ioapic_data[i]->entry); + ioapic_data[i]->entry = NULL; printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } @@ -2423,13 +2593,14 @@ device_initcall(ioapic_init_sysfs); #ifdef CONFIG_ACPI_BOOT -int __init io_apic_get_unique_id (int ioapic, int apic_id) +int __init io_apic_get_unique_id (int apic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; physid_mask_t tmp; unsigned long flags; int i = 0; + struct ioapic_data_struct *ioapic = ioapic_data[apic]; /* * The P4 platform supports up to 256 APIC IDs on two separate APIC @@ -2449,7 +2620,7 @@ int __init io_apic_get_unique_id (int io if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); + "%d\n", apic, apic_id, reg_00.bits.ID); apic_id = reg_00.bits.ID; } @@ -2468,7 +2639,7 @@ int __init io_apic_get_unique_id (int io panic("Max apic_id exceeded!\n"); printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); + "trying %d\n", apic, apic_id, i); apic_id = i; } @@ -2486,50 +2657,50 @@ int __init io_apic_get_unique_id (int io /* Sanity check */ if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + panic("IOAPIC[%d]: Unable change apic_id!\n", apic); } apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + "IOAPIC[%d]: Assigned apic_id %d\n", apic, apic_id); return apic_id; } -int __init io_apic_get_version (int ioapic) +int __init io_apic_get_version (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } -int __init io_apic_get_redir_entries (int ioapic) +int __init io_apic_get_redir_entries (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing (int apic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; unsigned long flags; - + struct ioapic_data_struct *ioapic = ioapic_data[apic]; if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); + apic); return -EINVAL; } @@ -2552,18 +2723,18 @@ int io_apic_set_pci_routing (int ioapic, * IRQs < 16 are already in the irq_2_pin[] map */ if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + add_pin_to_irq(irq, apic, pin); entry.vector = assign_irq_vector(irq); apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", apic, + mp_ioapics[apic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); - if (!ioapic && (irq < 16)) + if (!apic && (irq < 16)) disable_8259A_irq(irq); spin_lock_irqsave(&ioapic_lock, flags); Index: linux/arch/i386/kernel/irq.c =================================================================== --- linux.orig/arch/i386/kernel/irq.c +++ linux/arch/i386/kernel/irq.c @@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU * SMP cross-CPU interrupts have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { /* high bits used in ret_from_ code */ int irq = regs->orig_eax & 0xff; @@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r union irq_ctx *curctx, *irqctx; u32 *isp; #endif - irq_enter(); +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { @@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } @@ -173,7 +177,7 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); if (local_softirq_pending()) { curctx = current_thread_info(); @@ -194,7 +198,7 @@ asmlinkage void do_softirq(void) ); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); @@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -235,15 +241,28 @@ int show_interrupts(struct seq_file *p, for_each_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %-14s", desc->handler->typename); +#define F(x,c) ((desc->status & x) ? c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_PER_CPU, 'C'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_cpu(j) @@ -298,9 +317,9 @@ void fixup_irqs(cpumask_t map) barrier(); #else /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); #endif } #endif Index: linux/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null +++ linux/arch/i386/kernel/mcount-wrapper.S @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux/arch/i386/kernel/microcode.c =================================================================== --- linux.orig/arch/i386/kernel/microcode.c +++ linux/arch/i386/kernel/microcode.c @@ -109,7 +109,7 @@ MODULE_LICENSE("GPL"); #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); Index: linux/arch/i386/kernel/mpparse.c =================================================================== --- linux.orig/arch/i386/kernel/mpparse.c +++ linux/arch/i386/kernel/mpparse.c @@ -261,6 +261,7 @@ static void __init MP_ioapic_info (struc return; } mp_ioapics[nr_ioapics] = *m; + setup_IO_APIC_early(nr_ioapics); nr_ioapics++; } @@ -911,7 +912,7 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; mp_ioapics[idx].mpc_apicaddr = address; - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + setup_IO_APIC_early(idx); if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); else Index: linux/arch/i386/kernel/nmi.c =================================================================== --- linux.orig/arch/i386/kernel/nmi.c +++ linux/arch/i386/kernel/nmi.c @@ -35,7 +35,7 @@ unsigned int nmi_watchdog = NMI_NONE; extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); @@ -113,8 +113,8 @@ static int __init check_nmi_watchdog(voi for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((10*1000)/nmi_hz); // wait 10 ticks + raw_local_irq_enable(); + mdelay((100*1000)/nmi_hz); // wait 100 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP @@ -135,7 +135,7 @@ static int __init check_nmi_watchdog(voi /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; + nmi_hz = 10000; return 0; } @@ -478,13 +478,39 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + +void notrace nmi_watchdog_tick (struct pt_regs * regs) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -492,7 +518,19 @@ void nmi_watchdog_tick (struct pt_regs * */ int sum, cpu = smp_processor_id(); - sum = per_cpu(irq_stat, cpu).apic_timer_irqs; + /* + * Both count the APIC timer irqs and IRQ#0 irqs: + */ + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_this_cpu.irqs[0]; + + profile_tick(CPU_PROFILING, regs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk("NMI show regs on CPU#%d:\n", cpu); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (last_irq_sums[cpu] == sum) { /* @@ -500,8 +538,24 @@ void nmi_watchdog_tick (struct pt_regs * * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; Index: linux/arch/i386/kernel/process.c =================================================================== --- linux.orig/arch/i386/kernel/process.c +++ linux/arch/i386/kernel/process.c @@ -102,12 +102,13 @@ EXPORT_SYMBOL(enable_hlt); void default_idle(void) { if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } else { + raw_local_irq_enable(); cpu_relax(); } } @@ -124,7 +125,7 @@ static void poll_idle (void) { int oldval; - local_irq_enable(); + raw_local_irq_enable(); /* * Deal with another CPU just having chosen a thread to @@ -139,7 +140,7 @@ static void poll_idle (void) "testl %0, %1;" "rep; nop;" "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + : : "i"(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags)); clear_thread_flag(TIF_POLLING_NRFLAG); } else { @@ -162,7 +163,7 @@ static inline void play_dead(void) /* * With physical CPU hotplug, we should halt the cpu */ - local_irq_disable(); + raw_local_irq_disable(); while (1) __asm__ __volatile__("hlt":::"memory"); } @@ -185,7 +186,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -201,9 +204,13 @@ void cpu_idle(void) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } @@ -244,16 +251,16 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { set_thread_flag(TIF_POLLING_NRFLAG); do { __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); - } while (!need_resched()); + } while (!need_resched() && !need_resched_delayed()); clear_thread_flag(TIF_POLLING_NRFLAG); } } @@ -384,11 +391,16 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + int cpu; + struct tss_struct *tss; + void *io_bitmap_ptr = t->io_bitmap_ptr; - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + mb(); + kfree(io_bitmap_ptr); + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); /* * Careful, clear this in the TSS too: */ Index: linux/arch/i386/kernel/reboot.c =================================================================== --- linux.orig/arch/i386/kernel/reboot.c +++ linux/arch/i386/kernel/reboot.c @@ -194,7 +194,7 @@ void machine_real_restart(unsigned char { unsigned long flags; - local_irq_disable(); + raw_local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST routine will recognize as telling it to do a proper reboot. (Well Index: linux/arch/i386/kernel/semaphore.c =================================================================== --- linux.orig/arch/i386/kernel/semaphore.c +++ linux/arch/i386/kernel/semaphore.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -49,12 +50,12 @@ * we cannot lose wakeup events. */ -static fastcall void __attribute_used__ __up(struct semaphore *sem) +static fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } -static fastcall void __attribute_used__ __sched __down(struct semaphore * sem) +static fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -91,7 +92,7 @@ static fastcall void __attribute_used__ tsk->state = TASK_RUNNING; } -static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem) +static fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -154,7 +155,7 @@ static fastcall int __attribute_used__ _ * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem) +static fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -190,15 +191,15 @@ static fastcall int __attribute_used__ _ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -211,15 +212,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -232,15 +233,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -253,45 +254,20 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); +int fastcall compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif Index: linux/arch/i386/kernel/signal.c =================================================================== --- linux.orig/arch/i386/kernel/signal.c +++ linux/arch/i386/kernel/signal.c @@ -599,6 +599,13 @@ int fastcall do_signal(struct pt_regs *r int signr; struct k_sigaction ka; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux/arch/i386/kernel/smp.c =================================================================== --- linux.orig/arch/i386/kernel/smp.c +++ linux/arch/i386/kernel/smp.c @@ -164,7 +164,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long cfg; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. @@ -187,7 +187,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -201,7 +201,7 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + raw_local_irq_save(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -228,7 +228,7 @@ void send_IPI_mask_sequence(cpumask_t ma apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -246,7 +246,7 @@ void send_IPI_mask_sequence(cpumask_t ma static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -391,7 +391,7 @@ static void flush_tlb_others(cpumask_t c while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -482,10 +482,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -539,7 +549,7 @@ int smp_call_function (void (*func) (voi } /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -573,7 +583,7 @@ static void stop_this_cpu (void * dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) __asm__("hlt"); @@ -588,19 +598,20 @@ void smp_send_stop(void) { smp_call_function(stop_this_cpu, NULL, 1, 0); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux/arch/i386/kernel/smpboot.c =================================================================== --- linux.orig/arch/i386/kernel/smpboot.c +++ linux/arch/i386/kernel/smpboot.c @@ -57,6 +57,7 @@ #include #include #include +#include /* Set if we find a B stepping CPU */ static int __devinitdata smp_b_stepping; @@ -277,6 +278,9 @@ static void __init synchronize_tsc_bp (v wmb(); atomic_inc(&tsc_count_stop); } +#ifdef CONFIG_HIGH_RES_TIMERS + CLEAR_REF_TSC; +#endif sum = 0; for (i = 0; i < NR_CPUS; i++) { @@ -516,7 +520,7 @@ static void __devinit start_secondary(vo per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); + raw_local_irq_enable(); wmb(); cpu_idle(); @@ -1303,9 +1307,9 @@ int __cpu_disable(void) /* We enable the timer again on the exit path of the death loop */ disable_APIC_timer(); /* Allow any queued timer interrupts to get serviced */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); remove_siblinginfo(cpu); @@ -1350,11 +1354,11 @@ int __devinit __cpu_up(unsigned int cpu) /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); return -EIO; } - local_irq_enable(); + raw_local_irq_enable(); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); Index: linux/arch/i386/kernel/time.c =================================================================== --- linux.orig/arch/i386/kernel/time.c +++ linux/arch/i386/kernel/time.c @@ -29,7 +29,10 @@ * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to * serialize accesses to xtime/lost_ticks). */ - +/* 2002-8-13 George Anzinger Modified for High res timers: + * Copyright (C) 2002 MontaVista Software +*/ +#define _INCLUDED_FROM_TIME_C #include #include #include @@ -65,6 +68,8 @@ #include #include +#include +#include #include "io_ports.h" @@ -78,17 +83,20 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); +/* Using APIC to generate smp_local_timer_interrupt? */ +int using_apic_timer = 0; + unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); struct timer_opts *cur_timer __read_mostly = &timer_none; @@ -126,46 +134,58 @@ EXPORT_SYMBOL(rtc_cmos_write); * This version of gettimeofday has microsecond resolution * and better than microsecond precision on fast x86 machines with TSC. */ + +/* + * High res timers changes: First we want to use full nsec for all + * the math to avoid the double round off (on the offset and xtime). + * Second, we want to allow a boot with HRT turned off at boot time. + * This will cause hrtimer_use to be false, and we then fall back to + * the old code. We also shorten the xtime lock region and eliminate + * the lost tick code as this kernel will never have lost ticks under + * the lock (i.e. wall_jiffies will never differ from jiffies except + * when the write xtime lock is held). + */ void do_gettimeofday(struct timeval *tv) { unsigned long seq; - unsigned long usec, sec; + unsigned long sec, nsec, clk_nsec; unsigned long max_ntp_tick; do { - unsigned long lost; - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); +#ifdef CONFIG_HIGH_RES_TIMERS + nsec = arch_cycle_to_nsec(get_arch_cycles(wall_jiffies)); +#else + nsec = cur_timer->get_offset() * NSEC_PER_USEC; +#endif sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); + clk_nsec = xtime.tv_nsec; } while (read_seqretry(&xtime_lock, seq)); - while (usec >= 1000000) { - usec -= 1000000; + /* + * If time_adjust is negative then NTP is slowing the clock + * so make sure not to go into next possible interval. + * Better to lose some accuracy than have time go backwards.. + + * Note, in this kernel wall_jiffies and jiffies will always + * be the same, at least under the lock. + */ + if (unlikely(time_adjust < 0)) { + max_ntp_tick = tick_nsec - (tickadj * NSEC_PER_USEC); + if (max_ntp_tick > nsec) + nsec = max_ntp_tick - nsec; + } + + nsec += clk_nsec; + + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; sec++; } tv->tv_sec = sec; - tv->tv_usec = usec; + tv->tv_usec = nsec / NSEC_PER_USEC; } EXPORT_SYMBOL(do_gettimeofday); @@ -236,7 +256,7 @@ unsigned long long monotonic_clock(void) EXPORT_SYMBOL(monotonic_clock); #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -252,41 +272,26 @@ EXPORT_SYMBOL(profile_pc); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static inline void do_timer_interrupt(struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { + unsigned long flags; /* * Subtle, when I/O APICs are used we have to ack timer IRQ * manually to reset the IRR bit for do_slow_gettimeoffset(). * This will also deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + spin_lock_irqsave(&i8259A_lock, flags); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore(&i8259A_lock, flags); } #endif do_timer_interrupt_hook(regs); - - - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ - } } /* @@ -307,12 +312,109 @@ irqreturn_t timer_interrupt(int irq, voi cur_timer->mark_offset(); - do_timer_interrupt(irq, NULL, regs); + do_timer_interrupt(regs); +#ifdef CONFIG_MCA + /* + * This code moved here from do_timer_interrupt() as part of the + * high-res timers change because it should be done every interrupt + * but do_timer_interrupt() wants to return early if it is not a + * "1/HZ" tick interrupt. For non-high-res systems the code is in + * exactly the same location (i.e. it is moved from the tail of the + * above called function to the next thing after the function). + */ + if( MCA_bus ) { + int irq; + /* The PS/2 uses level-triggered interrupts. You can't + turn them off, nor would you want to (any attempt to + enable edge-triggered interrupts usually gets intercepted by a + special hardware circuit). Hence we have to acknowledge + the timer interrupt. Through some incredibly stupid + design idea, the reset for IRQ 0 is done by setting the + high bit of the PPI port B (0x61). Note that some PS/2s, + notably the 55SX, work fine if this is removed. */ + + irq = inb_p( 0x61 ); /* read the current state */ + outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + } +#endif write_sequnlock(&xtime_lock); +#if defined(CONFIG_SMP) && defined(CONFIG_HIGH_RES_TIMERS) + send_IPI_allbutself(LOCAL_TIMER_IPI_VECTOR); + // we profile via the NMI +#if 0 + profile_tick(CPU_PROFILING, regs); +#endif + per_cpu(irq_stat, smp_processor_id()).apic_timer_irqs++; + update_process_times(user_mode(regs)); +#endif return IRQ_HANDLED; } +#ifdef CONFIG_HIGH_RES_TIMERS +/* + * We always continue to provide interrupts even if they are not + * serviced. To do this, we leave the chip in periodic mode programmed + * to interrupt every jiffie. This is done by, for short intervals, + * programming a short time, waiting till it is loaded and then + * programming the 1/HZ. The chip will not load the 1/HZ count till the + * short count expires. If the last interrupt was programmed to be + * short, we need to program another short to cover the remaining part + * of the jiffie and can then just leave the chip alone. Note that it + * is also a low overhead way of doing things as we do not have to mess + * with the chip MOST of the time. + */ + +/* Called with xtime lock held */ +void schedule_jiffy_int(void) +{ + unsigned long cycles; + BUG_ON(!highres_active || using_apic_timer); + + if (highres_pending) + printk("Highres pending\n"); + cycles = get_arch_cycles(jiffies); + reload_timer_chip(arch_cycles_per_jiffy - cycles); +} + +/* + * We get the distance to the next event in arch_cycles + * Called with xtime lock held + */ +int _schedule_next_int(long arch_cycle_in) +{ + long arch_cycle_offset; + + BUG_ON(!highres_active); + + /* + * If time is already passed, just return saying so. + */ + if (arch_cycle_in <= 0) + return -ETIME; + + highres_pending = 1; + arch_cycle_offset = get_arch_cycles(jiffies); + + if (!using_apic_timer) { + /* Check, if we have to schedule a jiffy first */ + arch_cycle_offset = arch_cycles_per_jiffy - arch_cycle_offset; + if (arch_cycle_in > arch_cycle_offset) + arch_cycle_in = arch_cycle_offset; + } + + reload_timer_chip(arch_cycle_in); + return 0; +} + +#ifdef CONFIG_APM +void restart_timer(void) +{ + start_PIT(); +} +#endif /* CONFIG_APM */ +#endif /* CONFIG_HIGH_RES_TIMERS */ + /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -333,14 +435,20 @@ EXPORT_SYMBOL(get_cmos_time); static void sync_cmos_clock(unsigned long dummy); -static struct timer_list sync_cmos_timer = - TIMER_INITIALIZER(sync_cmos_clock, 0, 0); +static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); static void sync_cmos_clock(unsigned long dummy) { struct timeval now, next; int fail = 1; + /* + * This is dumb for two reasons. + * 1.) it is based on wall time which has not yet been updated. + * 2.) it is checked each tick for something that happens each + * 10 min. Why not use a timer for it? Much lower overhead, + * in fact, zero if STA_UNSYNC is set. + */ /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be @@ -412,6 +520,7 @@ static int timer_resume(struct sys_devic write_sequnlock_irqrestore(&xtime_lock, flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } Index: linux/arch/i386/kernel/time_hpet.c =================================================================== --- linux.orig/arch/i386/kernel/time_hpet.c +++ linux/arch/i386/kernel/time_hpet.c @@ -302,11 +302,11 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); - local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; Index: linux/arch/i386/kernel/timers/Makefile =================================================================== --- linux.orig/arch/i386/kernel/timers/Makefile +++ linux/arch/i386/kernel/timers/Makefile @@ -7,3 +7,6 @@ obj-y := timer.o timer_none.o timer_tsc. obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o +obj-$(CONFIG_HIGH_RES_TIMER_ACPI_PM) += hrtimer_pm.o +obj-$(CONFIG_HIGH_RES_TIMER_ACPI_PM) += high-res-tbxfroot.o +obj-$(CONFIG_HIGH_RES_TIMER_TSC) += hrtimer_tsc.o \ No newline at end of file Index: linux/arch/i386/kernel/timers/common.c =================================================================== --- linux.orig/arch/i386/kernel/timers/common.c +++ linux/arch/i386/kernel/timers/common.c @@ -23,7 +23,7 @@ * device. */ -#define CALIBRATE_TIME (5 * 1000020/HZ) +__initdata unsigned long tsc_cycles_per_50_ms; unsigned long calibrate_tsc(void) { @@ -58,6 +58,12 @@ unsigned long calibrate_tsc(void) if (endlow <= CALIBRATE_TIME) goto bad_ctc; + /* + * endlow at this point is 50 ms of arch clocks + * Set up the value for other who want high res. + */ + tsc_cycles_per_50_ms = endlow; + __asm__("divl %2" :"=a" (endlow), "=d" (endhigh) :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); @@ -71,6 +77,7 @@ unsigned long calibrate_tsc(void) * 32 bits.. */ bad_ctc: + printk("******************** TSC calibrate failed!\n"); return 0; } Index: linux/arch/i386/kernel/timers/high-res-tbxfroot.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/timers/high-res-tbxfroot.c @@ -0,0 +1,273 @@ +/****************************************************************************** + * + * Module Name: tbxfroot - Find the root ACPI table (RSDT) + * $Revision: 1.4 $ + * + *****************************************************************************/ + +/* + * Copyright (C) 2000, 2001 R. Byron Moore + + * This code purloined and modified by George Anzinger + * Copyright (C) 2002 by MontaVista Software. + * It is part of the high-res-timers ACPI option and its sole purpose is + * to find the darn timer. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This is most annoying! We want to find the address of the pm timer in the + * ACPI hardware package. We know there is one if ACPI is available at all + * as it is part of the basic ACPI hardware set. + * However, the powers that be have conspired to make it a real + * pain to find the address. We have written a minimal search routine + * that we use only once on boot up. We try to cover all the bases including + * checksum, and version. We will try to get some constants and structures + * from the ACPI code in an attempt to follow it, but darn, what a mess. + * + * First problem, the include files are in the driver package.... + * and what a mess they are. We pick up the kernel string and types first. + + * But then there is the COMPILER_DEPENDENT_UINT64 ... + */ +//#define ACPI_MACHINE_WIDTH BITS_PER_LONG +#define COMPILER_DEPENDENT_UINT64 unsigned long long +#define COMPILER_DEPENDENT_INT64 long long +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STRNCMP(d,s,n) strncmp((d), (s), (NATIVE_INT)(n)) +#define RSDP_CHECKSUM_LENGTH 20 +#define NATIVE_INT INT32 +#define NATIVE_CHAR char + +#ifndef CONFIG_ACPI_enough /* I am tired of trying to use the acpi stuff */ + /* this code works, lets just use it. */ +/******************************************************************************* + * + * FUNCTION: hrt_acpi_checksum + * + * PARAMETERS: Buffer - Buffer to checksum + * Length - Size of the buffer + * + * RETURNS 8 bit checksum of buffer + * + * DESCRIPTION: Computes an 8 bit checksum of the buffer(length) and returns it. + * + ******************************************************************************/ +static __init u8 +hrt_acpi_checksum(void *buffer, u32 length) +{ + u8 *limit; + u8 *rover; + u8 sum = 0; + + if (buffer && length) { + /* Buffer and Length are valid */ + + limit = (u8 *) buffer + length; + + for (rover = buffer; rover < limit; rover++) { + sum = (u8) (sum + *rover); + } + } + + return (sum); +} + +/******************************************************************************* + * + * FUNCTION: hrt_acpi_scan_memory_for_rsdp + * + * PARAMETERS: Start_address - Starting pointer for search + * Length - Maximum length to search + * + * RETURN: Pointer to the RSDP if found, otherwise NULL. + * + * DESCRIPTION: Search a block of memory for the RSDP signature + * + ******************************************************************************/ +static __init u8 * +hrt_acpi_scan_memory_for_rsdp(u8 * start_address, u32 length) +{ + u32 offset; + u8 *mem_rover; + + /* Search from given start addr for the requested length */ + + for (offset = 0, mem_rover = start_address; + offset < length; + offset += ACPI_RSDP_SCAN_STEP, mem_rover += ACPI_RSDP_SCAN_STEP) { + + /* The signature and checksum must both be correct */ + + if (STRNCMP((NATIVE_CHAR *) mem_rover, + RSDP_SIG, sizeof (RSDP_SIG) - 1) == 0 && + hrt_acpi_checksum(mem_rover, RSDP_CHECKSUM_LENGTH) == 0) { + /* If so, we have found the RSDP */ + + ; + return (mem_rover); + } + } + + /* Searched entire block, no RSDP was found */ + + return (NULL); +} + +/******************************************************************************* + * + * FUNCTION: hrt_acpi_find_rsdp + * + * PARAMETERS: + * + * RETURN: Logical address of rsdp + * + * DESCRIPTION: Search lower 1_mbyte of memory for the root system descriptor + * pointer structure. If it is found, return its address, + * else return 0. + * + * NOTE: The RSDP must be either in the first 1_k of the Extended + * BIOS Data Area or between E0000 and FFFFF (ACPI 1.0 section + * 5.2.2; assertion #421). + * + ******************************************************************************/ +/* Constants used in searching for the RSDP in low memory */ + +#define LO_RSDP_WINDOW_BASE 0 /* Physical Address */ +#define HI_RSDP_WINDOW_BASE 0xE0000 /* Physical Address */ +#define LO_RSDP_WINDOW_SIZE 0x400 +#define HI_RSDP_WINDOW_SIZE 0x20000 +#define RSDP_DESCRIPTOR struct rsdp_descriptor +static __init RSDP_DESCRIPTOR * +hrt_find_acpi_rsdp(void) +{ + u8 *mem_rover; + + /* + * 1) Search EBDA (low memory) paragraphs + */ + mem_rover = + hrt_acpi_scan_memory_for_rsdp((u8 *) __va(LO_RSDP_WINDOW_BASE), + LO_RSDP_WINDOW_SIZE); + + if (!mem_rover) { + /* + * 2) Search upper memory: + * 16-byte boundaries in E0000h-F0000h + */ + mem_rover = + hrt_acpi_scan_memory_for_rsdp((u8 *) + __va(HI_RSDP_WINDOW_BASE), + HI_RSDP_WINDOW_SIZE); + } + + if (mem_rover) { + /* Found it, return the logical address */ + + return (RSDP_DESCRIPTOR *) mem_rover; + } + return (RSDP_DESCRIPTOR *) 0; +} + +__init u32 hrt_get_acpi_pm_ptr(void) +{ + struct fadt_descriptor_rev2 *fadt2; + struct fadt_descriptor_rev1 *fadt1; + struct rsdt_descriptor_rev1 *rsdt; + struct xsdt_descriptor_rev2 *xsdt; + RSDP_DESCRIPTOR *rsdp = hrt_find_acpi_rsdp(); + struct acpi_table_header *header; + u32 rtn; + + if (!rsdp) { + printk("ACPI: System description tables not found\n"); + return 0; + } + /* + * Now that we have that problem out of the way, lets set up this + * timer. We need to figure the addresses based on the revision + * of ACPI, which is in this here table we just found. + * We will not check the RSDT checksum, but will the FADT. + */ + if (rsdp->revision == 2) { + xsdt = + (struct xsdt_descriptor_rev2 *) + __va(rsdp->xsdt_physical_address); + fadt2 = + (struct fadt_descriptor_rev2 *) + __va(xsdt->table_offset_entry[0]); + header = (struct acpi_table_header *) fadt2; + rtn = (u32) fadt2->xpm_tmr_blk.address; + } else { + rsdt = + (struct rsdt_descriptor_rev1 *) + __va(rsdp->rsdt_physical_address); + fadt1 = + (struct fadt_descriptor_rev1 *) + __va(rsdt->table_offset_entry[0]); + header = (struct acpi_table_header *) fadt1; + rtn = (u32) fadt1->pm_tmr_blk; + } + /* + * Verify the signature and the checksum, if good, return + * the address. + */ + if (STRNCMP((NATIVE_CHAR *) header->signature, + FADT_SIG, sizeof (FADT_SIG) - 1) == 0 && + hrt_acpi_checksum((NATIVE_CHAR *) header, header->length) == 0) + return rtn; + + printk("ACPI: Signature or checksum failed on FADT\n"); + return 0; +} + +#else +extern int acpi_get_firmware_table(acpi_string signature, + u32 instance, + u32 flags, + struct acpi_table_header ** table_pointer); + +extern struct fadt_descriptor_rev2 acpi_fadt; +__init u32 hrt_get_acpi_pm_ptr(void) +{ + struct fadt_descriptor_rev2 *fadt = &acpi_fadt; + struct fadt_descriptor_rev2 local_fadt; + + if (!fadt || !fadt->header.signature[0]) { + fadt = &local_fadt; + fadt->header.signature[0] = '\0'; + acpi_get_firmware_table("FACP", 1, ACPI_PHYSICAL_POINTER, + (struct acpi_table_header **) & fadt); + } + if (!fadt || !fadt->header.signature[0]) { + printk("ACPI: Could not find the ACPI pm timer."); + } + + if (fadt->header.revision == 2) { + return (u32) fadt->xpm_tmr_blk.address; + } else { + return (u32) fadt->V1_pm_tmr_blk; + } +} +#endif Index: linux/arch/i386/kernel/timers/hrtimer_pm.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/timers/hrtimer_pm.c @@ -0,0 +1,200 @@ +/* + * This code largely moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define OK_TO_DO_IO_LOOP // How to do the delay stuff. + + +extern unsigned long do_highres_gettimeoffset_pm(void) +{ + /* + * We are under the xtime_lock here. + */ + return arch_cycle_to_usec(get_arch_cycles(jiffies)); +} + +static void high_res_mark_offset_pm(void) +{ + return; +} +unsigned long long monotonic_clock_hr_pm(void) +{ + unsigned long long timestamp; + unsigned long seq; + do { + seq = read_seqbegin(&xtime_lock); + timestamp = jiffies_64 * (NSEC_PER_SEC / HZ) + + arch_cycle_to_usec(get_arch_cycles(jiffies)); + } while (read_seqretry(&xtime_lock, seq)); + return timestamp; +} + +#ifdef OK_TO_DO_IO_LOOP +/* + * This routine is I/O intensive. If this is a problem we will have to + * use a compute loop as in the PIT code. It is NOT affected by the + * cpu clock, however. + */ +static void delay_pm(unsigned long loops) +{ + unsigned long bclock = inl(acpi_pm_tmr_address); + + /* + * XXX it doesn't depend on a number of processor cycles so + * the value may be very different from the usual one, is that + * a problem? -eric + */ + do { + rep_nop(); + } while (((inl(acpi_pm_tmr_address) - bclock) & SIZE_MASK) < loops); +} +#else +/* + * Avoids the I/O intense stuff but is affected by cpu clock shifting. + */ +static void delay_pm(unsigned long loops) +{ + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +#endif + +#ifdef CONFIG_CPU_FREQ +static unsigned int ref_freq = 0; +static unsigned int variable_tsc = 1; + +#ifdef OK_TO_DO_IO_LOOP +static unsigned long loops_per_jiffy_ref = 0; +#endif + +#ifndef CONFIG_SMP +static unsigned long cpu_khz_ref = 0; +#endif + +static int +time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + + if (!ref_freq) { + ref_freq = freq->old; +#ifdef OK_TO_DO_IO_LOOP + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; +#endif + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { +#ifdef OK_TO_DO_IO_LOOP + if (variable_tsc) + cpu_data[freq->cpu].loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, + ref_freq, freq->new); +#endif + } + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { +.notifier_call = time_cpufreq_notifier +}; +#endif + + +static int high_res_init_pm(char * override) +{ + if (override[0] && strncmp(override,"hr_pm",5)) + return -ENODEV; + + /* report CPU clock rate in Hz. + * The formula is: + * (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if(tsc_quotient){ + cpu_khz = div_sc32( 1000, tsc_quotient); + { + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + } + } + } + acpi_pm_tmr_address = hrt_get_acpi_pm_ptr(); + if (!acpi_pm_tmr_address){ + printk(pm_hrtimer_message, default_pm_add); + if ( (acpi_pm_tmr_address = default_pm_add)){ + last_update += quick_get_cpuctr(); + hrt_udelay(4); + if (!quick_get_cpuctr()){ + printk("High-res-timers: No ACPI pm " + "timer found at %d.\n", + acpi_pm_tmr_address); + acpi_pm_tmr_address = 0; + } + } + } else { + if (default_pm_add != acpi_pm_tmr_address) { + printk("High-res-timers: Ignoring supplied " + "default ACPI pm timer address.\n"); + } + last_update += quick_get_cpuctr(); + } + start_PIT(); + if (!acpi_pm_tmr_address){ + printk(pm_hrtimer_fail_message); + return -EINVAL; + } else { + printk("High-res-timers: Found ACPI pm timer at %d\n", + acpi_pm_tmr_address); + } +#ifdef CONFIG_CPU_FREQ + /* P4 and above CPU TSC freq doesn't change when CPU frequency changes*/ + if ((boot_cpu_data.x86 >= 15) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + variable_tsc = 0; + + cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); +#endif + return 0; +} + +/************************************************************/ + + +/* hr_pm timer_opts struct */ +struct timer_opts hrtimer_pm = { + .name = "hrt_pm", + .mark_offset = high_res_mark_offset_pm, + .get_offset = do_highres_gettimeoffset_pm, + .monotonic_clock = monotonic_clock_hr_pm, + .delay = delay_pm, +}; + +struct init_timer_opts __initdata hrtimer_pm_init = { + .init = high_res_init_pm, + .opts = &hrtimer_pm, +}; Index: linux/arch/i386/kernel/timers/hrtimer_tsc.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/timers/hrtimer_tsc.c @@ -0,0 +1,274 @@ +/* + * This code largely moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mach_timer.h" + +extern int x86_udelay_tsc; +extern unsigned long tsc_cycles_per_50_ms; + +#if 0 //ndef CONFIG_SMP +static int use_tsc; +#endif + +/* Cached *multiplier* to convert TSC counts to microseconds. + * (see the equation below). + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in time_init. + */ +static unsigned long fast_gettimeoffset_quotient; + +static unsigned long do_highres_gettimeoffset(void) +{ + /* + * We are under the xtime_lock here. + */ + return arch_cycle_to_usec(get_arch_cycles(jiffies)); +} + +static void high_res_mark_offset_tsc(void) +{ + return; +} +static unsigned long long monotonic_clock_hr_tsc(void) +{ + unsigned long long timestamp; + unsigned long seq; + do { + seq = read_seqbegin(&xtime_lock); + timestamp = jiffies_64 * (NSEC_PER_SEC / HZ) + + arch_cycle_to_usec(get_arch_cycles(jiffies)); + } while (read_seqretry(&xtime_lock, seq)); + return timestamp; +} +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +#ifdef CONFIG_CPU_FREQ +/* If the CPU frequency is scaled, TSC-based delays will need a different + * loops_per_jiffy value to function properly. An exception to this + * are modern Intel Pentium 4 processors, where the TSC runs at a constant + * speed independent of frequency scaling. + */ +static unsigned long ref_arch_to_usec; +static unsigned long ref_arch_to_latch; +static unsigned long ref_arch_to_nsec; +static unsigned long ref_usec_to_arch; +static unsigned long ref_nsec_to_arch; +static long ref_arch_cycles_per_jiffy; +static unsigned int ref_freq = 0; +static unsigned long loops_per_jiffy_ref = 0; +static unsigned int variable_tsc = 1; + +#ifndef CONFIG_SMP +static unsigned long cpu_khz_ref = 0; +#endif + +static int +time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + ref_arch_to_usec = arch_to_usec; + ref_arch_to_latch = arch_to_latch; + ref_arch_to_nsec = arch_to_nsec; + ref_nsec_to_arch = nsec_to_arch; + ref_usec_to_arch = usec_to_arch; + ref_arch_cycles_per_jiffy = arch_cycles_per_jiffy; +#if 0 /* ndef CONFIG_SMP cpu_khz is done in timer_tsc.c */ + cpu_khz_ref = cpu_khz; +#endif + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + + if (variable_tsc) { + cpu_data[freq->cpu].loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, + ref_freq, freq->new); + + arch_to_usec = + /* fast_gettimeoffset_quotient is done + * timer_tsc.c + fast_gettimeoffset_quotient = + */ + cpufreq_scale(ref_arch_to_usec, + freq->new, ref_freq); + arch_to_latch = + cpufreq_scale(ref_arch_to_latch, + freq->new, ref_freq); + arch_to_nsec = + cpufreq_scale(ref_arch_to_nsec, + freq->new, ref_freq); + nsec_to_arch = + cpufreq_scale(ref_nsec_to_arch, + ref_freq, freq->new); + usec_to_arch = + cpufreq_scale(ref_usec_to_arch, + ref_freq, freq->new); + arch_cycles_per_jiffy = + cpufreq_scale(ref_arch_cycles_per_jiffy, + ref_freq, freq->new); + } +#if 0 /* ndef CONFIG_SMP_use_timer_tsc */ + if (use_tsc) + cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); +#endif + } + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + notifier_call: time_cpufreq_notifier +}; +#endif + +static int high_res_init_tsc(char * override) +{ + if (override[0] && strncmp(override,"hrtsc",5)) + return -ENODEV; + + if (tsc_disable) { + printk(KERN_WARNING "notsc: Kernel compiled with " + "CONFIG_HIGH_RES_TIMERS" + " TSC, cannot disable TSC.\n"); + tsc_disable = 0; + } + /* + * If we have APM enabled or the CPU clock speed is variable + * (CPU stops clock on HLT or slows clock to save power) + * then the TSC timestamps may diverge by up to 1 jiffy from + * 'real time' but nothing will break. + * The most frequent case is that the CPU is "woken" from a halt + * state by the timer interrupt itself, so we get 0 error. In the + * rare cases where a driver would "wake" the CPU and request a + * timestamp, the maximum error is < 1 jiffy. But timestamps are + * still perfectly ordered. + * Note that the TSC counter will be reset if APM suspends + * to disk; this won't break the kernel, though, 'cuz we're + * smart. See arch/i386/kernel/apm.c. + */ + /* + * Firstly we have to do a CPU check for chips with + * a potentially buggy TSC. At this point we haven't run + * the ident/bugs checks so we must run this hook as it + * may turn off the TSC flag. + * + * NOTE: this doesnt yet handle SMP 486 machines where only + * some CPU's have a TSC. Thats never worked and nobody has + * moaned if you have the only one in the world - you fix it! + */ + + /* FIXME: Where is this gone ? */ + //dodgy_tsc(); + + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + fast_gettimeoffset_quotient = tsc_quotient; + /* + * We could be more selective here I suspect + * and just enable this for the next intel chips ? + */ + /* + * Kick off the high res timers + */ + /* + * The init_hrtimers macro is in the choosen + * support package depending on the clock + * source, TSC, or ACPI pm timer. + */ + arch_to_usec = fast_gettimeoffset_quotient; + + arch_to_latch = div_ll_X_l( + mpy_l_X_l_ll(fast_gettimeoffset_quotient, + CLOCK_TICK_RATE), + (USEC_PER_SEC)); + + arch_to_nsec = div_sc_n(HR_TIME_SCALE_NSEC, + CALIBRATE_TIME * NSEC_PER_USEC, + tsc_cycles_per_50_ms); + + nsec_to_arch = div_sc_n(HR_TIME_SCALE_NSEC, + tsc_cycles_per_50_ms, + CALIBRATE_TIME * NSEC_PER_USEC); + + usec_to_arch = div_sc_n(HR_TIME_SCALE_USEC, + tsc_cycles_per_50_ms, + CALIBRATE_TIME ); + + arch_cycles_per_jiffy = nsec_to_arch_cycle(tick_nsec); + init_hrtimers(); + + start_PIT(); + + /* report CPU clock rate in Hz. + * The formula is: + * (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + cpu_khz = div_sc32( 1000, tsc_quotient); + { + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + } +#ifdef CONFIG_CPU_FREQ + /* + * P4 and above CPU TSC freq doesn't change when + * CPU frequency changes + */ + if ((boot_cpu_data.x86 >= 15) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + variable_tsc = 0; + + cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); +#endif + return 0; + } + } + return -ENODEV; +} + +/************************************************************/ + +/* tsc timer_opts struct */ +struct timer_opts hrtimer_tsc = { + .name = "hrt_tsc", + .mark_offset = high_res_mark_offset_tsc, + .get_offset = do_highres_gettimeoffset, + .monotonic_clock = monotonic_clock_hr_tsc, + .delay = delay_tsc, +}; + +struct init_timer_opts __initdata hrtimer_tsc_init = { + .init = high_res_init_tsc, + .opts = &hrtimer_tsc, +}; Index: linux/arch/i386/kernel/timers/timer.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer.c +++ linux/arch/i386/kernel/timers/timer.c @@ -1,10 +1,12 @@ #include #include #include +#include #include #ifdef CONFIG_HPET_TIMER /* + * If high res, we put that first... * HPET memory read is slower than tsc reads, but is more dependable as it * always runs at constant frequency and reduces complexity due to * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use @@ -13,7 +15,17 @@ #endif /* list of timers, ordered by preference, NULL terminated */ static struct init_timer_opts* __initdata timers[] = { +#ifdef CONFIG_HIGH_RES_TIMERS +#ifdef CONFIG_HIGH_RES_TIMER_ACPI_PM + &hrtimer_pm_init, +#elif CONFIG_HIGH_RES_TIMER_TSC + &hrtimer_tsc_init, +#endif /* CONFIG_HIGH_RES_TIMER_ACPI_PM */ +#endif #ifdef CONFIG_X86_CYCLONE_TIMER +#ifdef CONFIG_HIGH_RES_TIMERS +#error "The High Res Timers option is incompatable with the Cyclone timer" +#endif &timer_cyclone_init, #endif #ifdef CONFIG_HPET_TIMER @@ -28,6 +40,7 @@ static struct init_timer_opts* __initdat }; static char clock_override[10] __initdata; +#ifndef CONFIG_HIGH_RES_TIMERS_try static int __init clock_setup(char* str) { @@ -45,7 +58,7 @@ void clock_fallback(void) { cur_timer = &timer_pit; } - +#endif /* iterates through the list of timers, returning the first * one that initializes successfully. */ Index: linux/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_cyclone.c +++ linux/arch/i386/kernel/timers/timer_cyclone.c @@ -36,7 +36,7 @@ static u32* volatile cyclone_timer; /* C static u32 last_cyclone_low; static u32 last_cyclone_high; static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* helper macro to atomically read both cyclone counter registers */ #define read_cyclone_counter(low,high) \ Index: linux/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_hpet.c +++ linux/arch/i386/kernel/timers/timer_hpet.c @@ -24,7 +24,7 @@ static unsigned long hpet_last; /* hpet static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: Index: linux/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_pit.c +++ linux/arch/i386/kernel/timers/timer_pit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include Index: linux/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_pm.c +++ linux/arch/i386/kernel/timers/timer_pm.c @@ -41,7 +41,7 @@ static u32 offset_tick; static u32 offset_delay; static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ Index: linux/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_tsc.c +++ linux/arch/i386/kernel/timers/timer_tsc.c @@ -43,18 +43,25 @@ static int delay_at_last_interrupt; static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles / (freq / ns_per_sec) + * ns = cycles / (cpu_cycles_in_time_X / time_X) / ns_per_sec + * ns = cycles / cpu_cycles_in_time_X / (time_X * ns_per_sec) + * ns = cycles * (time_X * ns_per_sec) / cpu_cycles_in_time_X + * + * Here time_X = CALIBRATE_TIME (in USEC) * NSEC_PER_USEC + * and cpu_cycles_in_time_X is tsc_cycles_per_50_ms so... + * + * ns = cycles * (CALIBRATE_TIME * NSEC_PER_USEC) / tsc_cycles_per_50_ms * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC + * + * ns = cycles * CALIBRATE_TIME * NSEC_PER_USEC * SC / tsc_cycles_per_50_ms / SC + * cyc2ns_scale = CALIBRATE_TIME * NSEC_PER_USEC * SC / tsc_cycles_per_50_ms + * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. @@ -63,9 +70,12 @@ static seqlock_t monotonic_lock = SEQLOC static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(void) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + long long it =(long long) (CALIBRATE_TIME * NSEC_PER_USEC) << CYC2NS_SCALE_FACTOR; + + do_div(it, tsc_cycles_per_50_ms); + cyc2ns_scale = (unsigned long)it; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -142,7 +152,7 @@ unsigned long long sched_clock(void) * synchronized across all CPUs. */ #ifndef CONFIG_NUMA - if (!use_tsc) + if (unlikely(!use_tsc)) #endif /* no locking but a rare wrong value is not a big deal */ return jiffies_64 * (1000000000 / HZ); @@ -170,9 +180,9 @@ static void delay_tsc(unsigned long loop static void mark_offset_tsc_hpet(void) { unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; + unsigned long offset, temp, hpet_current, flags; - write_seqlock(&monotonic_lock); + write_seqlock_irqsave(&monotonic_lock, flags); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; /* * It is important that these two operations happen almost at @@ -200,7 +210,7 @@ static void mark_offset_tsc_hpet(void) /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); + write_sequnlock_irqrestore(&monotonic_lock, flags); /* calculate delay_at_last_interrupt */ /* @@ -237,7 +247,7 @@ static void handle_cpufreq_delayed_get(v * to verify the CPU frequency the timing core thinks the CPU is running * at is still correct. */ -static inline void cpufreq_delayed_get(void) +static inline void cpufreq_delayed_get(void) { if (cpufreq_init && !cpufreq_delayed_issched) { cpufreq_delayed_issched = 1; @@ -252,6 +262,7 @@ static inline void cpufreq_delayed_get(v static unsigned int ref_freq = 0; static unsigned long loops_per_jiffy_ref = 0; +static unsigned long cyc2ns_scale_ref; #ifndef CONFIG_SMP static unsigned long fast_gettimeoffset_ref = 0; @@ -269,6 +280,7 @@ time_cpufreq_notifier(struct notifier_bl if (!ref_freq) { ref_freq = freq->old; loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + cyc2ns_scale_ref = cyc2ns_scale; #ifndef CONFIG_SMP fast_gettimeoffset_ref = fast_gettimeoffset_quotient; cpu_khz_ref = cpu_khz; @@ -286,7 +298,8 @@ time_cpufreq_notifier(struct notifier_bl if (use_tsc) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); + cyc2ns_scale = cpufreq_scale(cyc2ns_scale_ref, + freq->new, ref_freq); } } #endif @@ -341,7 +354,10 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); static void mark_offset_tsc(void) { - unsigned long lost,delay; + unsigned long lost,delay, flags; +#ifndef CONFIG_PREEMPT_RT + unsigned long flags2; +#endif unsigned long delta = last_tsc_low; int count; int countmp; @@ -349,7 +365,7 @@ static void mark_offset_tsc(void) unsigned long long this_offset, last_offset; static int lost_count = 0; - write_seqlock(&monotonic_lock); + write_seqlock_irqsave(&monotonic_lock, flags); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; /* * It is important that these two operations happen almost at @@ -367,24 +383,34 @@ static void mark_offset_tsc(void) rdtsc(last_tsc_low, last_tsc_high); - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ +#ifdef CONFIG_PREEMPT_RT + /* + * On PREEMPT_RT the timer IRQ never gets delayed by + * other interrupts, so we dont have to read the + * count: + */ + count = LATCH - 2; +#else + spin_lock_irqsave(&i8253_lock, flags2); + outb(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb(PIT_CH0); /* read the latched count */ count |= inb(PIT_CH0) << 8; +# undef VIA686A_WORKAROUND /* * VIA686a test code... reset the latch if count > max + 1 * from timer_pit.c - cjb */ +# ifdef VIA686A_WORKAROUND if (count > LATCH) { outb_p(0x34, PIT_MODE); outb_p(LATCH & 0xff, PIT_CH0); outb(LATCH >> 8, PIT_CH0); count = LATCH - 1; } - - spin_unlock(&i8253_lock); +# endif + spin_unlock_irqrestore(&i8253_lock, flags2); +#endif /* PREEMPT_RT */ if (pit_latch_buggy) { /* get center value of last 3 time lutch */ @@ -437,7 +463,7 @@ static void mark_offset_tsc(void) /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); + write_sequnlock_irqrestore(&monotonic_lock, flags); /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; @@ -536,7 +562,7 @@ static int __init init_tsc(char* overrid printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(); return 0; } } Index: linux/arch/i386/kernel/traps.c =================================================================== --- linux.orig/arch/i386/kernel/traps.c +++ linux/arch/i386/kernel/traps.c @@ -94,7 +94,7 @@ asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +static DEFINE_RAW_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -117,22 +117,27 @@ static inline unsigned long print_contex unsigned long *stack, unsigned long ebp) { unsigned long addr; +#ifndef CONFIG_FRAME_POINTER + unsigned long prev_frame; +#endif -#ifdef CONFIG_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printk(" [<%08lx>] ", addr); print_symbol("%s", addr); - printk("\n"); + printk(" (%ld)\n", *(unsigned long *)ebp - ebp); ebp = *(unsigned long *)ebp; } #else + prev_frame = (unsigned long)stack; while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) { printk(" [<%08lx>]", addr); print_symbol(" %s", addr); - printk("\n"); + printk(" (%ld)\n", (unsigned long)stack - prev_frame); + prev_frame = (unsigned long)stack; } } #endif @@ -164,6 +169,8 @@ void show_trace(struct task_struct *task break; printk(" =======================\n"); } + print_traces(task); + show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *esp) @@ -202,6 +209,12 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -226,10 +239,17 @@ void show_registers(struct pt_regs *regs regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk("ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); + printk("Process %s (pid: %d, threadinfo=%p task=%p", current->comm, current->pid, current_thread_info(), current); + + if (in_kernel) + printk(" stack_left=%ld worst_left=%ld)", + (esp & (THREAD_SIZE-1))-sizeof(struct thread_info), + worst_stack_left); + else + printk(")"); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. @@ -301,11 +321,11 @@ bug: void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -382,6 +402,11 @@ static void do_trap(int trapnr, int sign if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -510,7 +535,7 @@ fastcall void do_general_protection(stru return; gp_in_vm86: - local_irq_enable(); + raw_local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; @@ -564,10 +589,11 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + deadlock_trace_off(); spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try @@ -591,10 +617,11 @@ void die_nmi (struct pt_regs *regs, cons crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -613,6 +640,7 @@ static void default_do_nmi(struct pt_reg */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif @@ -632,18 +660,19 @@ static void default_do_nmi(struct pt_reg reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); @@ -721,7 +750,7 @@ fastcall void do_debug(struct pt_regs * return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { Index: linux/arch/i386/kernel/vm86.c =================================================================== --- linux.orig/arch/i386/kernel/vm86.c +++ linux/arch/i386/kernel/vm86.c @@ -105,9 +105,10 @@ struct pt_regs * fastcall save_v86_state * from process context. Enable interrupts here, before trying * to access user space. */ - local_irq_enable(); + raw_local_irq_enable(); if (!current->thread.vm86_info) { + raw_local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux/arch/i386/lib/bitops.c =================================================================== --- linux.orig/arch/i386/lib/bitops.c +++ linux/arch/i386/lib/bitops.c @@ -68,3 +68,37 @@ int find_next_zero_bit(const unsigned lo return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +".section .sched.text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( +".section .sched.text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif Index: linux/arch/i386/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/i386/lib/dec_and_lock.c +++ linux/arch/i386/lib/dec_and_lock.c @@ -11,7 +11,7 @@ #include #include -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -33,10 +33,10 @@ repeat: return 0; slow_path: - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); Index: linux/arch/i386/mach-default/setup.c =================================================================== --- linux.orig/arch/i386/mach-default/setup.c +++ linux/arch/i386/mach-default/setup.c @@ -35,7 +35,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -79,7 +79,7 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; /** * time_init_hook - do any specific initialisations for the system timer. Index: linux/arch/i386/mach-visws/setup.c =================================================================== --- linux.orig/arch/i386/mach-visws/setup.c +++ linux/arch/i386/mach-visws/setup.c @@ -114,7 +114,7 @@ void __init pre_setup_arch_hook() static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .name = "timer", }; Index: linux/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux.orig/arch/i386/mach-visws/visws_apic.c +++ linux/arch/i386/mach-visws/visws_apic.c @@ -261,11 +261,13 @@ out_unlock: static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = SA_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = SA_NODELAY, }; Index: linux/arch/i386/mach-voyager/setup.c =================================================================== --- linux.orig/arch/i386/mach-voyager/setup.c +++ linux/arch/i386/mach-voyager/setup.c @@ -17,7 +17,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -40,7 +40,7 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { Index: linux/arch/i386/mm/fault.c =================================================================== --- linux.orig/arch/i386/mm/fault.c +++ linux/arch/i386/mm/fault.c @@ -38,6 +38,8 @@ void bust_spinlocks(int yes) int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -199,6 +201,18 @@ static inline int is_prefetch(struct pt_ return 0; } +static void force_sig_info_fault(int si_signo, int si_code, + unsigned long address, struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); +} + fastcall void do_invalid_op(struct pt_regs *, unsigned long); /* @@ -211,29 +225,29 @@ fastcall void do_invalid_op(struct pt_re * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) +fastcall notrace void do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long page; - int write; - siginfo_t info; + int write, si_code; /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); + trace_special(regs->eip, error_code, address); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); + raw_local_irq_enable(); tsk = current; - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The @@ -313,7 +327,7 @@ fastcall void do_page_fault(struct pt_re * we can handle it.. */ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; write = 0; switch (error_code & 3) { default: /* 3: write, present */ @@ -387,11 +401,7 @@ bad_area_nosemaphore: /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk); return; } @@ -440,9 +450,9 @@ no_context: } #endif if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + printk(KERN_ALERT "BUG: Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(KERN_ALERT "BUG: Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); @@ -500,11 +510,7 @@ do_sigbus: tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); return; vmalloc_fault: Index: linux/arch/i386/mm/highmem.c =================================================================== --- linux.orig/arch/i386/mm/highmem.c +++ linux/arch/i386/mm/highmem.c @@ -18,6 +18,27 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +47,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +69,7 @@ void *kmap_atomic(struct page *page, enu return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -78,7 +99,7 @@ void kunmap_atomic(void *kvaddr, enum km /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -93,7 +114,7 @@ void *kmap_atomic_pfn(unsigned long pfn, return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +129,7 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); Index: linux/arch/i386/mm/init.c =================================================================== --- linux.orig/arch/i386/mm/init.c +++ linux/arch/i386/mm/init.c @@ -42,7 +42,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); Index: linux/arch/i386/mm/pageattr.c =================================================================== --- linux.orig/arch/i386/mm/pageattr.c +++ linux/arch/i386/mm/pageattr.c @@ -206,6 +206,9 @@ void kernel_map_pages(struct page *page, { if (PageHighMem(page)) return; + if (!enable) + check_no_locks_freed(page_address(page), page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ Index: linux/arch/i386/mm/pgtable.c =================================================================== --- linux.orig/arch/i386/mm/pgtable.c +++ linux/arch/i386/mm/pgtable.c @@ -180,7 +180,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * recommendations and having no core impact whatsoever. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux/arch/i386/pci/direct.c =================================================================== --- linux.orig/arch/i386/pci/direct.c +++ linux/arch/i386/pci/direct.c @@ -211,16 +211,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -230,17 +237,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } Index: linux/arch/i386/pci/pcbios.c =================================================================== --- linux.orig/arch/i386/pci/pcbios.c +++ linux/arch/i386/pci/pcbios.c @@ -70,7 +70,7 @@ static unsigned long bios32_service(unsi unsigned long entry; /* %edx */ unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__("lcall *(%%edi); cld" : "=a" (return_code), "=b" (address), @@ -79,7 +79,7 @@ static unsigned long bios32_service(unsi : "0" (service), "1" (0), "D" (&bios32_indirect)); - local_irq_restore(flags); + raw_local_irq_restore(flags); switch (return_code) { case 0: @@ -110,7 +110,7 @@ static int __devinit check_pcibios(void) if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry + PAGE_OFFSET; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__( "lcall *(%%edi); cld\n\t" "jc 1f\n\t" @@ -123,7 +123,7 @@ static int __devinit check_pcibios(void) : "1" (PCIBIOS_PCI_BIOS_PRESENT), "D" (&pci_indirect) : "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); status = (eax >> 8) & 0xff; hw_mech = eax & 0xff; Index: linux/arch/ia64/kernel/perfmon.c =================================================================== --- linux.orig/arch/ia64/kernel/perfmon.c +++ linux/arch/ia64/kernel/perfmon.c @@ -496,7 +496,7 @@ typedef struct { static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ -static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pfm_alt_install_check); static pfm_intr_handler_desc_t *pfm_alt_intr_handler; static struct proc_dir_entry *perfmon_dir; Index: linux/arch/ia64/sn/kernel/xpnet.c =================================================================== --- linux.orig/arch/ia64/sn/kernel/xpnet.c +++ linux/arch/ia64/sn/kernel/xpnet.c @@ -130,7 +130,7 @@ struct net_device *xpnet_device; */ static u64 xpnet_broadcast_partitions; /* protect above */ -static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(xpnet_broadcast_lock); /* * Since the Block Transfer Engine (BTE) is being used for the transfer Index: linux/arch/m68k/amiga/amisound.c =================================================================== --- linux.orig/arch/m68k/amiga/amisound.c +++ linux/arch/m68k/amiga/amisound.c @@ -63,7 +63,7 @@ void __init amiga_init_sound(void) } static void nosound( unsigned long ignored ); -static struct timer_list sound_timer = TIMER_INITIALIZER(nosound, 0, 0); +static DEFINE_TIMER(sound_timer, nosound, 0, 0); void amiga_mksound( unsigned int hz, unsigned int ticks ) { Index: linux/arch/m68k/mac/macboing.c =================================================================== --- linux.orig/arch/m68k/mac/macboing.c +++ linux/arch/m68k/mac/macboing.c @@ -56,8 +56,7 @@ static void ( *mac_special_bell )( unsig /* * our timer to start/continue/stop the bell */ -static struct timer_list mac_sound_timer = - TIMER_INITIALIZER(mac_nosound, 0, 0); +static DEFINE_TIMER(mac_sound_timer, mac_nosound, 0, 0); /* * Sort of initialize the sound chip (called from mac_mksound on the first Index: linux/arch/mips/Kconfig =================================================================== --- linux.orig/arch/mips/Kconfig +++ linux/arch/mips/Kconfig @@ -308,6 +308,7 @@ config MIPS_SEAD config MOMENCO_OCELOT bool "Support for Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -660,6 +661,7 @@ config SIBYTE_SB1xxx_SOC depends on EXPERIMENTAL select BOOT_ELF32 select DMA_COHERENT + select NO_SPINLOCK select SWAP_IO_SPACE choice @@ -903,12 +905,21 @@ config TOSHIBA_FPCIB0 bool "FPCIB0 Backplane Support" depends on TOSHIBA_RBTX4927 +source "lib/Kconfig.RT" + config RWSEM_GENERIC_SPINLOCK bool + depends on !PREEMPT_RT default y config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool + depends on !PREEMPT_RT + default y config GENERIC_CALIBRATE_DELAY bool @@ -929,6 +940,9 @@ config ARC config DMA_COHERENT bool +config NO_SPINLOCK + bool + config DMA_IP27 bool @@ -1424,15 +1438,6 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - config RTC_DS1742 bool "DS1742 BRAM/RTC support" depends on TOSHIBA_JMR3927 || TOSHIBA_RBTX4927 @@ -1447,10 +1452,6 @@ config MIPS_INSANE_LARGE This will result in additional memory usage, so it is not recommended for normal users. -config RWSEM_GENERIC_SPINLOCK - bool - default y - endmenu menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" Index: linux/arch/mips/kernel/Makefile =================================================================== --- linux.orig/arch/mips/kernel/Makefile +++ linux/arch/mips/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -17,6 +17,8 @@ obj-$(CONFIG_MIPS32) += module-elf32.o obj-$(CONFIG_MIPS64) += module-elf64.o endif +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o Index: linux/arch/mips/kernel/entry.S =================================================================== --- linux.orig/arch/mips/kernel/entry.S +++ linux/arch/mips/kernel/entry.S @@ -48,6 +48,8 @@ FEXPORT(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -57,12 +59,9 @@ need_resched: LONG_L t0, PT_STATUS(sp) # Interrupts off? andi t0, 1 beqz t0, restore_all - li t0, PREEMPT_ACTIVE - sw t0, TI_PRE_COUNT($28) - local_irq_enable t0 - jal schedule - sw zero, TI_PRE_COUNT($28) local_irq_disable t0 + jal preempt_schedule_irq + sw zero, TI_PRE_COUNT($28) b need_resched #endif @@ -92,6 +91,7 @@ FEXPORT(work_pending) andi t0, a2, _TIF_NEED_RESCHED beqz t0, work_notifysig work_resched: + local_irq_enable t0 jal schedule local_irq_disable t0 # make sure need_resched and Index: linux/arch/mips/kernel/gdb-stub.c =================================================================== --- linux.orig/arch/mips/kernel/gdb-stub.c +++ linux/arch/mips/kernel/gdb-stub.c @@ -176,7 +176,7 @@ int kgdb_enabled; /* * spin locks for smp case */ -static spinlock_t kgdb_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(kgdb_lock); static spinlock_t kgdb_cpulock[NR_CPUS] = { [0 ... NR_CPUS-1] = SPIN_LOCK_UNLOCKED}; /* Index: linux/arch/mips/kernel/genrtc.c =================================================================== --- linux.orig/arch/mips/kernel/genrtc.c +++ linux/arch/mips/kernel/genrtc.c @@ -14,7 +14,7 @@ #include #include -static spinlock_t mips_rtc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(mips_rtc_lock); unsigned int get_rtc_time(struct rtc_time *time) { Index: linux/arch/mips/kernel/i8259.c =================================================================== --- linux.orig/arch/mips/kernel/i8259.c +++ linux/arch/mips/kernel/i8259.c @@ -31,7 +31,7 @@ void disable_8259A_irq(unsigned int irq) * moves to arch independent land */ -spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; +spinlock_t DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { Index: linux/arch/mips/kernel/irq.c =================================================================== --- linux.orig/arch/mips/kernel/irq.c +++ linux/arch/mips/kernel/irq.c @@ -125,7 +125,10 @@ void __init init_IRQ(void) irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].handler = &no_irq_type; - spin_lock_init(&irq_desc[i].lock); + raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif } arch_init_irq(); Index: linux/arch/mips/kernel/module.c =================================================================== --- linux.orig/arch/mips/kernel/module.c +++ linux/arch/mips/kernel/module.c @@ -2,7 +2,7 @@ #include static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_dbetables(unsigned long addr) Index: linux/arch/mips/kernel/process.c =================================================================== --- linux.orig/arch/mips/kernel/process.c +++ linux/arch/mips/kernel/process.c @@ -58,6 +58,7 @@ ATTRIB_NORET void cpu_idle(void) while (!need_resched()) if (cpu_wait) (*cpu_wait)(); + local_irq_enable(); schedule(); } } Index: linux/arch/mips/kernel/semaphore.c =================================================================== --- linux.orig/arch/mips/kernel/semaphore.c +++ linux/arch/mips/kernel/semaphore.c @@ -63,7 +63,7 @@ static inline int __sem_update_count(str : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); Index: linux/arch/mips/kernel/signal.c =================================================================== --- linux.orig/arch/mips/kernel/signal.c +++ linux/arch/mips/kernel/signal.c @@ -449,6 +449,10 @@ static int do_signal(sigset_t *oldset, s } #endif +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux/arch/mips/kernel/signal32.c =================================================================== --- linux.orig/arch/mips/kernel/signal32.c +++ linux/arch/mips/kernel/signal32.c @@ -766,6 +766,10 @@ int do_signal32(sigset_t *oldset, struct siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux/arch/mips/kernel/smp.c =================================================================== --- linux.orig/arch/mips/kernel/smp.c +++ linux/arch/mips/kernel/smp.c @@ -105,7 +105,22 @@ asmlinkage void start_secondary(void) cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. + */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -284,6 +299,8 @@ int setup_profiling_timer(unsigned int m return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -315,6 +332,7 @@ static void flush_tlb_mm_ipi(void *mm) void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1); @@ -324,6 +342,7 @@ void flush_tlb_mm(struct mm_struct *mm) if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -347,6 +366,8 @@ void flush_tlb_range(struct vm_area_stru struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -360,6 +381,7 @@ void flush_tlb_range(struct vm_area_stru if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -390,6 +412,8 @@ static void flush_tlb_page_ipi(void *inf void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -402,6 +426,7 @@ void flush_tlb_page(struct vm_area_struc if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux/arch/mips/kernel/time.c =================================================================== --- linux.orig/arch/mips/kernel/time.c +++ linux/arch/mips/kernel/time.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(jiffies_64); */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); /* * By default we provide the null RTC ops @@ -555,7 +555,7 @@ unsigned int mips_hpt_frequency; static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_NODELAY | SA_INTERRUPT, .name = "timer", }; Index: linux/arch/mips/kernel/traps.c =================================================================== --- linux.orig/arch/mips/kernel/traps.c +++ linux/arch/mips/kernel/traps.c @@ -250,7 +250,7 @@ void show_registers(struct pt_regs *regs printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void __die(const char * str, struct pt_regs * regs, const char * file, const char * func, unsigned long line) Index: linux/arch/mips/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/mips/lib/dec_and_lock.c +++ linux/arch/mips/lib/dec_and_lock.c @@ -28,7 +28,7 @@ */ #ifndef ATOMIC_DEC_AND_LOCK -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -44,12 +44,12 @@ int _atomic_dec_and_lock(atomic_t *atomi return 0; } - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); #endif /* ATOMIC_DEC_AND_LOCK */ Index: linux/arch/mips/math-emu/cp1emu.c =================================================================== --- linux.orig/arch/mips/math-emu/cp1emu.c +++ linux/arch/mips/math-emu/cp1emu.c @@ -1310,7 +1310,9 @@ int fpu_emulator_cop1Handler(int xcptno, if (sig) break; + preempt_enable(); cond_resched(); + preempt_disable(); } while (xcp->cp0_epc > prevepc); /* SIGILL indicates a non-fpu instruction */ Index: linux/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/irq.c +++ linux/arch/mips/sibyte/sb1250/irq.c @@ -88,7 +88,7 @@ static struct hw_interrupt_type sb1250_i /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -276,7 +276,7 @@ static irqreturn_t sb1250_dummy_handler static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = SA_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, Index: linux/arch/mips/sibyte/sb1250/time.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/time.c +++ linux/arch/mips/sibyte/sb1250/time.c @@ -115,10 +115,12 @@ void sb1250_timer_interrupt(struct pt_re ll_timer_interrupt(irq, regs); } - /* - * every CPU should do profiling and process accouting - */ - ll_local_timer_interrupt(irq, regs); + if (cpu != 0) { + /* + * every CPU should do profiling and process accouting + */ + ll_local_timer_interrupt(irq, regs); + } } /* Index: linux/arch/ppc/8260_io/enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/enet.c +++ linux/arch/ppc/8260_io/enet.c @@ -116,7 +116,7 @@ struct scc_enet_private { scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/fcc_enet.c +++ linux/arch/ppc/8260_io/fcc_enet.c @@ -377,7 +377,7 @@ struct fcc_enet_private { volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/8xx_io/commproc.c =================================================================== --- linux.orig/arch/ppc/8xx_io/commproc.c +++ linux/arch/ppc/8xx_io/commproc.c @@ -356,7 +356,7 @@ cpm_setbrg(uint brg, uint rate) /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... Index: linux/arch/ppc/8xx_io/cs4218_tdm.c =================================================================== --- linux.orig/arch/ppc/8xx_io/cs4218_tdm.c +++ linux/arch/ppc/8xx_io/cs4218_tdm.c @@ -1380,7 +1380,7 @@ static void cs_nosound(unsigned long xx) spin_unlock_irqrestore(&cs4218_lock, flags); } -static struct timer_list beep_timer = TIMER_INITIALIZER(cs_nosound, 0, 0); +static DEFINE_TIMER(beep_timer, cs_nosound, 0, 0); }; static void cs_mksound(unsigned int hz, unsigned int ticks) Index: linux/arch/ppc/8xx_io/enet.c =================================================================== --- linux.orig/arch/ppc/8xx_io/enet.c +++ linux/arch/ppc/8xx_io/enet.c @@ -144,7 +144,7 @@ struct scc_enet_private { unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8xx_io/fec.c =================================================================== --- linux.orig/arch/ppc/8xx_io/fec.c +++ linux/arch/ppc/8xx_io/fec.c @@ -165,7 +165,7 @@ struct fec_enet_private { struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/Kconfig =================================================================== --- linux.orig/arch/ppc/Kconfig +++ linux/arch/ppc/Kconfig @@ -15,13 +15,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -910,6 +903,34 @@ config PPCBUG_NVRAM bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC default y if PPC_PREP +config HIGH_RES_TIMERS + bool "High Resolution Timer Support" + default n + help + This option enables high resolution POSIX clocks and timers with + resolution of at least 1 microsecond. High resolution timers are not + free, a small overhead is incurred each time a timer that does not + expire on a 1/HZ tick boundary is used. If no such timers are used + the overhead is nil. + + The POSIX clocks CLOCK_REALTIME and CLOCK_MONOTONIC are available by + default. This option enables two additional clocks, CLOCK_REALTIME_HR + and CLOCK_MONOTONIC_HR. Note that this option does not change the + resolution of CLOCK_REALTIME or CLOCK_MONOTONIC, which remain at 1/HZ + resolution. + +config HIGH_RES_RESOLUTION + int "High Resolution Timer resolution (nanoseconds)" + depends on HIGH_RES_TIMERS + default 10000 + help + This sets the resolution in nanoseconds of the CLOCK_REALTIME_HR and + CLOCK_MONOTONIC_HR timers. Too fine a resolution (small a number) + will usually not be observable due to normal system latencies. For an + 800 MHz processor about 10,000 (10 microseconds) is recommended as a + finest resolution. If you don't need that sort of resolution, + larger values may generate less overhead. + config SMP depends on PPC_STD_MMU bool "Symmetric multi-processing support" @@ -949,6 +970,14 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "mm/Kconfig" source "fs/Kconfig.binfmt" Index: linux/arch/ppc/boot/Makefile =================================================================== --- linux.orig/arch/ppc/boot/Makefile +++ linux/arch/ppc/boot/Makefile @@ -11,6 +11,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd Index: linux/arch/ppc/kernel/dma-mapping.c =================================================================== --- linux.orig/arch/ppc/kernel/dma-mapping.c +++ linux/arch/ppc/kernel/dma-mapping.c @@ -71,7 +71,7 @@ int map_page(unsigned long va, phys_addr * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -407,7 +407,7 @@ static inline void __dma_sync_page_highm int nr_segs = PAGE_ALIGN(size + (PAGE_SIZE - offset))/PAGE_SIZE; int seg_nr = 0; - local_irq_save(flags); + raw_local_irq_save(flags); do { start = (unsigned long)kmap_atomic(page + seg_nr, @@ -426,7 +426,7 @@ static inline void __dma_sync_page_highm seg_offset = 0; } while (seg_nr < nr_segs); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_HIGHMEM */ Index: linux/arch/ppc/kernel/entry.S =================================================================== --- linux.orig/arch/ppc/kernel/entry.S +++ linux/arch/ppc/kernel/entry.S @@ -240,7 +240,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -318,7 +318,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,18 /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -890,7 +890,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user @@ -1000,3 +1000,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_OF */ + +#ifdef CONFIG_MCOUNT + +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function preamble, + * before the stack frame is created. An example of this preamble code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. */ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux/arch/ppc/kernel/idle.c =================================================================== --- linux.orig/arch/ppc/kernel/idle.c +++ linux/arch/ppc/kernel/idle.c @@ -38,7 +38,7 @@ void default_idle(void) powersave = ppc_md.power_save; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { if (powersave != NULL) powersave(); #ifdef CONFIG_SMP @@ -50,8 +50,11 @@ void default_idle(void) } #endif } - if (need_resched()) - schedule(); + if (need_resched()) { + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); + } } /* @@ -59,11 +62,15 @@ void default_idle(void) */ void cpu_idle(void) { - for (;;) + for (;;) { + BUG_ON(raw_irqs_disabled()); + stop_critical_timing(); + propagate_preempt_locks_value(); if (ppc_md.idle != NULL) ppc_md.idle(); else default_idle(); + } } #if defined(CONFIG_SYSCTL) && defined(CONFIG_6xx) Index: linux/arch/ppc/kernel/irq.c =================================================================== --- linux.orig/arch/ppc/kernel/irq.c +++ linux/arch/ppc/kernel/irq.c @@ -138,6 +138,7 @@ skip: void do_IRQ(struct pt_regs *regs) { int irq, first = 1; + irq_enter(); /* @@ -149,6 +150,7 @@ void do_IRQ(struct pt_regs *regs) * has already been handled. -- Tom */ while ((irq = ppc_md.get_irq(regs)) >= 0) { + trace_special(regs->nip, irq, 0); __do_IRQ(irq, regs); first = 0; } Index: linux/arch/ppc/kernel/misc.S =================================================================== --- linux.orig/arch/ppc/kernel/misc.S +++ linux/arch/ppc/kernel/misc.S @@ -302,8 +302,8 @@ _GLOBAL(local_save_flags_ptr) nop _GLOBAL(local_save_flags_ptr_end) -/* void local_irq_restore(unsigned long flags) */ -_GLOBAL(local_irq_restore) +/* void __raw_local_irq_restore(unsigned long flags) */ +_GLOBAL(__raw_local_irq_restore) /* * Just set/clear the MSR_EE bit through restore/flags but do not * change anything else. This is needed by the RT system and makes @@ -341,9 +341,9 @@ _GLOBAL(local_irq_restore) nop nop nop -_GLOBAL(local_irq_restore_end) +_GLOBAL(__raw_local_irq_restore_end) -_GLOBAL(local_irq_disable) +_GLOBAL(__raw_local_irq_disable) mfmsr r0 /* Get current interrupt state */ rlwinm r3,r0,16+1,32-1,31 /* Extract old value of 'EE' */ rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ @@ -370,9 +370,9 @@ _GLOBAL(local_irq_disable) nop nop nop -_GLOBAL(local_irq_disable_end) +_GLOBAL(__raw_local_irq_disable_end) -_GLOBAL(local_irq_enable) +_GLOBAL(__raw_local_irq_enable) mfmsr r3 /* Get current state */ ori r3,r3,MSR_EE /* Turn on 'EE' bit */ SYNC /* Some chip revs have problems here... */ @@ -399,7 +399,7 @@ _GLOBAL(local_irq_enable) nop nop nop -_GLOBAL(local_irq_enable_end) +_GLOBAL(__raw_local_irq_enable_end) /* * complement mask on the msr then "or" some values on. Index: linux/arch/ppc/kernel/ppc_ksyms.c =================================================================== --- linux.orig/arch/ppc/kernel/ppc_ksyms.c +++ linux/arch/ppc/kernel/ppc_ksyms.c @@ -293,9 +293,11 @@ EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(xmon); EXPORT_SYMBOL(xmon_printf); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_up); +EXPORT_SYMBOL(__compat_down); +EXPORT_SYMBOL(__compat_down_interruptible); +#endif #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) extern void (*debugger)(struct pt_regs *regs); Index: linux/arch/ppc/kernel/process.c =================================================================== --- linux.orig/arch/ppc/kernel/process.c +++ linux/arch/ppc/kernel/process.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -52,8 +54,8 @@ struct task_struct *last_task_used_math struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); @@ -241,7 +243,7 @@ struct task_struct *__switch_to(struct t unsigned long s; struct task_struct *last; - local_irq_save(s); + raw_local_irq_save(s); #ifdef CHECK_STACK check_stack(prev); check_stack(new); @@ -302,7 +304,7 @@ struct task_struct *__switch_to(struct t new_thread = &new->thread; old_thread = ¤t->thread; last = _switch(old_thread, new_thread); - local_irq_restore(s); + raw_local_irq_restore(s); return last; } Index: linux/arch/ppc/kernel/semaphore.c =================================================================== --- linux.orig/arch/ppc/kernel/semaphore.c +++ linux/arch/ppc/kernel/semaphore.c @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ void __up(struct semaphore *sem) * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} Index: linux/arch/ppc/kernel/smp-tbsync.c =================================================================== --- linux.orig/arch/ppc/kernel/smp-tbsync.c +++ linux/arch/ppc/kernel/smp-tbsync.c @@ -49,7 +49,7 @@ smp_generic_take_timebase( void ) { int cmd, tbl, tbu; - local_irq_disable(); + raw_local_irq_disable(); while( !running ) ; rmb(); @@ -78,7 +78,7 @@ smp_generic_take_timebase( void ) } enter_contest( tbsync->mark, -1 ); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit @@ -88,7 +88,7 @@ start_contest( int cmd, int offset, int tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for( i=-3; itbu = tbu = get_tbu(); @@ -114,7 +114,7 @@ start_contest( int cmd, int offset, int if( i++ > 0 ) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux/arch/ppc/kernel/smp.c =================================================================== --- linux.orig/arch/ppc/kernel/smp.c +++ linux/arch/ppc/kernel/smp.c @@ -137,6 +137,16 @@ void smp_send_reschedule(int cpu) smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -146,7 +156,7 @@ void smp_send_xmon_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -161,7 +171,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -196,7 +206,7 @@ int smp_call_function(void (*func) (void if (num_online_cpus() <= 1) return 0; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); return __smp_call_function(func, info, wait, MSG_ALL_BUT_SELF); } @@ -337,7 +347,7 @@ int __devinit start_secondary(void *unus printk("CPU %i done callin...\n", cpu); smp_ops->setup_cpu(cpu); printk("CPU %i done setup...\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); smp_ops->take_timebase(); printk("CPU %i done timebase take...\n", cpu); Index: linux/arch/ppc/kernel/temp.c =================================================================== --- linux.orig/arch/ppc/kernel/temp.c +++ linux/arch/ppc/kernel/temp.c @@ -143,7 +143,7 @@ static void tau_timeout(void * info) int shrink; /* disabling interrupts *should* be okay */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); #ifndef CONFIG_TAU_INT @@ -186,7 +186,7 @@ static void tau_timeout(void * info) */ mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tau_timeout_smp(unsigned long unused) Index: linux/arch/ppc/kernel/time.c =================================================================== --- linux.orig/arch/ppc/kernel/time.c +++ linux/arch/ppc/kernel/time.c @@ -67,6 +67,14 @@ #include +#ifdef CONFIG_HIGH_RES_TIMERS +#include +int arch_to_nsec; +int nsec_to_arch; +EXPORT_SYMBOL(arch_to_nsec); +EXPORT_SYMBOL(nsec_to_arch); +#endif + /* XXX false sharing with below? */ u64 jiffies_64 = INITIAL_JIFFIES; @@ -87,12 +95,14 @@ unsigned tb_to_us; unsigned tb_last_stamp; unsigned long tb_to_ns_scale; +int hr_timer_pending[NR_CPUS]; + extern unsigned long wall_jiffies; /* used for timezone offset */ static long timezone_offset; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -110,7 +120,7 @@ static inline int tb_delta(unsigned *jif } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -122,6 +132,47 @@ unsigned long profile_pc(struct pt_regs EXPORT_SYMBOL(profile_pc); #endif +#ifdef CONFIG_HIGH_RES_TIMERS +int schedule_hr_timer_int (unsigned long cycles) +{ + unsigned long cpu = smp_processor_id(); + unsigned jiffy_stamp = last_jiffy_stamp(cpu); + int delta; + + if (cycles < hrtimer_latch_min) + return -ETIME; + + hr_timer_pending[cpu] = 1; + delta = tb_ticks_per_jiffy - tb_delta (&jiffy_stamp); + + if (delta < hrtimer_latch_min) + delta = hrtimer_latch_min; + + if (cycles > delta) + cycles = delta; + + set_dec(cycles); + return 0; +} + +void __init hr_time_init (void) +{ + unsigned tb_ticks_per_sec; + + /* + * Use mulhwu_scale_factor to find the number ticks needed to equal + * 1,000,000 uSecs (1 Sec). + */ + tb_ticks_per_sec = mulhwu_scale_factor(tb_to_us, USEC_PER_SEC); + arch_to_nsec = div_sc_n(HR_TIME_SCALE_NSEC, NSEC_PER_SEC, + tb_ticks_per_sec); + nsec_to_arch = div_sc_n(HR_TIME_SCALE_CYCLES, tb_ticks_per_sec, + NSEC_PER_SEC); + printk("HRT init: arch_to_nsec = %u, nsec_to_arch = %u\n", + arch_to_nsec, nsec_to_arch); +} +#endif + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -181,10 +232,19 @@ void timer_interrupt(struct pt_regs * re } write_sequnlock(&xtime_lock); } - if ( !disarm_decr[smp_processor_id()] ) - set_dec(next_dec); + last_jiffy_stamp(cpu) = jiffy_stamp; + if ( !disarm_decr[smp_processor_id()] ) { + if (hr_timer_pending[cpu]) { + hr_timer_pending[cpu] = 0; + do_hr_timer_int(); + } + if (!hr_timer_pending[cpu]) { + set_dec(next_dec); + } + } + if (ppc_md.heartbeat && !ppc_md.heartbeat_count--) ppc_md.heartbeat(); @@ -300,6 +360,9 @@ void __init time_init(void) ppc_md.calibrate_decr(); tb_to_ns_scale = mulhwu(tb_to_us, 1000 << 10); } +#ifdef CONFIG_HIGH_RES_TIMERS + hr_time_init(); +#endif /* Now that the decrementer is calibrated, it can be used in case the * clock is stuck, but the fact that we have to handle the 601 Index: linux/arch/ppc/kernel/traps.c =================================================================== --- linux.orig/arch/ppc/kernel/traps.c +++ linux/arch/ppc/kernel/traps.c @@ -72,7 +72,7 @@ void (*debugger_fault_handler)(struct pt * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * fp, long err) { @@ -113,6 +113,10 @@ void _exception(int signr, struct pt_reg debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux/arch/ppc/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/ppc/lib/dec_and_lock.c +++ linux/arch/ppc/lib/dec_and_lock.c @@ -19,7 +19,7 @@ */ #ifndef ATOMIC_DEC_AND_LOCK -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -35,12 +35,12 @@ int _atomic_dec_and_lock(atomic_t *atomi return 0; } - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); #endif /* ATOMIC_DEC_AND_LOCK */ Index: linux/arch/ppc/lib/locks.c =================================================================== --- linux.orig/arch/ppc/lib/locks.c +++ linux/arch/ppc/lib/locks.c @@ -43,7 +43,7 @@ static inline unsigned long __spin_trylo return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) { int cpu = smp_processor_id(); unsigned int stuck = INIT_STUCK; @@ -63,9 +63,9 @@ void _raw_spin_lock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); lock->owner_cpu = cpu; } -EXPORT_SYMBOL(_raw_spin_lock); +EXPORT_SYMBOL(__raw_spin_lock); -int _raw_spin_trylock(spinlock_t *lock) +int __raw_spin_trylock(raw_spinlock_t *lock) { if (__spin_trylock(&lock->lock)) return 0; @@ -73,9 +73,9 @@ int _raw_spin_trylock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); return 1; } -EXPORT_SYMBOL(_raw_spin_trylock); +EXPORT_SYMBOL(__raw_spin_trylock); -void _raw_spin_unlock(spinlock_t *lp) +void __raw_spin_unlock(raw_spinlock_t *lp) { if ( !lp->lock ) printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", @@ -89,13 +89,13 @@ void _raw_spin_unlock(spinlock_t *lp) wmb(); lp->lock = 0; } -EXPORT_SYMBOL(_raw_spin_unlock); +EXPORT_SYMBOL(__raw_spin_unlock); /* * For rwlocks, zero is unlocked, -1 is write-locked, * positive is read-locked. */ -static __inline__ int __read_trylock(rwlock_t *rw) +static __inline__ int __read_trylock(raw_rwlock_t *rw) { signed int tmp; @@ -115,13 +115,13 @@ static __inline__ int __read_trylock(rwl return tmp; } -int _raw_read_trylock(rwlock_t *rw) +int __raw_read_trylock(raw_rwlock_t *rw) { return __read_trylock(rw) > 0; } -EXPORT_SYMBOL(_raw_read_trylock); +EXPORT_SYMBOL(__raw_read_trylock); -void _raw_read_lock(rwlock_t *rw) +void __raw_read_lock(rwlock_t *rw) { unsigned int stuck; @@ -136,9 +136,9 @@ void _raw_read_lock(rwlock_t *rw) } } } -EXPORT_SYMBOL(_raw_read_lock); +EXPORT_SYMBOL(__raw_read_lock); -void _raw_read_unlock(rwlock_t *rw) +void __raw_read_unlock(raw_rwlock_t *rw) { if ( rw->lock == 0 ) printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", @@ -147,9 +147,9 @@ void _raw_read_unlock(rwlock_t *rw) wmb(); atomic_dec((atomic_t *) &(rw)->lock); } -EXPORT_SYMBOL(_raw_read_unlock); +EXPORT_SYMBOL(__raw_read_unlock); -void _raw_write_lock(rwlock_t *rw) +void __raw_write_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -165,18 +165,18 @@ void _raw_write_lock(rwlock_t *rw) } wmb(); } -EXPORT_SYMBOL(_raw_write_lock); +EXPORT_SYMBOL(__raw_write_lock); -int _raw_write_trylock(rwlock_t *rw) +int __raw_write_trylock(raw_rwlock_t *rw) { if (cmpxchg(&rw->lock, 0, -1) != 0) return 0; wmb(); return 1; } -EXPORT_SYMBOL(_raw_write_trylock); +EXPORT_SYMBOL(__raw_write_trylock); -void _raw_write_unlock(rwlock_t *rw) +void __raw_write_unlock(raw_rwlock_t *rw) { if (rw->lock >= 0) printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", @@ -185,6 +185,6 @@ void _raw_write_unlock(rwlock_t *rw) wmb(); rw->lock = 0; } -EXPORT_SYMBOL(_raw_write_unlock); +EXPORT_SYMBOL(__raw_write_unlock); #endif Index: linux/arch/ppc/mm/fault.c =================================================================== --- linux.orig/arch/ppc/mm/fault.c +++ linux/arch/ppc/mm/fault.c @@ -92,7 +92,7 @@ static int store_updates_sp(struct pt_re * the error_code parameter is ESR for a data fault, 0 for an instruction * fault. */ -int do_page_fault(struct pt_regs *regs, unsigned long address, +int notrace do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; Index: linux/arch/ppc/mm/init.c =================================================================== --- linux.orig/arch/ppc/mm/init.c +++ linux/arch/ppc/mm/init.c @@ -56,7 +56,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux/arch/ppc/platforms/4xx/xilinx_ml300.c =================================================================== --- linux.orig/arch/ppc/platforms/4xx/xilinx_ml300.c +++ linux/arch/ppc/platforms/4xx/xilinx_ml300.c @@ -62,7 +62,7 @@ static volatile unsigned *powerdown_base static void xilinx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); out_be32(powerdown_base, XPAR_POWER_0_POWERDOWN_VALUE); while (1) ; } Index: linux/arch/ppc/platforms/adir_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/adir_setup.c +++ linux/arch/ppc/platforms/adir_setup.c @@ -77,6 +77,7 @@ adir_calibrate_decr(void) freq = adir_get_bus_speed(); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int @@ -133,7 +134,7 @@ adir_setup_arch(void) static void adir_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* SRR0 has system reset vector, SRR1 has default MSR value */ /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ __asm__ __volatile__ Index: linux/arch/ppc/platforms/apus_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/apus_setup.c +++ linux/arch/ppc/platforms/apus_setup.c @@ -282,6 +282,7 @@ void apus_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; __bus_speed = bus_speed; __speed_test_failed = speed_test_failed; @@ -480,7 +481,7 @@ void cache_clear(__u32 addr, int length) void apus_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); APUS_WRITE(APUS_REG_LOCK, REGLOCK_BLACKMAGICK1|REGLOCK_BLACKMAGICK2); @@ -598,7 +599,7 @@ int __debug_serinit( void ) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* turn off Rx and Tx interrupts */ custom.intena = IF_RBF | IF_TBE; @@ -606,7 +607,7 @@ int __debug_serinit( void ) /* clear any pending interrupt */ custom.intreq = IF_RBF | IF_TBE; - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * set the appropriate directions for the modem control flags, Index: linux/arch/ppc/platforms/chestnut.c =================================================================== --- linux.orig/arch/ppc/platforms/chestnut.c +++ linux/arch/ppc/platforms/chestnut.c @@ -456,7 +456,7 @@ chestnut_restart(char *cmd) { volatile ulong i = 10000000; - local_irq_disable(); + raw_local_irq_disable(); /* * Set CPLD Reg 3 bit 0 to 1 to allow MPP signals on reset to work @@ -475,7 +475,7 @@ chestnut_restart(char *cmd) static void chestnut_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for (;;); /* NOTREACHED */ } Index: linux/arch/ppc/platforms/chrp_smp.c =================================================================== --- linux.orig/arch/ppc/platforms/chrp_smp.c +++ linux/arch/ppc/platforms/chrp_smp.c @@ -57,7 +57,7 @@ smp_chrp_setup_cpu(int cpu_nr) do_openpic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit Index: linux/arch/ppc/platforms/chrp_time.c =================================================================== --- linux.orig/arch/ppc/platforms/chrp_time.c +++ linux/arch/ppc/platforms/chrp_time.c @@ -189,4 +189,5 @@ void __init chrp_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux/arch/ppc/platforms/cpci690.c =================================================================== --- linux.orig/arch/ppc/platforms/cpci690.c +++ linux/arch/ppc/platforms/cpci690.c @@ -364,7 +364,7 @@ cpci690_reset_board(void) { u32 i = 10000; - local_irq_disable(); + raw_local_irq_disable(); out_8((u8 *)(cpci690_br_base + CPCI690_BR_SW_RESET), 0x11); while (i != 0) i++; Index: linux/arch/ppc/platforms/ev64260.c =================================================================== --- linux.orig/arch/ppc/platforms/ev64260.c +++ linux/arch/ppc/platforms/ev64260.c @@ -445,7 +445,7 @@ ev64260_platform_notify(struct device *d static void ev64260_reset_board(void *addr) { - local_irq_disable(); + raw_local_irq_disable(); /* disable and invalidate the L2 cache */ _set_L2CR(0); @@ -513,7 +513,7 @@ ev64260_restart(char *cmd) static void ev64260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); /* NOTREACHED */ } @@ -552,6 +552,7 @@ ev64260_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux/arch/ppc/platforms/gemini_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/gemini_setup.c +++ linux/arch/ppc/platforms/gemini_setup.c @@ -303,7 +303,7 @@ void __init gemini_init_l2(void) void gemini_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* make a clean restart, not via the MPIC */ _gemini_reboot(); for(;;); @@ -462,6 +462,7 @@ void __init gemini_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } unsigned long __init gemini_find_end_of_memory(void) Index: linux/arch/ppc/platforms/hdpu.c =================================================================== --- linux.orig/arch/ppc/platforms/hdpu.c +++ linux/arch/ppc/platforms/hdpu.c @@ -58,7 +58,7 @@ static void parse_bootinfo(unsigned long static void hdpu_set_l1pe(void); static void hdpu_cpustate_set(unsigned char new_state); #ifdef CONFIG_SMP -static spinlock_t timebase_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; extern int smp_tb_synchronized; @@ -473,7 +473,7 @@ static void hdpu_reset_board(void) hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_RESET); - local_irq_disable(); + raw_local_irq_disable(); /* Clear all the LEDs */ mv64x60_write(&bh, MV64x60_GPP_VALUE_CLR, ((1 << 4) | @@ -515,7 +515,7 @@ static void hdpu_restart(char *cmd) static void hdpu_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_HALT); Index: linux/arch/ppc/platforms/k2.c =================================================================== --- linux.orig/arch/ppc/platforms/k2.c +++ linux/arch/ppc/platforms/k2.c @@ -411,6 +411,7 @@ static void __init k2_calibrate_decr(voi freq = k2_get_bus_speed(); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int k2_show_cpuinfo(struct seq_file *m) @@ -479,7 +480,7 @@ static void __init k2_setup_arch(void) static void k2_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Flip FLASH back to page 1 to access firmware image */ __raw_writel(0, GPOUT); Index: linux/arch/ppc/platforms/lopec.c =================================================================== --- linux.orig/arch/ppc/platforms/lopec.c +++ linux/arch/ppc/platforms/lopec.c @@ -162,7 +162,7 @@ lopec_restart(char *cmd) reg |= 0x80; *((unsigned char *) LOPEC_SYSSTAT1) = reg; - local_irq_disable(); + raw_local_irq_disable(); while(1); #undef LOPEC_SYSSTAT1 } @@ -170,7 +170,7 @@ lopec_restart(char *cmd) static void lopec_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while(1); } Index: linux/arch/ppc/platforms/mcpn765.c =================================================================== --- linux.orig/arch/ppc/platforms/mcpn765.c +++ linux/arch/ppc/platforms/mcpn765.c @@ -415,7 +415,7 @@ mcpn765_map_io(void) static void mcpn765_reset_board(void) { - local_irq_disable(); + raw_local_irq_disable(); /* set VIA IDE controller into native mode */ mcpn765_set_VIA_IDE_native(); @@ -449,7 +449,7 @@ mcpn765_power_off(void) static void mcpn765_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); /* NOTREACHED */ } Index: linux/arch/ppc/platforms/mvme5100.c =================================================================== --- linux.orig/arch/ppc/platforms/mvme5100.c +++ linux/arch/ppc/platforms/mvme5100.c @@ -267,7 +267,7 @@ mvme5100_map_io(void) static void mvme5100_reset_board(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -291,7 +291,7 @@ mvme5100_restart(char *cmd) static void mvme5100_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/platforms/pal4_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/pal4_setup.c +++ linux/arch/ppc/platforms/pal4_setup.c @@ -81,7 +81,7 @@ pal4_show_cpuinfo(struct seq_file *m) static void pal4_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); __asm__ __volatile__("lis 3,0xfff0\n \ ori 3,3,0x100\n \ mtspr 26,3\n \ @@ -95,7 +95,7 @@ pal4_restart(char *cmd) static void pal4_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/platforms/pcore.c =================================================================== --- linux.orig/arch/ppc/platforms/pcore.c +++ linux/arch/ppc/platforms/pcore.c @@ -242,7 +242,7 @@ pcore_setup_arch(void) static void pcore_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Hard reset */ writeb(0x11, 0xfe000332); while(1); @@ -251,7 +251,7 @@ pcore_restart(char *cmd) static void pcore_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Turn off user LEDs */ writeb(0x00, 0xfe000300); while (1); Index: linux/arch/ppc/platforms/pmac_cpufreq.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_cpufreq.c +++ linux/arch/ppc/platforms/pmac_cpufreq.c @@ -285,7 +285,7 @@ static int __pmac pmu_set_cpu_speed(int asm volatile("mtdec %0" : : "r" (0x7fffffff)); /* We can now disable MSR_EE */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Giveup the FPU & vec */ enable_kernel_fp(); @@ -341,7 +341,7 @@ static int __pmac pmu_set_cpu_speed(int openpic_set_priority(pic_prio); /* Let interrupts flow again ... */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #ifdef DEBUG_FREQ debug_calc_bogomips(); Index: linux/arch/ppc/platforms/pmac_feature.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_feature.c +++ linux/arch/ppc/platforms/pmac_feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock __pmacdata); +static DEFINE_RAW_SPINLOCK(feature_lock __pmacdata); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux/arch/ppc/platforms/pmac_nvram.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_nvram.c +++ linux/arch/ppc/platforms/pmac_nvram.c @@ -80,7 +80,7 @@ static volatile unsigned char *nvram_dat static int nvram_mult, is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux/arch/ppc/platforms/pmac_pic.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_pic.c +++ linux/arch/ppc/platforms/pmac_pic.c @@ -68,7 +68,7 @@ static int max_irqs __pmacdata; static int max_real_irqs __pmacdata; static u32 level_mask[4] __pmacdata; -static DEFINE_SPINLOCK(pmac_pic_lock __pmacdata); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock __pmacdata); #define GATWICK_IRQ_POOL_SIZE 10 Index: linux/arch/ppc/platforms/pmac_smp.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_smp.c +++ linux/arch/ppc/platforms/pmac_smp.c @@ -511,8 +511,8 @@ static void __init smp_core99_kick_cpu(i return; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); /* Save reset vector */ save_vector = *vector; @@ -550,7 +550,7 @@ static void __init smp_core99_kick_cpu(i *vector = save_vector; flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); } @@ -592,7 +592,7 @@ void smp_core99_take_timebase(void) mb(); /* set our stuff the same as the primary */ - local_irq_save(flags); + raw_local_irq_save(flags); set_dec(1); set_tb(pri_tb_hi, pri_tb_lo); last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; @@ -601,7 +601,7 @@ void smp_core99_take_timebase(void) /* tell the primary we're done */ sec_tb_reset = 0; mb(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* not __init, called in sleep/wakeup code */ @@ -621,7 +621,7 @@ void smp_core99_give_timebase(void) /* freeze the timebase and read it */ /* disable interrupts so the timebase is disabled for the shortest possible time */ - local_irq_save(flags); + raw_local_irq_save(flags); pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); mb(); @@ -645,7 +645,7 @@ void smp_core99_give_timebase(void) /* Now, restart the timebase by leaving the GPIO to an open collector */ pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/ppc/platforms/pmac_time.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_time.c +++ linux/arch/ppc/platforms/pmac_time.c @@ -197,6 +197,7 @@ via_calibrate_decr(void) tb_ticks_per_jiffy = (dstart - dend) / (6 * (HZ/100)); tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + cpu_khz = (dstart - dend) / 60; printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", tb_ticks_per_jiffy, dstart - dend); @@ -288,4 +289,5 @@ pmac_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux/arch/ppc/platforms/powerpmc250.c =================================================================== --- linux.orig/arch/ppc/platforms/powerpmc250.c +++ linux/arch/ppc/platforms/powerpmc250.c @@ -167,12 +167,13 @@ powerpmc250_calibrate_decr(void) tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void powerpmc250_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Hard reset */ writeb(0x11, 0xfe000332); while(1); @@ -181,7 +182,7 @@ powerpmc250_restart(char *cmd) static void powerpmc250_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/platforms/pplus.c =================================================================== --- linux.orig/arch/ppc/platforms/pplus.c +++ linux/arch/ppc/platforms/pplus.c @@ -608,7 +608,7 @@ static void pplus_restart(char *cmd) { unsigned long i = 10000; - local_irq_disable(); + raw_local_irq_disable(); /* set VIA IDE controller into native mode */ pplus_set_VIA_IDE_native(); Index: linux/arch/ppc/platforms/prep_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/prep_setup.c +++ linux/arch/ppc/platforms/prep_setup.c @@ -458,7 +458,7 @@ static void __prep prep_restart(char *cmd) { #define PREP_SP92 0x92 /* Special Port 92 */ - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -476,7 +476,7 @@ prep_restart(char *cmd) static void __prep prep_halt(void) { - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -544,7 +544,7 @@ prep_sig750_poweroff(void) { /* tweak the power manager found in most IBM PRePs (except Thinkpads) */ - local_irq_disable(); + raw_local_irq_disable(); /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -938,6 +938,7 @@ prep_calibrate_decr(void) (freq/divisor)/1000000, (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; tb_ticks_per_jiffy = freq / HZ / divisor; } } Index: linux/arch/ppc/platforms/prpmc750.c =================================================================== --- linux.orig/arch/ppc/platforms/prpmc750.c +++ linux/arch/ppc/platforms/prpmc750.c @@ -272,18 +272,19 @@ static void __init prpmc750_calibrate_de tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void prpmc750_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); writeb(PRPMC750_MODRST_MASK, PRPMC750_MODRST_REG); while (1) ; } static void prpmc750_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux/arch/ppc/platforms/prpmc800.c =================================================================== --- linux.orig/arch/ppc/platforms/prpmc800.c +++ linux/arch/ppc/platforms/prpmc800.c @@ -331,6 +331,7 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = 100000000 / 4; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; return; } @@ -371,13 +372,14 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = (tbl_end - tbl_start) * 2; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; } static void prpmc800_restart(char *cmd) { ulong temp; - local_irq_disable(); + raw_local_irq_disable(); temp = in_be32((uint *) HARRIER_MISC_CSR_REG); temp |= HARRIER_RSTOUT; out_be32((uint *) HARRIER_MISC_CSR_REG, temp); @@ -386,7 +388,7 @@ static void prpmc800_restart(char *cmd) static void prpmc800_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux/arch/ppc/platforms/radstone_ppc7d.c =================================================================== --- linux.orig/arch/ppc/platforms/radstone_ppc7d.c +++ linux/arch/ppc/platforms/radstone_ppc7d.c @@ -177,7 +177,7 @@ static void ppc7d_power_off(void) { u32 data; - local_irq_disable(); + raw_local_irq_disable(); /* Ensure that internal MV643XX watchdog is disabled. * The Disco watchdog uses MPP17 on this hardware. Index: linux/arch/ppc/platforms/sandpoint.c =================================================================== --- linux.orig/arch/ppc/platforms/sandpoint.c +++ linux/arch/ppc/platforms/sandpoint.c @@ -545,7 +545,7 @@ sandpoint_map_io(void) static void sandpoint_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -559,7 +559,7 @@ sandpoint_restart(char *cmd) static void sandpoint_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); /* No way to shut power off with software */ /* NOTREACHED */ } Index: linux/arch/ppc/platforms/sbc82xx.c =================================================================== --- linux.orig/arch/ppc/platforms/sbc82xx.c +++ linux/arch/ppc/platforms/sbc82xx.c @@ -68,7 +68,7 @@ static void sbc82xx_time_init(void) static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { Index: linux/arch/ppc/platforms/spruce.c =================================================================== --- linux.orig/arch/ppc/platforms/spruce.c +++ linux/arch/ppc/platforms/spruce.c @@ -150,6 +150,7 @@ spruce_calibrate_decr(void) freq = SPRUCE_BUS_SPEED; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int @@ -236,7 +237,7 @@ spruce_setup_arch(void) static void spruce_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* SRR0 has system reset vector, SRR1 has default MSR value */ /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ Index: linux/arch/ppc/syslib/cpm2_common.c =================================================================== --- linux.orig/arch/ppc/syslib/cpm2_common.c +++ linux/arch/ppc/syslib/cpm2_common.c @@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... */ static rh_block_t cpm_boot_dpmem_rh_block[16]; Index: linux/arch/ppc/syslib/ibm440gx_common.c =================================================================== --- linux.orig/arch/ppc/syslib/ibm440gx_common.c +++ linux/arch/ppc/syslib/ibm440gx_common.c @@ -157,7 +157,7 @@ void __init ibm440gx_l2c_enable(void){ return; } - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable SRAM */ @@ -201,7 +201,7 @@ void __init ibm440gx_l2c_enable(void){ mtdcr(DCRN_L2C0_CFG, r); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Disable L2 cache */ @@ -209,7 +209,7 @@ void __init ibm440gx_l2c_disable(void){ u32 r; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable L2C mode */ @@ -228,7 +228,7 @@ void __init ibm440gx_l2c_disable(void){ SRAM_SBCR_BAS3 | SRAM_SBCR_BS_64KB | SRAM_SBCR_BU_RW); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init ibm440gx_l2c_setup(struct ibm44x_clocks* p) Index: linux/arch/ppc/syslib/ibm44x_common.c =================================================================== --- linux.orig/arch/ppc/syslib/ibm44x_common.c +++ linux/arch/ppc/syslib/ibm44x_common.c @@ -60,6 +60,7 @@ void __init ibm44x_calibrate_decr(unsign { tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -76,19 +77,19 @@ extern void abort(void); static void ibm44x_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } static void ibm44x_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } static void ibm44x_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/m8260_pci_erratum9.c =================================================================== --- linux.orig/arch/ppc/syslib/m8260_pci_erratum9.c +++ linux/arch/ppc/syslib/m8260_pci_erratum9.c @@ -132,7 +132,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (sinc) @@ -161,7 +161,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } @@ -184,7 +184,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (dinc) @@ -213,7 +213,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } Index: linux/arch/ppc/syslib/m8260_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/m8260_setup.c +++ linux/arch/ppc/syslib/m8260_setup.c @@ -78,6 +78,7 @@ m8260_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } /* The 8260 has an internal 1-second timer update register that @@ -128,7 +129,7 @@ m8260_restart(char *cmd) static void m8260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/syslib/m8xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/m8xx_setup.c +++ linux/arch/ppc/syslib/m8xx_setup.c @@ -159,6 +159,7 @@ void __init m8xx_calibrate_decr(void) printk("Decrementer Frequency = %d/%d\n", freq, divisor); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Perform some more timer/timebase initialization. This used * to be done elsewhere, but other changes caused it to get @@ -234,7 +235,7 @@ m8xx_restart(char *cmd) { __volatile__ unsigned char dummy; - local_irq_disable(); + raw_local_irq_disable(); ((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr |= 0x00000080; /* Clear the ME bit in MSR to cause checkstop on machine check Index: linux/arch/ppc/syslib/mpc52xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/mpc52xx_setup.c +++ linux/arch/ppc/syslib/mpc52xx_setup.c @@ -40,7 +40,7 @@ mpc52xx_restart(char *cmd) { struct mpc52xx_gpt __iomem *gpt0 = MPC52xx_VA(MPC52xx_GPTx_OFFSET(0)); - local_irq_disable(); + raw_local_irq_disable(); /* Turn on the watchdog and wait for it to expire. It effectively does a reset */ @@ -53,7 +53,7 @@ mpc52xx_restart(char *cmd) void mpc52xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } @@ -214,6 +214,7 @@ mpc52xx_calibrate_decr(void) tb_ticks_per_jiffy = xlbfreq / HZ / divisor; tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000); + cpu_khz = (xlbfreq / divisor) / 1000; } int mpc52xx_match_psc_function(int psc_idx, const char *func) Index: linux/arch/ppc/syslib/mv64x60.c =================================================================== --- linux.orig/arch/ppc/syslib/mv64x60.c +++ linux/arch/ppc/syslib/mv64x60.c @@ -31,7 +31,7 @@ u8 mv64x60_pci_exclude_bridge = 1; -spinlock_t mv64x60_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(mv64x60_lock); static phys_addr_t mv64x60_bridge_pbase = 0; static void *mv64x60_bridge_vbase = 0; Index: linux/arch/ppc/syslib/ocp.c =================================================================== --- linux.orig/arch/ppc/syslib/ocp.c +++ linux/arch/ppc/syslib/ocp.c @@ -45,11 +45,11 @@ #include #include #include +#include #include #include #include -#include #include //#define DBG(x) printk x Index: linux/arch/ppc/syslib/open_pic.c =================================================================== --- linux.orig/arch/ppc/syslib/open_pic.c +++ linux/arch/ppc/syslib/open_pic.c @@ -529,7 +529,7 @@ void openpic_reset_processor_phys(u_int } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP Index: linux/arch/ppc/syslib/open_pic2.c =================================================================== --- linux.orig/arch/ppc/syslib/open_pic2.c +++ linux/arch/ppc/syslib/open_pic2.c @@ -385,7 +385,7 @@ static void openpic2_set_spurious(u_int vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic2_setup_lock); /* * Initialize a timer interrupt (and disable it) Index: linux/arch/ppc/syslib/ppc4xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc4xx_setup.c +++ linux/arch/ppc/syslib/ppc4xx_setup.c @@ -147,7 +147,7 @@ static void ppc4xx_power_off(void) { printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -155,7 +155,7 @@ static void ppc4xx_halt(void) { printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -178,6 +178,7 @@ ppc4xx_calibrate_decr(void) freq = bip->bi_tbfreq; tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero. ** At 200 Mhz, time base will rollover in ~2925 years. Index: linux/arch/ppc/syslib/ppc83xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc83xx_setup.c +++ linux/arch/ppc/syslib/ppc83xx_setup.c @@ -122,7 +122,7 @@ mpc83xx_restart(char *cmd) reg = ioremap(BCSR_PHYS_ADDR, BCSR_SIZE); - local_irq_disable(); + raw_local_irq_disable(); /* * Unlock the BCSR bits so a PRST will update the contents. @@ -151,14 +151,14 @@ mpc83xx_restart(char *cmd) void mpc83xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc83xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/ppc85xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc85xx_setup.c +++ linux/arch/ppc/syslib/ppc85xx_setup.c @@ -59,6 +59,7 @@ mpc85xx_calibrate_decr(void) divisor = 8; tb_ticks_per_jiffy = freq / divisor / HZ; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -114,21 +115,21 @@ mpc85xx_early_serial_map(void) void mpc85xx_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } void mpc85xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc85xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/prom.c =================================================================== --- linux.orig/arch/ppc/syslib/prom.c +++ linux/arch/ppc/syslib/prom.c @@ -1397,7 +1397,7 @@ print_properties(struct device_node *np) } #endif -static DEFINE_SPINLOCK(rtas_lock); +static DEFINE_RAW_SPINLOCK(rtas_lock); /* this can be called after setup -- Cort */ int __openfirmware Index: linux/arch/ppc/syslib/qspan_pci.c =================================================================== --- linux.orig/arch/ppc/syslib/qspan_pci.c +++ linux/arch/ppc/syslib/qspan_pci.c @@ -94,7 +94,7 @@ #define mk_config_type1(bus, dev, offset) \ mk_config_addr(bus, dev, offset) | 1; -static spinlock_t pcibios_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pcibios_lock); int qspan_pcibios_read_config_byte(unsigned char bus, unsigned char dev_fn, unsigned char offset, unsigned char *val) Index: linux/arch/ppc/syslib/todc_time.c =================================================================== --- linux.orig/arch/ppc/syslib/todc_time.c +++ linux/arch/ppc/syslib/todc_time.c @@ -508,6 +508,7 @@ todc_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux/arch/ppc/xmon/xmon.c =================================================================== --- linux.orig/arch/ppc/xmon/xmon.c +++ linux/arch/ppc/xmon/xmon.c @@ -291,10 +291,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux/arch/ppc64/kernel/pmc.c =================================================================== --- linux.orig/arch/ppc64/kernel/pmc.c +++ linux/arch/ppc64/kernel/pmc.c @@ -26,7 +26,7 @@ static void dummy_perf(struct pt_regs *r mtspr(SPRN_MMCR0, mmcr0); } -static spinlock_t pmc_owner_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; Index: linux/arch/sparc/lib/atomic32.c =================================================================== --- linux.orig/arch/sparc/lib/atomic32.c +++ linux/arch/sparc/lib/atomic32.c @@ -20,7 +20,7 @@ spinlock_t __atomic_hash[ATOMIC_HASH_SIZ #else /* SMP */ -static spinlock_t dummy = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(dummy); #define ATOMIC_HASH_SIZE 1 #define ATOMIC_HASH(a) (&dummy) Index: linux/arch/x86_64/Kconfig =================================================================== --- linux.orig/arch/x86_64/Kconfig +++ linux/arch/x86_64/Kconfig @@ -34,13 +34,6 @@ config ISA config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_CALIBRATE_DELAY bool default y @@ -219,6 +212,14 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on PREEMPT_RT + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + config K8_NUMA bool "K8 NUMA support" select NUMA Index: linux/arch/x86_64/ia32/sys_ia32.c =================================================================== --- linux.orig/arch/x86_64/ia32/sys_ia32.c +++ linux/arch/x86_64/ia32/sys_ia32.c @@ -456,6 +456,10 @@ sys32_settimeofday(struct compat_timeval struct timespec kts; struct timezone ktz; + int ret = timeofday_API_hacks(tv, tz); + if (ret != 1) + return ret; + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; Index: linux/arch/x86_64/kernel/Makefile =================================================================== --- linux.orig/arch/x86_64/kernel/Makefile +++ linux/arch/x86_64/kernel/Makefile @@ -4,11 +4,12 @@ extra-y := head.o head64.o init_task.o vmlinux.lds EXTRA_AFLAGS := -traditional -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += semaphore.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ Index: linux/arch/x86_64/kernel/apic.c =================================================================== --- linux.orig/arch/x86_64/kernel/apic.c +++ linux/arch/x86_64/kernel/apic.c @@ -514,10 +514,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -532,7 +531,7 @@ static int lapic_resume(struct sys_devic /* XXX: Pavel needs this for S3 resume, but can't explain why */ set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); + raw_local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; @@ -555,7 +554,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -712,7 +711,7 @@ static void setup_APIC_timer(unsigned in { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* For some reasons this doesn't work on Simics, so fake it for now */ if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { @@ -742,7 +741,7 @@ static void setup_APIC_timer(unsigned in __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -799,7 +798,7 @@ void __init setup_boot_APIC_clock (void) printk(KERN_INFO "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -807,14 +806,14 @@ void __init setup_boot_APIC_clock (void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit setup_secondary_APIC_clock(void) { - local_irq_disable(); /* FIXME: Do we need this? --RR */ + raw_local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit disable_APIC_timer(void) Index: linux/arch/x86_64/kernel/early_printk.c =================================================================== --- linux.orig/arch/x86_64/kernel/early_printk.c +++ linux/arch/x86_64/kernel/early_printk.c @@ -164,7 +164,7 @@ static int early_console_initialized = 0 void early_printk(const char *fmt, ...) { - char buf[512]; + static char buf[512]; int n; va_list ap; Index: linux/arch/x86_64/kernel/entry.S =================================================================== --- linux.orig/arch/x86_64/kernel/entry.S +++ linux/arch/x86_64/kernel/entry.S @@ -211,8 +211,8 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz sysret_signal sti pushq %rdi call schedule @@ -231,7 +231,7 @@ sysret_signal: leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi jmp sysret_check /* Do syscall tracing */ @@ -280,8 +280,8 @@ int_with_check: /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz int_very_careful sti pushq %rdi call schedule @@ -310,7 +310,7 @@ int_signal: movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 call do_notify_resume -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi int_restore_rest: RESTORE_REST cli @@ -478,8 +478,8 @@ bad_iret: /* edi: workmask, edx: work */ retint_careful: - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz retint_signal sti pushq %rdi call schedule @@ -499,7 +499,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli - movl $_TIF_NEED_RESCHED,%edi + movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -930,3 +930,40 @@ ENTRY(call_softirq) popq %r15 ret +#ifdef CONFIG_LATENCY_TRACE + +ENTRY(mcount) + cmpq $0, trace_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + + leaveq +out: + ret + +#endif + Index: linux/arch/x86_64/kernel/i8259.c =================================================================== --- linux.orig/arch/x86_64/kernel/i8259.c +++ linux/arch/x86_64/kernel/i8259.c @@ -130,7 +130,7 @@ void (*interrupt[NR_IRQS])(void) = { * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -451,7 +451,7 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init init_ISA_irqs (void) { Index: linux/arch/x86_64/kernel/init_task.c =================================================================== --- linux.orig/arch/x86_64/kernel/init_task.c +++ linux/arch/x86_64/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/x86_64/kernel/io_apic.c =================================================================== --- linux.orig/arch/x86_64/kernel/io_apic.c +++ linux/arch/x86_64/kernel/io_apic.c @@ -45,7 +45,7 @@ int sis_apic_bug; /* not actually suppor static int no_timer_check; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * # of IRQ routing registers @@ -115,6 +115,9 @@ static void add_pin_to_irq(unsigned int reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -127,10 +130,8 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -1249,7 +1250,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1341,11 +1342,48 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +/* + * In the preemptible case mask the IRQ first then handle it and ack it. + * + * (In the non-preemptible case we keep the IRQ unacked in the local APIC + * and dont need to do the masking, because the code executes atomically.) + */ +#ifdef CONFIG_PREEMPT_HARDIRQS + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_level_ioapic_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_INPROGRESS)) + unmask_IO_APIC_irq(irq); +} + +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + +#else /* !CONFIG_PREEMPT_HARDIRQS */ + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + static void end_level_ioapic_irq (unsigned int irq) { ack_APIC_irq(); } +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} +#endif /* !CONFIG_PREEMPT_HARDIRQS */ + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { unsigned long flags; @@ -1385,6 +1423,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1392,6 +1437,11 @@ static void end_level_ioapic_vector (uns end_level_ioapic_irq(irq); } +static void enable_level_ioapic_vector(unsigned int vector) +{ + enable_level_ioapic_irq(vector_to_irq(vector)); +} + static void mask_IO_APIC_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux/arch/x86_64/kernel/nmi.c =================================================================== --- linux.orig/arch/x86_64/kernel/nmi.c +++ linux/arch/x86_64/kernel/nmi.c @@ -44,7 +44,7 @@ * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); +static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -128,7 +128,7 @@ void __cpuinit nmi_watchdog_default(void static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -157,7 +157,7 @@ int __init check_nmi_watchdog (void) for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; - local_irq_enable(); + raw_local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -463,14 +463,46 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) per_cpu(nmi_touch, i) = 1; + + touch_softlockup_watchdog(); +} + +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); } +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -482,6 +514,11 @@ void nmi_watchdog_tick (struct pt_regs * */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for (i = 0; i < NR_CPUS; i++) + nmi_show_regs[i] = 1; + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); Index: linux/arch/x86_64/kernel/process.c =================================================================== --- linux.orig/arch/x86_64/kernel/process.c +++ linux/arch/x86_64/kernel/process.c @@ -88,12 +88,13 @@ EXPORT_SYMBOL(enable_hlt); void default_idle(void) { if (!atomic_read(&hlt_counter)) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); - } + raw_local_irq_enable(); + } else + raw_local_irq_enable(); } /* @@ -105,7 +106,7 @@ static void poll_idle (void) { int oldval; - local_irq_enable(); + raw_local_irq_enable(); /* * Deal with another CPU just having chosen a thread to @@ -121,7 +122,7 @@ static void poll_idle (void) "rep; nop;" "je 2b;" : : - "i" (_TIF_NEED_RESCHED), + "i" (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags)); } else { set_need_resched(); @@ -189,7 +190,9 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -201,10 +204,13 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } @@ -217,16 +223,16 @@ void cpu_idle (void) */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { set_thread_flag(TIF_POLLING_NRFLAG); do { __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); - } while (!need_resched()); + } while (!need_resched() && !need_resched_delayed()); clear_thread_flag(TIF_POLLING_NRFLAG); } } @@ -311,7 +317,7 @@ void __show_regs(struct pt_regs * regs) void show_regs(struct pt_regs *regs) { __show_regs(regs); - show_trace(®s->rsp); + show_trace(current, ®s->rsp); } /* @@ -330,13 +336,14 @@ void exit_thread(void) kprobe_flush_task(me); if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); Index: linux/arch/x86_64/kernel/reboot.c =================================================================== --- linux.orig/arch/x86_64/kernel/reboot.c +++ linux/arch/x86_64/kernel/reboot.c @@ -98,7 +98,7 @@ void machine_shutdown(void) smp_send_stop(); #endif - local_irq_disable(); + raw_local_irq_disable(); #ifndef CONFIG_SMP disable_local_APIC(); @@ -106,7 +106,7 @@ void machine_shutdown(void) disable_IO_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } void machine_emergency_restart(void) Index: linux/arch/x86_64/kernel/semaphore.c =================================================================== --- linux.orig/arch/x86_64/kernel/semaphore.c +++ linux/arch/x86_64/kernel/semaphore.c @@ -50,12 +50,12 @@ * we cannot lose wakeup events. */ -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } -void __sched __down(struct semaphore * sem) +void __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -92,7 +92,7 @@ void __sched __down(struct semaphore * s tsk->state = TASK_RUNNING; } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -155,7 +155,7 @@ int __sched __down_interruptible(struct * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +int __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; Index: linux/arch/x86_64/kernel/signal.c =================================================================== --- linux.orig/arch/x86_64/kernel/signal.c +++ linux/arch/x86_64/kernel/signal.c @@ -416,6 +416,13 @@ int do_signal(struct pt_regs *regs, sigs siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux/arch/x86_64/kernel/smp.c =================================================================== --- linux.orig/arch/x86_64/kernel/smp.c +++ linux/arch/x86_64/kernel/smp.c @@ -42,7 +42,7 @@ static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL -1ULL /* @@ -267,10 +267,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -421,9 +431,9 @@ void smp_stop_cpu(void) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } static void smp_really_stop_cpu(void *dummy) @@ -447,9 +457,9 @@ void smp_send_stop(void) if (!nolock) spin_unlock(&call_lock); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux/arch/x86_64/kernel/smpboot.c =================================================================== --- linux.orig/arch/x86_64/kernel/smpboot.c +++ linux/arch/x86_64/kernel/smpboot.c @@ -197,7 +197,7 @@ static void __cpuinit smp_store_cpu_info latency and low latency is the primary objective here. -AK */ #define no_cpu_relax() barrier() -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); +static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock); static volatile __cpuinitdata unsigned long go[SLAVE + 1]; static int notscsync __cpuinitdata; @@ -213,7 +213,7 @@ static __cpuinit void sync_master(void * go[MASTER] = 0; - local_irq_save(flags); + raw_local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { while (!go[MASTER]) @@ -222,7 +222,7 @@ static __cpuinit void sync_master(void * rdtscll(go[SLAVE]); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Index: linux/arch/x86_64/kernel/time.c =================================================================== --- linux.orig/arch/x86_64/kernel/time.c +++ linux/arch/x86_64/kernel/time.c @@ -53,8 +53,8 @@ static void cpufreq_delayed_get(void); extern void i8254_timer_resume(void); extern int using_apic_timer; -DEFINE_SPINLOCK(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); static int nohpet __initdata = 0; static int notsc __initdata = 0; @@ -883,7 +883,7 @@ int __init time_setup(char *str) } static struct irqaction irq0 = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL + timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL }; extern void __init config_acpi_tables(void); @@ -1041,6 +1041,7 @@ static int timer_resume(struct sys_devic write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } Index: linux/arch/x86_64/kernel/traps.c =================================================================== --- linux.orig/arch/x86_64/kernel/traps.c +++ linux/arch/x86_64/kernel/traps.c @@ -90,7 +90,7 @@ int register_die_notifier(struct notifie static inline void conditional_sti(struct pt_regs *regs) { if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); } static int kstack_depth_to_print = 10; @@ -156,7 +156,7 @@ static unsigned long *in_exception_stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void show_trace(unsigned long *stack) +void show_trace(struct task_struct *task, unsigned long *stack) { unsigned long addr; const unsigned cpu = safe_smp_processor_id(); @@ -221,6 +221,7 @@ void show_trace(unsigned long *stack) HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); #undef HANDLE_STACK printk("\n"); + print_traces(task); } void show_stack(struct task_struct *tsk, unsigned long * rsp) @@ -257,7 +258,7 @@ void show_stack(struct task_struct *tsk, printk("%016lx ", *stack++); touch_nmi_watchdog(); } - show_trace((unsigned long *)rsp); + show_trace(tsk, (unsigned long *)rsp); } /* @@ -266,7 +267,7 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; - show_trace(&dummy); + show_trace(current, &dummy); } EXPORT_SYMBOL(dump_stack); @@ -339,14 +340,14 @@ void out_of_line_bug(void) } #endif -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int die_owner = -1; void oops_begin(void) { int cpu = safe_smp_processor_id(); /* racy, but better than risking deadlock. */ - local_irq_disable(); + raw_local_irq_disable(); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; Index: linux/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux.orig/arch/x86_64/kernel/vsyscall.c +++ linux/arch/x86_64/kernel/vsyscall.c @@ -38,7 +38,7 @@ #define force_inline __attribute__((always_inline)) inline int __sysctl_vsyscall __section_sysctl_vsyscall = 1; -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +raw_seqlock_t __xtime_lock __section_xtime_lock = RAW_SEQLOCK_UNLOCKED; #include Index: linux/arch/x86_64/kernel/x8664_ksyms.c =================================================================== --- linux.orig/arch/x86_64/kernel/x8664_ksyms.c +++ linux/arch/x86_64/kernel/x8664_ksyms.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,6 @@ #include #include -extern spinlock_t rtc_lock; - #ifdef CONFIG_SMP extern void __write_lock_failed(rwlock_t *rw); extern void __read_lock_failed(rwlock_t *rw); @@ -62,10 +61,12 @@ EXPORT_SYMBOL(pm_idle); EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_nocheck); EXPORT_SYMBOL(ip_compute_csum); @@ -179,7 +180,7 @@ EXPORT_SYMBOL(rwsem_down_write_failed_th EXPORT_SYMBOL(empty_zero_page); #ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); #endif EXPORT_SYMBOL(die_chain); Index: linux/arch/x86_64/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/x86_64/lib/dec_and_lock.c +++ linux/arch/x86_64/lib/dec_and_lock.c @@ -10,7 +10,7 @@ #include #include -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; Index: linux/arch/x86_64/lib/thunk.S =================================================================== --- linux.orig/arch/x86_64/lib/thunk.S +++ linux/arch/x86_64/lib/thunk.S @@ -43,11 +43,13 @@ thunk rwsem_downgrade_thunk,rwsem_downgrade_wake #endif thunk do_softirq_thunk,do_softirq - - thunk __down_failed,__down - thunk_retrax __down_failed_interruptible,__down_interruptible - thunk_retrax __down_failed_trylock,__down_trylock - thunk __up_wakeup,__up + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + thunk __compat_down_failed,__compat_down + thunk_retrax __compat_down_failed_interruptible,__compat_down_interruptible + thunk_retrax __compat_down_failed_trylock,__compat_down_trylock + thunk __compat_up_wakeup,__compat_up +#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC Index: linux/arch/x86_64/mm/fault.c =================================================================== --- linux.orig/arch/x86_64/mm/fault.c +++ linux/arch/x86_64/mm/fault.c @@ -38,6 +38,7 @@ void bust_spinlocks(int yes) { int loglevel_save = console_loglevel; if (yes) { + stop_trace(); oops_in_progress = 1; } else { #ifdef CONFIG_VT @@ -323,7 +324,7 @@ asmlinkage void do_page_fault(struct pt_ return; if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); + raw_local_irq_enable(); if (unlikely(page_fault_trace)) printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", Index: linux/arch/x86_64/mm/init.c =================================================================== --- linux.orig/arch/x86_64/mm/init.c +++ linux/arch/x86_64/mm/init.c @@ -47,7 +47,7 @@ extern int swiotlb; extern char _stext[]; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the Index: linux/drivers/acorn/block/fd1772.c =================================================================== --- linux.orig/drivers/acorn/block/fd1772.c +++ linux/drivers/acorn/block/fd1772.c @@ -376,19 +376,15 @@ static void do_fd_request(request_queue_ /************************* End of Prototypes **************************/ -static struct timer_list motor_off_timer = - TIMER_INITIALIZER(fd_motor_off_timer, 0, 0); +static DEFINE_TIMER(motor_off_timer, fd_motor_off_timer, 0, 0); #ifdef TRACKBUFFER -static struct timer_list readtrack_timer = - TIMER_INITIALIZER(fd_readtrack_check, 0, 0); +static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0); #endif -static struct timer_list timeout_timer = - TIMER_INITIALIZER(fd_times_out, 0, 0); +static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0); -static struct timer_list fd_timer = - TIMER_INITIALIZER(check_change, 0, 0); +static DEFINE_TIMER(fd_timer, check_change, 0, 0); /* DAG: Haven't got a clue what this is? */ int stdma_islocked(void) Index: linux/drivers/acpi/osl.c =================================================================== --- linux.orig/drivers/acpi/osl.c +++ linux/drivers/acpi/osl.c @@ -836,14 +836,14 @@ acpi_os_create_semaphore( u32 initial_units, acpi_handle *handle) { - struct semaphore *sem = NULL; + struct compat_semaphore *sem = NULL; ACPI_FUNCTION_TRACE ("os_create_semaphore"); - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct compat_semaphore)); if (!sem) return_ACPI_STATUS (AE_NO_MEMORY); - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct compat_semaphore)); sema_init(sem, initial_units); @@ -867,7 +867,7 @@ acpi_status acpi_os_delete_semaphore( acpi_handle handle) { - struct semaphore *sem = (struct semaphore*) handle; + struct compat_semaphore *sem = (struct compat_semaphore*) handle; ACPI_FUNCTION_TRACE ("os_delete_semaphore"); @@ -899,7 +899,7 @@ acpi_os_wait_semaphore( u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore*)handle; + struct compat_semaphore *sem = (struct compat_semaphore*)handle; int ret = 0; ACPI_FUNCTION_TRACE ("os_wait_semaphore"); @@ -981,7 +981,7 @@ acpi_os_signal_semaphore( acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *) handle; + struct compat_semaphore *sem = (struct compat_semaphore *) handle; ACPI_FUNCTION_TRACE ("os_signal_semaphore"); Index: linux/drivers/acpi/processor_idle.c =================================================================== --- linux.orig/drivers/acpi/processor_idle.c +++ linux/drivers/acpi/processor_idle.c @@ -186,14 +186,14 @@ static void acpi_processor_idle (void) * Interrupts must be disabled during bus mastering calculations and * for C2/C3 transitions. */ - local_irq_disable(); + raw_local_irq_disable(); /* * Check whether we truly need to go idle, or should * reschedule: */ if (unlikely(need_resched())) { - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -255,7 +255,7 @@ static void acpi_processor_idle (void) * issues (e.g. floppy DMA transfer overrun/underrun). */ if (pr->power.bm_activity & cx->demotion.threshold.bm) { - local_irq_enable(); + raw_local_irq_enable(); next_state = cx->demotion.state; goto end; } @@ -279,7 +279,7 @@ static void acpi_processor_idle (void) if (pm_idle_save) pm_idle_save(); else - safe_halt(); + raw_safe_halt(); /* * TBD: Can't get time duration while in C1, as resumes * go to an ISR rather than here. Need to instrument @@ -298,7 +298,7 @@ static void acpi_processor_idle (void) /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; break; @@ -335,13 +335,13 @@ static void acpi_processor_idle (void) } /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; break; default: - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -417,7 +417,7 @@ end: if (pm_idle_save) pm_idle_save(); else - safe_halt(); + raw_safe_halt(); return; } Index: linux/drivers/acpi/processor_throttling.c =================================================================== --- linux.orig/drivers/acpi/processor_throttling.c +++ linux/drivers/acpi/processor_throttling.c @@ -73,7 +73,7 @@ acpi_processor_get_throttling ( duty_mask <<= pr->throttling.duty_offset; - local_irq_disable(); + raw_local_irq_disable(); value = inl(pr->throttling.address); @@ -91,7 +91,7 @@ acpi_processor_get_throttling ( pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state is T%d (%d%% throttling applied)\n", @@ -138,7 +138,7 @@ int acpi_processor_set_throttling ( duty_mask = ~duty_mask; } - local_irq_disable(); + raw_local_irq_disable(); /* * Disable throttling by writing a 0 to bit 4. Note that we must @@ -165,7 +165,7 @@ int acpi_processor_set_throttling ( pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state set to T%d (%d%%)\n", state, Index: linux/drivers/acpi/sleep/main.c =================================================================== --- linux.orig/drivers/acpi/sleep/main.c +++ linux/drivers/acpi/sleep/main.c @@ -83,7 +83,7 @@ static int acpi_pm_enter(suspend_state_t return error; } - local_irq_save(flags); + raw_local_irq_save(flags); acpi_enable_wakeup_device(acpi_state); switch (pm_state) { case PM_SUSPEND_STANDBY: @@ -108,7 +108,7 @@ static int acpi_pm_enter(suspend_state_t default: return -EINVAL; } - local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); /* restore processor state Index: linux/drivers/acpi/sleep/poweroff.c =================================================================== --- linux.orig/drivers/acpi/sleep/poweroff.c +++ linux/drivers/acpi/sleep/poweroff.c @@ -48,7 +48,7 @@ void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ printk("%s called\n", __FUNCTION__); - local_irq_disable(); + raw_local_irq_disable(); /* Some SMP machines only can poweroff in boot CPU */ acpi_enter_sleep_state(ACPI_STATE_S5); } Index: linux/drivers/atm/atmtcp.c =================================================================== --- linux.orig/drivers/atm/atmtcp.c +++ linux/drivers/atm/atmtcp.c @@ -368,7 +368,7 @@ static struct atm_dev atmtcp_control_dev .ops = &atmtcp_c_dev_ops, .type = "atmtcp", .number = 999, - .lock = SPIN_LOCK_UNLOCKED + .lock = SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock) }; Index: linux/drivers/atm/idt77105.c =================================================================== --- linux.orig/drivers/atm/idt77105.c +++ linux/drivers/atm/idt77105.c @@ -50,10 +50,8 @@ static void idt77105_stats_timer_func(un static void idt77105_restart_timer_func(unsigned long); -static struct timer_list stats_timer = - TIMER_INITIALIZER(idt77105_stats_timer_func, 0, 0); -static struct timer_list restart_timer = - TIMER_INITIALIZER(idt77105_restart_timer_func, 0, 0); +static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func, 0, 0); +static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func, 0, 0); static int start_timer = 1; static struct idt77105_priv *idt77105_all = NULL; Index: linux/drivers/atm/iphase.c =================================================================== --- linux.orig/drivers/atm/iphase.c +++ linux/drivers/atm/iphase.c @@ -79,7 +79,7 @@ static IADEV *ia_dev[8]; static struct atm_dev *_ia_dev[8]; static int iadev_count; static void ia_led_timer(unsigned long arg); -static struct timer_list ia_timer = TIMER_INITIALIZER(ia_led_timer, 0, 0); +static DEFINE_TIMER(ia_timer, ia_led_timer, 0, 0); static int IA_TX_BUF = DFL_TX_BUFFERS, IA_TX_BUF_SZ = DFL_TX_BUF_SZ; static int IA_RX_BUF = DFL_RX_BUFFERS, IA_RX_BUF_SZ = DFL_RX_BUF_SZ; static uint IADebugFlag = /* IF_IADBG_ERR | IF_IADBG_CBR| IF_IADBG_INIT_ADAPTER Index: linux/drivers/block/acsi.c =================================================================== --- linux.orig/drivers/block/acsi.c +++ linux/drivers/block/acsi.c @@ -371,7 +371,7 @@ static int acsi_revalidate (struct gendi /************************* End of Prototypes **************************/ -struct timer_list acsi_timer = TIMER_INITIALIZER(acsi_times_out, 0, 0); +DEFINE_TIMER(acsi_timer, acsi_times_out, 0, 0); #ifdef CONFIG_ATARI_SLM Index: linux/drivers/block/acsi_slm.c =================================================================== --- linux.orig/drivers/block/acsi_slm.c +++ linux/drivers/block/acsi_slm.c @@ -268,7 +268,7 @@ static int slm_get_pagesize( int device, /************************* End of Prototypes **************************/ -static struct timer_list slm_timer = TIMER_INITIALIZER(slm_test_ready, 0, 0); +static DEFINE_TIMER(slm_timer, slm_test_ready, 0, 0); static struct file_operations slm_fops = { .owner = THIS_MODULE, Index: linux/drivers/block/ataflop.c =================================================================== --- linux.orig/drivers/block/ataflop.c +++ linux/drivers/block/ataflop.c @@ -371,16 +371,10 @@ static int floppy_release( struct inode /************************* End of Prototypes **************************/ -static struct timer_list motor_off_timer = - TIMER_INITIALIZER(fd_motor_off_timer, 0, 0); -static struct timer_list readtrack_timer = - TIMER_INITIALIZER(fd_readtrack_check, 0, 0); - -static struct timer_list timeout_timer = - TIMER_INITIALIZER(fd_times_out, 0, 0); - -static struct timer_list fd_timer = - TIMER_INITIALIZER(check_change, 0, 0); +static DEFINE_TIMER(motor_off_timer, fd_motor_off_timer, 0, 0); +static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0); +static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0); +static DEFINE_TIMER(fd_timer, check_change, 0, 0); static inline void start_motor_off_timer(void) { Index: linux/drivers/block/cfq-iosched.c =================================================================== --- linux.orig/drivers/block/cfq-iosched.c +++ linux/drivers/block/cfq-iosched.c @@ -1376,10 +1376,9 @@ static void cfq_exit_single_io_context(s { struct cfq_data *cfqd = cic->cfqq->cfqd; request_queue_t *q = cfqd->queue; + unsigned long flags; - WARN_ON(!irqs_disabled()); - - spin_lock(q->queue_lock); + spin_lock_irqsave(q->queue_lock, flags); if (unlikely(cic->cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cic->cfqq, 0); @@ -1388,7 +1387,7 @@ static void cfq_exit_single_io_context(s cfq_put_queue(cic->cfqq); cic->cfqq = NULL; - spin_unlock(q->queue_lock); + spin_unlock_irqrestore(q->queue_lock, flags); } /* @@ -1399,9 +1398,6 @@ static void cfq_exit_io_context(struct c { struct cfq_io_context *__cic; struct list_head *entry; - unsigned long flags; - - local_irq_save(flags); /* * put the reference this task is holding to the various queues @@ -1412,7 +1408,6 @@ static void cfq_exit_io_context(struct c } cfq_exit_single_io_context(cic); - local_irq_restore(flags); } static struct cfq_io_context * Index: linux/drivers/block/floppy.c =================================================================== --- linux.orig/drivers/block/floppy.c +++ linux/drivers/block/floppy.c @@ -626,7 +626,7 @@ static inline void debugt(const char *me #endif /* DEBUGT */ typedef void (*timeout_fn) (unsigned long); -static struct timer_list fd_timeout = TIMER_INITIALIZER(floppy_shutdown, 0, 0); +static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0); static const char *timeout_message; @@ -1010,7 +1010,7 @@ static void schedule_bh(void (*handler) schedule_work(&floppy_work); } -static struct timer_list fd_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(fd_timer, NULL, 0, 0); static void cancel_activity(void) { Index: linux/drivers/block/ll_rw_blk.c =================================================================== --- linux.orig/drivers/block/ll_rw_blk.c +++ linux/drivers/block/ll_rw_blk.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * for max sense size @@ -1412,7 +1413,7 @@ static int ll_merge_requests_fn(request_ */ void blk_plug_device(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -1433,7 +1434,7 @@ EXPORT_SYMBOL(blk_plug_device); */ int blk_remove_plug(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; @@ -3273,13 +3274,17 @@ void exit_io_context(void) unsigned long flags; struct io_context *ioc; - local_irq_save(flags); task_lock(current); + /* + * CHECKME: what does this protect against - can interrupt + * contexts access current->io_context? + */ + local_irq_save_nort(flags); ioc = current->io_context; current->io_context = NULL; ioc->task = NULL; + local_irq_restore_nort(flags); task_unlock(current); - local_irq_restore(flags); if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); Index: linux/drivers/block/loop.c =================================================================== --- linux.orig/drivers/block/loop.c +++ linux/drivers/block/loop.c @@ -514,12 +514,12 @@ static int loop_make_request(request_que lo->lo_pending++; loop_add_bio(lo, old_bio); spin_unlock_irq(&lo->lo_lock); - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); return 0; out: if (lo->lo_pending == 0) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; @@ -580,23 +580,20 @@ static int loop_thread(void *data) lo->lo_pending = 1; /* - * up sem, we are running + * complete it, we are running */ - up(&lo->lo_sem); + complete(&lo->lo_done); for (;;) { int pending; - /* - * interruptible just to not contribute to load avg - */ - if (down_interruptible(&lo->lo_bh_mutex)) + if (wait_for_completion_interruptible(&lo->lo_bh_done)) continue; spin_lock_irq(&lo->lo_lock); /* - * could be upped because of tear-down, not pending work + * could be completed because of tear-down, not pending work */ if (unlikely(!lo->lo_pending)) { spin_unlock_irq(&lo->lo_lock); @@ -619,7 +616,7 @@ static int loop_thread(void *data) break; } - up(&lo->lo_sem); + complete(&lo->lo_done); return 0; } @@ -830,7 +827,7 @@ static int loop_set_fd(struct loop_devic set_blocksize(bdev, lo_blocksize); kernel_thread(loop_thread, lo, CLONE_KERNEL); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); return 0; out_putf: @@ -896,10 +893,10 @@ static int loop_clr_fd(struct loop_devic lo->lo_state = Lo_rundown; lo->lo_pending--; if (!lo->lo_pending) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); lo->lo_backing_file = NULL; @@ -1276,8 +1273,8 @@ static int __init loop_init(void) if (!lo->lo_queue) goto out_mem4; init_MUTEX(&lo->lo_ctl_mutex); - init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); + init_completion(&lo->lo_done); + init_completion(&lo->lo_bh_done); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; Index: linux/drivers/block/paride/pseudo.h =================================================================== --- linux.orig/drivers/block/paride/pseudo.h +++ linux/drivers/block/paride/pseudo.h @@ -43,7 +43,7 @@ static unsigned long ps_timeout; static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_WORK(ps_tq, ps_tq_int, NULL); Index: linux/drivers/block/ps2esdi.c =================================================================== --- linux.orig/drivers/block/ps2esdi.c +++ linux/drivers/block/ps2esdi.c @@ -99,8 +99,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ps2esdi_i static int no_int_yet; static int ps2esdi_drives; static u_short io_base; -static struct timer_list esdi_timer = - TIMER_INITIALIZER(ps2esdi_reset_timer, 0, 0); +static DEFINE_TIMER(esdi_timer, ps2esdi_reset_timer, 0, 0); static int reset_status; static int ps2esdi_slot = -1; static int tp720esdi = 0; /* Is it Integrated ESDI of ThinkPad-720? */ Index: linux/drivers/block/sx8.c =================================================================== --- linux.orig/drivers/block/sx8.c +++ linux/drivers/block/sx8.c @@ -280,7 +280,10 @@ struct carm_host { struct work_struct fsm_task; - struct semaphore probe_sem; + /* + * PREEMPT_RT: should be converted to completions. + */ + struct compat_semaphore probe_sem; }; struct carm_response { Index: linux/drivers/cdrom/aztcd.c =================================================================== --- linux.orig/drivers/cdrom/aztcd.c +++ linux/drivers/cdrom/aztcd.c @@ -297,7 +297,7 @@ static char azt_auto_eject = AZT_AUTO_EJ static int AztTimeout, AztTries; static DECLARE_WAIT_QUEUE_HEAD(azt_waitq); -static struct timer_list delay_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(delay_timer, NULL, 0, 0); static struct azt_DiskInfo DiskInfo; static struct azt_Toc Toc[MAX_TRACKS]; Index: linux/drivers/cdrom/gscd.c =================================================================== --- linux.orig/drivers/cdrom/gscd.c +++ linux/drivers/cdrom/gscd.c @@ -146,7 +146,7 @@ static int AudioStart_f; static int AudioEnd_m; static int AudioEnd_f; -static struct timer_list gscd_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(gscd_timer, NULL, 0, 0); static DEFINE_SPINLOCK(gscd_lock); static struct request_queue *gscd_queue; Index: linux/drivers/cdrom/optcd.c =================================================================== --- linux.orig/drivers/cdrom/optcd.c +++ linux/drivers/cdrom/optcd.c @@ -264,7 +264,7 @@ static inline int flag_low(int flag, uns static int sleep_timeout; /* max # of ticks to sleep */ static DECLARE_WAIT_QUEUE_HEAD(waitq); static void sleep_timer(unsigned long data); -static struct timer_list delay_timer = TIMER_INITIALIZER(sleep_timer, 0, 0); +static DEFINE_TIMER(delay_timer, sleep_timer, 0, 0); static DEFINE_SPINLOCK(optcd_lock); static struct request_queue *opt_queue; @@ -1075,7 +1075,7 @@ static int tries; /* ibid?? */ static int timeout = 0; static void poll(unsigned long data); -static struct timer_list req_timer = {.function = poll}; +static DEFINE_TIMER(req_timer, poll, NULL, NULL); static void poll(unsigned long data) Index: linux/drivers/cdrom/sbpcd.c =================================================================== --- linux.orig/drivers/cdrom/sbpcd.c +++ linux/drivers/cdrom/sbpcd.c @@ -742,13 +742,10 @@ static struct sbpcd_drive *current_drive unsigned long cli_sti; /* for saving the processor flags */ #endif /*==========================================================================*/ -static struct timer_list delay_timer = - TIMER_INITIALIZER(mark_timeout_delay, 0, 0); -static struct timer_list data_timer = - TIMER_INITIALIZER(mark_timeout_data, 0, 0); +static DEFINE_TIMER(delay_timer, mark_timeout_delay, 0, 0); +static DEFINE_TIMER(data_timer, mark_timeout_data, 0, 0); #if 0 -static struct timer_list audio_timer = - TIMER_INITIALIZER(mark_timeout_audio, 0, 0); +static DEFINE_TIMER(audio_timer, mark_timeout_audio, 0, 0); #endif /*==========================================================================*/ /* Index: linux/drivers/cdrom/sjcd.c =================================================================== --- linux.orig/drivers/cdrom/sjcd.c +++ linux/drivers/cdrom/sjcd.c @@ -151,7 +151,7 @@ static struct sjcd_stat statistic; /* * Timer. */ -static struct timer_list sjcd_delay_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(sjcd_delay_timer, NULL, 0, 0); #define SJCD_SET_TIMER( func, tmout ) \ ( sjcd_delay_timer.expires = jiffies+tmout, \ Index: linux/drivers/char/Kconfig =================================================================== --- linux.orig/drivers/char/Kconfig +++ linux/drivers/char/Kconfig @@ -711,6 +711,45 @@ config RTC To compile this driver as a module, choose M here: the module will be called rtc. +config RTC_HISTOGRAM + tristate "Real Time Clock Histogram Support" + default y + depends on RTC + ---help--- + If you say Y here then the kernel will track the delivery and + wakeup latency of /dev/rtc using tasks and will report a + histogram to the kernel log when the application closes /dev/rtc. + +config BLOCKER + tristate "Priority Inheritance Debugging (Blocker) Device Support" + default y + ---help--- + If you say Y here then a device will be created that the userspace + pi_test suite uses to test and measure kernel locking primitives. + +config LPPTEST + tristate "Parallel Port Based Latency Measurement Device" + depends on !PARPORT + default y + ---help--- + If you say Y here then a device will be created that the userspace + testlpp utility uses to measure IRQ latencies of a target system + from an independent measurement system. + + NOTE: this code assumes x86 PCs and that the parallel port is + bidirectional and is on IRQ 7. + + to use the device, both the target and the source system needs to + run a kernel with CONFIG_LPPTEST enabled. To measure latencies, + use the scripts/testlpp utility in your kernel source directory, + and run it (as root) on the source system - it will start printing + out the latencies it took to get a response from the target system: + + Latency of response: 12.2 usecs (121265 cycles) + + then generate various workloads on the target system to see how + (worst-case-) latencies are impacted. + config SGI_DS1286 tristate "SGI DS1286 RTC support" depends on SGI_IP22 Index: linux/drivers/char/Makefile =================================================================== --- linux.orig/drivers/char/Makefile +++ linux/drivers/char/Makefile @@ -57,6 +57,8 @@ obj-$(CONFIG_R3964) += n_r3964.o obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o obj-$(CONFIG_RTC) += rtc.o +obj-$(CONFIG_BLOCKER) += blocker.o +obj-$(CONFIG_LPPTEST) += lpptest.o obj-$(CONFIG_HPET) += hpet.o obj-$(CONFIG_GEN_RTC) += genrtc.o obj-$(CONFIG_EFI_RTC) += efirtc.o Index: linux/drivers/char/blocker.c =================================================================== --- /dev/null +++ linux/drivers/char/blocker.c @@ -0,0 +1,131 @@ +/* + * priority inheritance testing device + */ + +#include +#include +#include + +#define BLOCKER_MINOR 221 + +#define BLOCK_IOCTL 4245 +#define BLOCK_SET_DEPTH 4246 + +#define MAX_LOCK_DEPTH 10 + +/* this needs to be reconciled with driver/char/lpptest.c + */ +static inline u64 notrace get_cpu_tick(void) +{ + u64 tsc; +#if defined(CONFIG_X86) + __asm__ __volatile__("rdtsc" : "=A" (tsc)); +#elif defined(CONFIG_PPC) + unsigned long hi, lo; + + do { + hi = get_tbu(); + lo = get_tbl(); + } while (get_tbu() != hi); + tsc = (u64)hi << 32 | lo; +#elif defined(CONFIG_ARM) + tsc = *oscr; +#else + #error Implement get_cpu_tick() +#endif + return tsc; +} + +void loop(int loops) +{ + int i; + + for (i = 0; i < loops; i++) + get_cpu_tick(); +} + +static spinlock_t blocker_lock[MAX_LOCK_DEPTH]; + +static unsigned int lock_depth = 1; + +void do_the_lock_and_loop(unsigned int args) +{ + int i, max; + + if (rt_task(current)) + max = lock_depth; + else if (lock_depth > 1) + max = (current->pid % lock_depth) + 1; + else + max = 1; + + /* Always lock from the top down */ + for (i = max-1; i >= 0; i--) + spin_lock(&blocker_lock[i]); + loop(args); + for (i = 0; i < max; i++) + spin_unlock(&blocker_lock[i]); +} + +static int blocker_open(struct inode *in, struct file *file) +{ + printk(KERN_INFO "blocker_open called\n"); + + return 0; +} + +static long blocker_ioctl(struct file *file, + unsigned int cmd, unsigned long args) +{ + switch(cmd) { + case BLOCK_IOCTL: + do_the_lock_and_loop(args); + return 0; + case BLOCK_SET_DEPTH: + if (args >= MAX_LOCK_DEPTH) + return -EINVAL; + lock_depth = args; + return 0; + default: + return -EINVAL; + } +} + +static struct file_operations blocker_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = blocker_ioctl, + .open = blocker_open, +}; + +static struct miscdevice blocker_dev = +{ + BLOCKER_MINOR, + "blocker", + &blocker_fops +}; + +static int __init blocker_init(void) +{ + int i; + + if (misc_register(&blocker_dev)) + return -ENODEV; + + for (i = 0; i < MAX_LOCK_DEPTH; i++) + spin_lock_init(blocker_lock + i); + + return 0; +} + +void __exit blocker_exit(void) +{ + printk(KERN_INFO "blocker device uninstalled\n"); + misc_deregister(&blocker_dev); +} + +module_init(blocker_init); +module_exit(blocker_exit); + +MODULE_LICENSE("GPL"); + Index: linux/drivers/char/cyclades.c =================================================================== --- linux.orig/drivers/char/cyclades.c +++ linux/drivers/char/cyclades.c @@ -865,7 +865,7 @@ static void cyz_poll(unsigned long); static long cyz_polling_cycle = CZ_DEF_POLL; static int cyz_timeron = 0; -static struct timer_list cyz_timerlist = TIMER_INITIALIZER(cyz_poll, 0, 0); +static DEFINE_TIMER(cyz_timerlist, cyz_poll, 0, 0); #else /* CONFIG_CYZ_INTR */ static void cyz_rx_restart(unsigned long); Index: linux/drivers/char/hangcheck-timer.c =================================================================== --- linux.orig/drivers/char/hangcheck-timer.c +++ linux/drivers/char/hangcheck-timer.c @@ -149,8 +149,7 @@ static unsigned long long hangcheck_tsc, static void hangcheck_fire(unsigned long); -static struct timer_list hangcheck_ticktock = - TIMER_INITIALIZER(hangcheck_fire, 0, 0); +static DEFINE_TIMER(hangcheck_ticktock, hangcheck_fire, 0, 0); static void hangcheck_fire(unsigned long data) Index: linux/drivers/char/ip2main.c =================================================================== --- linux.orig/drivers/char/ip2main.c +++ linux/drivers/char/ip2main.c @@ -255,7 +255,7 @@ static unsigned long bh_counter = 0; * selected, the board is serviced periodically to see if anything needs doing. */ #define POLL_TIMEOUT (jiffies + 1) -static struct timer_list PollTimer = TIMER_INITIALIZER(ip2_poll, 0, 0); +static DEFINE_TIMER(PollTimer, ip2_poll, 0, 0); static char TimerOn; #ifdef IP2DEBUG_TRACE Index: linux/drivers/char/ipmi/ipmi_si_intf.c =================================================================== --- linux.orig/drivers/char/ipmi/ipmi_si_intf.c +++ linux/drivers/char/ipmi/ipmi_si_intf.c @@ -52,7 +52,7 @@ #include #include #include -#ifdef CONFIG_HIGH_RES_TIMERS +#ifdef CONFIG_HIGH_RES_TIMERS_OLD #include # if defined(schedule_next_int) /* Old high-res timer code, do translations. */ @@ -61,11 +61,11 @@ # endif static inline void add_usec_to_timer(struct timer_list *t, long v) { - t->sub_expires += nsec_to_arch_cycle(v * 1000); - while (t->sub_expires >= arch_cycles_per_jiffy) + t->arch_cycle_expires += nsec_to_arch_cycle(v * 1000); + while (t->arch_cycle_expires >= arch_cycles_per_jiffy) { t->expires++; - t->sub_expires -= arch_cycles_per_jiffy; + t->arch_cycle_expires -= arch_cycles_per_jiffy; } } #endif @@ -758,7 +758,7 @@ static int initialized = 0; /* Must be called with interrupts off and with the si_lock held. */ static void si_restart_short_timer(struct smi_info *smi_info) { -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) unsigned long flags; unsigned long jiffies_now; @@ -772,7 +772,7 @@ static void si_restart_short_timer(struc read_lock(&xtime_lock); jiffies_now = jiffies; smi_info->si_timer.expires = jiffies_now; - smi_info->si_timer.sub_expires = get_arch_cycles(jiffies_now); + smi_info->si_timer.arch_cycle_expires = get_arch_cycles(jiffies_now); add_usec_to_timer(&smi_info->si_timer, SI_SHORT_TIMEOUT_USEC); @@ -829,11 +829,11 @@ static void smi_timeout(unsigned long da spin_lock_irqsave(&smi_info->count_lock, flags); smi_info->short_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) read_lock(&xtime_lock); smi_info->si_timer.expires = jiffies; - smi_info->si_timer.sub_expires - = get_arch_cycles(smi_info->si_timer.expires); + smi_info->si_timer.arch_cycle_expires = + get_arch_cycles(smi_info->si_timer.expires); read_unlock(&xtime_lock); add_usec_to_timer(&smi_info->si_timer, SI_SHORT_TIMEOUT_USEC); #else @@ -844,8 +844,8 @@ static void smi_timeout(unsigned long da smi_info->long_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; -#if defined(CONFIG_HIGH_RES_TIMERS) - smi_info->si_timer.sub_expires = 0; +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) + smi_info->si_timer.arch_cycle_expires = 0; #endif } Index: linux/drivers/char/ipmi/ipmi_watchdog.c =================================================================== --- linux.orig/drivers/char/ipmi/ipmi_watchdog.c +++ linux/drivers/char/ipmi/ipmi_watchdog.c @@ -368,7 +368,8 @@ static void panic_halt_ipmi_set_timeout( when both messages are free. */ static atomic_t heartbeat_tofree = ATOMIC_INIT(0); static DECLARE_MUTEX(heartbeat_lock); -static DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); +/* PREEMPT_RT: should be a completion instead */ +static COMPAT_DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); static void heartbeat_free_smi(struct ipmi_smi_msg *msg) { if (atomic_dec_and_test(&heartbeat_tofree)) Index: linux/drivers/char/istallion.c =================================================================== --- linux.orig/drivers/char/istallion.c +++ linux/drivers/char/istallion.c @@ -780,7 +780,7 @@ static struct file_operations stli_fsiom * much cheaper on host cpu than using interrupts. It turns out to * not increase character latency by much either... */ -static struct timer_list stli_timerlist = TIMER_INITIALIZER(stli_poll, 0, 0); +static DEFINE_TIMER(stli_timerlist, stli_poll, 0, 0); static int stli_timeron; Index: linux/drivers/char/keyboard.c =================================================================== --- linux.orig/drivers/char/keyboard.c +++ linux/drivers/char/keyboard.c @@ -233,8 +233,7 @@ static void kd_nosound(unsigned long ign } } -static struct timer_list kd_mksound_timer = - TIMER_INITIALIZER(kd_nosound, 0, 0); +static DEFINE_TIMER(kd_mksound_timer, kd_nosound, 0, 0); void kd_mksound(unsigned int hz, unsigned int ticks) { Index: linux/drivers/char/lpptest.c =================================================================== --- /dev/null +++ linux/drivers/char/lpptest.c @@ -0,0 +1,186 @@ +/* + * /dev/lpptest device: test IRQ handling latencies over parallel port + * + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar + * + * licensed under the GPL + * + * You need to have CONFIG_PARPORT disabled for this device, it is a + * completely self-contained device that assumes sole ownership of the + * parallel port. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LPPTEST_CHAR_MAJOR 245 +#define LPPTEST_DEVICE_NAME "lpptest" + +#define LPPTEST_IRQ 7 + +#define LPPTEST_TEST _IOR (LPPTEST_CHAR_MAJOR, 1, unsigned long long) +#define LPPTEST_DISABLE _IOR (LPPTEST_CHAR_MAJOR, 2, unsigned long long) +#define LPPTEST_ENABLE _IOR (LPPTEST_CHAR_MAJOR, 3, unsigned long long) + +static char dev_id[] = "lpptest"; + +#define INIT_PORT() outb(0x04, 0x37a) +#define ENABLE_IRQ() outb(0x10, 0x37a) +#define DISABLE_IRQ() outb(0, 0x37a) + +static unsigned char out = 0x5a; + +/* this needs to be reconciled with driver/char/blocker.c + */ +static inline u64 notrace get_cpu_tick(void) +{ + u64 tsc; +#if defined(CONFIG_X86) + __asm__ __volatile__("rdtsc" : "=A" (tsc)); +#elif defined(CONFIG_PPC) + unsigned long hi, lo; + + do { + hi = get_tbu(); + lo = get_tbl(); + } while (get_tbu() != hi); + tsc = (u64)hi << 32 | lo; +#elif defined(CONFIG_ARM) + tsc = *oscr; +#else + #error Implement get_cpu_tick() +#endif + return tsc; +} + +/** + * Interrupt handler. Flip a bit in the reply. + */ +static int lpptest_irq (int irq, void *dev_id, struct pt_regs *regs) +{ + out ^= 0xff; + outb(out, 0x378); + + return IRQ_HANDLED; +} + +static cycles_t test_response(void) +{ + cycles_t now, end; + unsigned char in; + int timeout = 0; + + raw_local_irq_disable(); + in = inb(0x379); + inb(0x378); + outb(0x08, 0x378); + now = get_cpu_tick(); + while(1) { + if (inb(0x379) != in) + break; + if (timeout++ > 1000000) { + outb(0x00, 0x378); + raw_local_irq_enable(); + + return 0; + } + } + end = get_cpu_tick(); + outb(0x00, 0x378); + raw_local_irq_enable(); + + return end - now; +} + +static int lpptest_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int lpptest_close(struct inode *inode, struct file *file) +{ + return 0; +} + +int lpptest_ioctl(struct inode *inode, struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) +{ + int retval = 0; + + switch (ioctl_num) { + + case LPPTEST_DISABLE: + DISABLE_IRQ(); + break; + + case LPPTEST_ENABLE: + ENABLE_IRQ(); + break; + + case LPPTEST_TEST: { + + cycles_t diff = test_response(); + if (copy_to_user((void *)ioctl_param, (void*) &diff, sizeof(diff))) + goto errcpy; + break; + } + default: retval = -EINVAL; + } + + return retval; + + errcpy: + return -EFAULT; +} + +static struct file_operations lpptest_dev_fops = { + .ioctl = lpptest_ioctl, + .open = lpptest_open, + .release = lpptest_close, +}; + +static int __init lpptest_init (void) +{ + if (register_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME, &lpptest_dev_fops)) + { + printk(KERN_NOTICE "Can't allocate major number %d for lpptest.\n", + LPPTEST_CHAR_MAJOR); + return -EAGAIN; + } + + if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) { + printk (KERN_WARNING "lpptest: irq %d in use. Unload parport module!\n", LPPTEST_IRQ); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); + return -EAGAIN; + } + irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY; + irq_desc[LPPTEST_IRQ].action->flags |= SA_NODELAY | SA_INTERRUPT; + + INIT_PORT(); + ENABLE_IRQ(); + + return 0; +} +module_init (lpptest_init); + +static void __exit lpptest_exit (void) +{ + DISABLE_IRQ(); + + free_irq(LPPTEST_IRQ, dev_id); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); +} +module_exit (lpptest_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("lpp test module"); + Index: linux/drivers/char/random.c =================================================================== --- linux.orig/drivers/char/random.c +++ linux/drivers/char/random.c @@ -417,7 +417,7 @@ static struct entropy_store input_pool = .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -426,7 +426,7 @@ static struct entropy_store blocking_poo .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data }; @@ -434,7 +434,7 @@ static struct entropy_store nonblocking_ .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), .pool = nonblocking_pool_data }; @@ -581,8 +581,11 @@ static void add_timer_randomness(struct preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -627,9 +630,6 @@ static void add_timer_randomness(struct if(input_pool.entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); - -out: - preempt_enable(); } extern void add_input_randomness(unsigned int type, unsigned int code, Index: linux/drivers/char/rtc.c =================================================================== --- linux.orig/drivers/char/rtc.c +++ linux/drivers/char/rtc.c @@ -88,6 +88,28 @@ #include #endif +#ifdef CONFIG_RTC_HISTOGRAM + +static cycles_t last_interrupt_time; + +#include + +#define CPU_MHZ (cpu_khz / 1000) + +#define HISTSIZE 10000 +static int histogram[HISTSIZE]; + +static int rtc_state; + +enum rtc_states { + S_STARTUP, /* First round - let the application start */ + S_IDLE, /* Waiting for an interrupt */ + S_WAITING_FOR_READ, /* Signal delivered. waiting for rtc_read() */ + S_READ_MISSED, /* Signal delivered, read() deadline missed */ +}; + +#endif + #ifdef __sparc__ #include #include @@ -205,7 +227,147 @@ static inline unsigned char rtc_is_updat return uip; } +#ifndef RTC_IRQ +# undef CONFIG_RTC_HISTOGRAM +#endif + +static inline void rtc_open_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i; + + last_interrupt_time = 0; + rtc_state = S_STARTUP; + rtc_irq_data = 0; + + for (i = 0; i < HISTSIZE; i++) + histogram[i] = 0; +#endif +} + +static inline void rtc_wake_event(void) +{ +#ifndef CONFIG_RTC_HISTOGRAM + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +#else + if (!(rtc_status & RTC_IS_OPEN)) + return; + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + break; + /* Waiting for an interrupt */ + case S_IDLE: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + last_interrupt_time = get_cycles(); + rtc_state = S_WAITING_FOR_READ; + break; + + /* Signal has been delivered. waiting for rtc_read() */ + case S_WAITING_FOR_READ: + /* + * Well foo. The usermode application didn't + * schedule and read in time. + */ + rtc_state = S_READ_MISSED; + printk("`%s'[%d] is being piggy. need_resched=%d, cpu=%d\n", + current->comm, current->pid, + need_resched(), smp_processor_id()); + printk("Read missed before next interrupt\n"); + break; + /* Signal has been delivered, read() deadline was missed */ + case S_READ_MISSED: + /* + * Not much we can do here. We're waiting for the usermode + * application to read the rtc + */ + break; + } +#endif +} + +static inline void rtc_read_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + cycles_t now = get_cycles(); + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + rtc_state = S_IDLE; + break; + + /* Waiting for an interrupt */ + case S_IDLE: + printk("bug in rtc_read(): called in state S_IDLE!\n"); + break; + case S_WAITING_FOR_READ: /* + * Signal has been delivered. + * waiting for rtc_read() + */ + /* + * Well done + */ + case S_READ_MISSED: /* + * Signal has been delivered, read() + * deadline was missed + */ + /* + * So, you finally got here. + */ + if (!last_interrupt_time) + printk("bug in rtc_read(): last_interrupt_time = 0\n"); + rtc_state = S_IDLE; + { + cycles_t latency = now - last_interrupt_time; + unsigned long delta; /* Microseconds */ + + delta = latency; + delta /= CPU_MHZ; + + if (delta > 1000 * 1000) { + printk("rtc: eek\n"); + } else { + unsigned long slot = delta; + if (slot >= HISTSIZE) + slot = HISTSIZE - 1; + histogram[slot]++; + if (delta > 2000) + printk("wow! That was a " + "%ld millisec bump\n", + delta / 1000); + } + } + rtc_state = S_IDLE; + break; + } +#endif +} + +static inline void rtc_close_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i = 0; + unsigned long total = 0; + + for (i = 0; i < HISTSIZE; i++) + total += histogram[i]; + if (!total) + return; + + printk("\nrtc latency histogram of {%s/%d, %lu samples}:\n", + current->comm, current->pid, total); + for (i = 0; i < HISTSIZE; i++) { + if (histogram[i]) + printk("%d %d\n", i, histogram[i]); + } +#endif +} + #ifdef RTC_IRQ + /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, * but there is possibility of conflicting with the set_rtc_mmss() @@ -218,6 +380,8 @@ static inline unsigned char rtc_is_updat irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) { + int mod; + /* * Can be an alarm interrupt, update complete interrupt, * or a periodic interrupt. We store the status in the @@ -239,19 +403,22 @@ irqreturn_t rtc_interrupt(int irq, void rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); } + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; spin_unlock (&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); /* Now do the rest of the actions */ spin_lock(&rtc_task_lock); if (rtc_callback) rtc_callback->func(rtc_callback->private_data); spin_unlock(&rtc_task_lock); - wake_up_interruptible(&rtc_wait); - kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + rtc_wake_event(); + wake_up_interruptible(&rtc_wait); return IRQ_HANDLED; } @@ -355,6 +522,8 @@ static ssize_t rtc_read(struct file *fil schedule(); } while (1); + rtc_read_event(); + if (count < sizeof(unsigned long)) retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); else @@ -405,8 +574,8 @@ static int rtc_do_ioctl(unsigned int cmd if (rtc_status & RTC_TIMER_ON) { spin_lock_irq (&rtc_lock); rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); spin_unlock_irq (&rtc_lock); + del_timer(&rtc_irq_timer); } return 0; } @@ -424,9 +593,9 @@ static int rtc_do_ioctl(unsigned int cmd if (!(rtc_status & RTC_TIMER_ON)) { spin_lock_irq (&rtc_lock); rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100; - add_timer(&rtc_irq_timer); rtc_status |= RTC_TIMER_ON; spin_unlock_irq (&rtc_lock); + add_timer(&rtc_irq_timer); } set_rtc_irq_bit(RTC_PIE); return 0; @@ -584,6 +753,11 @@ static int rtc_do_ioctl(unsigned int cmd save_freq_select = CMOS_READ(RTC_FREQ_SELECT); CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + /* + * Make CMOS date writes nonpreemptible even on PREEMPT_RT. + * There's a limit to everything! =B-) + */ + preempt_disable(); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); #endif @@ -593,6 +767,7 @@ static int rtc_do_ioctl(unsigned int cmd CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); + preempt_enable(); CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); @@ -690,6 +865,7 @@ static int rtc_open(struct inode *inode, if(rtc_status & RTC_IS_OPEN) goto out_busy; + rtc_open_event(); rtc_status |= RTC_IS_OPEN; rtc_irq_data = 0; @@ -711,6 +887,7 @@ static int rtc_release(struct inode *ino { #ifdef RTC_IRQ unsigned char tmp; + int del; if (rtc_has_irq == 0) goto no_irq; @@ -729,11 +906,14 @@ static int rtc_release(struct inode *ino CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); } + del = 0; if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); + del = 1; } spin_unlock_irq(&rtc_lock); + if (del) + del_timer(&rtc_irq_timer); if (file->f_flags & FASYNC) { rtc_fasync (-1, file, 0); @@ -745,6 +925,7 @@ no_irq: rtc_irq_data = 0; rtc_status &= ~RTC_IS_OPEN; spin_unlock_irq (&rtc_lock); + rtc_close_event(); return 0; } @@ -809,6 +990,7 @@ int rtc_unregister(rtc_task_t *task) return -EIO; #else unsigned char tmp; + int del; spin_lock_irq(&rtc_lock); spin_lock(&rtc_task_lock); @@ -828,12 +1010,15 @@ int rtc_unregister(rtc_task_t *task) CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); } + del = 0; if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); + del = 1; } rtc_status &= ~RTC_IS_OPEN; spin_unlock(&rtc_task_lock); + if (del) + del_timer(&rtc_irq_timer); spin_unlock_irq(&rtc_lock); return 0; #endif @@ -1093,6 +1278,7 @@ module_exit(rtc_exit); static void rtc_dropped_irq(unsigned long data) { unsigned long freq; + int mod; spin_lock_irq (&rtc_lock); @@ -1102,8 +1288,9 @@ static void rtc_dropped_irq(unsigned lon } /* Just in case someone disabled the timer from behind our back... */ + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; rtc_irq_data += ((rtc_freq/HZ)<<8); rtc_irq_data &= ~0xff; @@ -1112,6 +1299,8 @@ static void rtc_dropped_irq(unsigned lon freq = rtc_freq; spin_unlock_irq(&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); Index: linux/drivers/char/specialix.c =================================================================== --- linux.orig/drivers/char/specialix.c +++ linux/drivers/char/specialix.c @@ -2491,7 +2491,7 @@ static int __init specialix_init(void) #endif for (i = 0; i < SX_NBOARD; i++) - sx_board[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sx_board[i].lock); if (sx_init_drivers()) { func_exit(); Index: linux/drivers/char/sx.c =================================================================== --- linux.orig/drivers/char/sx.c +++ linux/drivers/char/sx.c @@ -2321,7 +2321,7 @@ static int sx_init_portstructs (int nboa #ifdef NEW_WRITE_LOCKING port->gs.port_write_sem = MUTEX; #endif - port->gs.driver_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&port->gs.driver_lock); /* * Initializing wait queue */ Index: linux/drivers/char/sysrq.c =================================================================== --- linux.orig/drivers/char/sysrq.c +++ linux/drivers/char/sysrq.c @@ -169,6 +169,38 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; +#ifdef CONFIG_RT_DEADLOCK_DETECT + +static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + show_all_locks(); +} + +static struct sysrq_key_op sysrq_showlocks_op = { + .handler = sysrq_handle_showlocks, + .help_msg = "show-all-locks(D)", + .action_msg = "Show Locks Held", +}; + +#endif + +#if defined(__i386__) + +static void sysrq_handle_showallregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + nmi_show_all_regs(); +} + +static struct sysrq_key_op sysrq_showallregs_op = { + .handler = sysrq_handle_showallregs, + .help_msg = "showalLcpupc", + .action_msg = "Show Regs On All CPUs", +}; + +#endif + static void sysrq_handle_showstate(int key, struct pt_regs *pt_regs, struct tty_struct *tty) @@ -294,7 +326,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* c */ NULL, #endif +#ifdef CONFIG_RT_DEADLOCK_DETECT +/* d */ &sysrq_showlocks_op, +#else /* d */ NULL, +#endif /* e */ &sysrq_term_op, /* f */ &sysrq_moom_op, /* g */ NULL, @@ -306,7 +342,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* k */ NULL, #endif +#if defined(__i386__) +/* l */ &sysrq_showallregs_op, +#else /* l */ NULL, +#endif /* m */ &sysrq_showmem_op, /* n */ &sysrq_unrt_op, /* o */ NULL, /* This will often be registered Index: linux/drivers/char/tty_io.c =================================================================== --- linux.orig/drivers/char/tty_io.c +++ linux/drivers/char/tty_io.c @@ -225,6 +225,7 @@ static int check_tty_count(struct tty_st printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " "!= #fd's(%d) in %s\n", tty->name, tty->count, count, routine); + dump_stack(); return count; } #endif @@ -839,8 +840,8 @@ static void do_tty_hangup(void *data) p->signal->tty = NULL; if (!p->signal->leader) continue; - send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p); - send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) p->signal->tty_old_pgrp = tty->pgrp; } while_each_task_pid(tty->session, PIDTYPE_SID, p); Index: linux/drivers/char/vt.c =================================================================== --- linux.orig/drivers/char/vt.c +++ linux/drivers/char/vt.c @@ -2433,7 +2433,7 @@ static int con_open(struct tty_struct *t int ret = 0; acquire_console_sem(); - if (tty->count == 1) { + if (tty->driver_data == NULL) { ret = vc_allocate(currcons); if (ret == 0) { struct vc_data *vc = vc_cons[currcons].d; Index: linux/drivers/char/watchdog/cpu5wdt.c =================================================================== --- linux.orig/drivers/char/watchdog/cpu5wdt.c +++ linux/drivers/char/watchdog/cpu5wdt.c @@ -56,7 +56,7 @@ static int ticks = 10000; /* some device data */ static struct { - struct semaphore stop; + struct compat_semaphore stop; volatile int running; struct timer_list timer; volatile int queue; Index: linux/drivers/char/watchdog/mixcomwd.c =================================================================== --- linux.orig/drivers/char/watchdog/mixcomwd.c +++ linux/drivers/char/watchdog/mixcomwd.c @@ -59,7 +59,7 @@ static unsigned long mixcomwd_opened; /* static int watchdog_port; static int mixcomwd_timer_alive; -static struct timer_list mixcomwd_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(mixcomwd_timer, NULL, 0, 0); static char expect_close; static int nowayout = WATCHDOG_NOWAYOUT; Index: linux/drivers/char/watchdog/softdog.c =================================================================== --- linux.orig/drivers/char/watchdog/softdog.c +++ linux/drivers/char/watchdog/softdog.c @@ -75,8 +75,7 @@ MODULE_PARM_DESC(soft_noboot, "Softdog a static void watchdog_fire(unsigned long); -static struct timer_list watchdog_ticktock = - TIMER_INITIALIZER(watchdog_fire, 0, 0); +static DEFINE_TIMER(watchdog_ticktock, watchdog_fire, 0, 0); static unsigned long timer_alive; static char expect_close; Index: linux/drivers/cpufreq/cpufreq.c =================================================================== --- linux.orig/drivers/cpufreq/cpufreq.c +++ linux/drivers/cpufreq/cpufreq.c @@ -605,7 +605,8 @@ static int cpufreq_add_dev (struct sys_d policy->cpu = cpu; policy->cpus = cpumask_of_cpu(cpu); - init_MUTEX_LOCKED(&policy->lock); + init_MUTEX(&policy->lock); + down(&policy->lock); init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update, (void *)(long)cpu); @@ -614,6 +615,7 @@ static int cpufreq_add_dev (struct sys_d */ ret = cpufreq_driver->init(policy); if (ret) { + up(&policy->lock); dprintk("initialization failed\n"); goto err_out; } @@ -626,8 +628,10 @@ static int cpufreq_add_dev (struct sys_d strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN); ret = kobject_register(&policy->kobj); - if (ret) + if (ret) { + up(&policy->lock); goto err_out; + } /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; Index: linux/drivers/i2c/busses/i2c-s3c2410.c =================================================================== --- linux.orig/drivers/i2c/busses/i2c-s3c2410.c +++ linux/drivers/i2c/busses/i2c-s3c2410.c @@ -574,7 +574,7 @@ static struct i2c_algorithm s3c24xx_i2c_ }; static struct s3c24xx_i2c s3c24xx_i2c = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_i2c.lock), .wait = __WAIT_QUEUE_HEAD_INITIALIZER(s3c24xx_i2c.wait), .adap = { .name = "s3c2410-i2c", Index: linux/drivers/ide/ide-io.c =================================================================== --- linux.orig/drivers/ide/ide-io.c +++ linux/drivers/ide/ide-io.c @@ -636,7 +636,7 @@ static ide_startstop_t drive_cmd_intr (i u8 stat = hwif->INB(IDE_STATUS_REG); int retries = 10; - local_irq_enable(); + local_irq_enable_nort(); if ((stat & DRQ_STAT) && args && args[3]) { u8 io_32bit = drive->io_32bit; drive->io_32bit = 0; @@ -1106,7 +1106,7 @@ static void ide_do_request (ide_hwgroup_ ide_get_lock(ide_intr, hwgroup); /* caller must own ide_lock */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); while (!hwgroup->busy) { hwgroup->busy = 1; @@ -1211,8 +1211,7 @@ static void ide_do_request (ide_hwgroup_ */ if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&ide_lock); - local_irq_enable(); + spin_unlock_irq(&ide_lock); /* allow other IRQs while we start this request */ startstop = start_request(drive, rq); spin_lock_irq(&ide_lock); @@ -1360,7 +1359,7 @@ void ide_timer_expiry (unsigned long dat #endif /* DISABLE_IRQ_NOSYNC */ /* local CPU only, * as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwgroup->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { @@ -1557,7 +1556,7 @@ irqreturn_t ide_intr (int irq, void *dev spin_unlock(&ide_lock); if (drive->unmask) - local_irq_enable(); + local_irq_enable_nort(); /* service this interrupt, may set handler for next interrupt */ startstop = handler(drive); spin_lock_irq(&ide_lock); Index: linux/drivers/ide/ide-iops.c =================================================================== --- linux.orig/drivers/ide/ide-iops.c +++ linux/drivers/ide/ide-iops.c @@ -246,10 +246,10 @@ static void ata_input_data(ide_drive_t * if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->INSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->INSL(IDE_DATA_REG, buffer, wcount); } else { @@ -268,10 +268,10 @@ static void ata_output_data(ide_drive_t if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->OUTSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->OUTSL(IDE_DATA_REG, buffer, wcount); } else { @@ -570,12 +570,12 @@ int ide_wait_stat (ide_startstop_t *star if (!(stat & BUSY_STAT)) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *startstop = ide_error(drive, "status timeout", stat); return 1; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. @@ -762,17 +762,15 @@ int ide_driveid_update (ide_drive_t *dri printk("%s: CHECK for good STATUS\n", drive->name); return 0; } - local_irq_save(flags); - SELECT_MASK(drive, 0); id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); - if (!id) { - local_irq_restore(flags); + if (!id) return 0; - } + local_irq_save_nort(flags); + SELECT_MASK(drive, 0); ata_input_data(drive, id, SECTOR_WORDS); (void) hwif->INB(IDE_STATUS_REG); /* clear drive IRQ */ - local_irq_enable(); - local_irq_restore(flags); + local_irq_enable_nort(); + local_irq_restore_nort(flags); ide_fix_driveid(id); if (id) { drive->id->dma_ultra = id->dma_ultra; @@ -852,7 +850,7 @@ int ide_config_drive_speed (ide_drive_t if (time_after(jiffies, timeout)) break; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* Index: linux/drivers/ide/ide-lib.c =================================================================== --- linux.orig/drivers/ide/ide-lib.c +++ linux/drivers/ide/ide-lib.c @@ -447,15 +447,16 @@ EXPORT_SYMBOL_GPL(ide_set_xfer_rate); static void ide_dump_opcode(ide_drive_t *drive) { + unsigned long flags; struct request *rq; u8 opcode = 0; int found = 0; - spin_lock(&ide_lock); + spin_lock_irqsave(&ide_lock, flags); rq = NULL; if (HWGROUP(drive)) rq = HWGROUP(drive)->rq; - spin_unlock(&ide_lock); + spin_unlock_irqrestore(&ide_lock, flags); if (!rq) return; if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { @@ -483,10 +484,8 @@ static void ide_dump_opcode(ide_drive_t static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat) { ide_hwif_t *hwif = HWIF(drive); - unsigned long flags; u8 err = 0; - local_irq_set(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (stat & BUSY_STAT) printk("Busy "); @@ -546,7 +545,7 @@ static u8 ide_dump_ata_status(ide_drive_ printk("\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return err; } @@ -561,14 +560,12 @@ static u8 ide_dump_ata_status(ide_drive_ static u8 ide_dump_atapi_status(ide_drive_t *drive, const char *msg, u8 stat) { - unsigned long flags; - atapi_status_t status; atapi_error_t error; status.all = stat; error.all = 0; - local_irq_set(flags); + printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (status.b.bsy) printk("Busy "); @@ -594,7 +591,7 @@ static u8 ide_dump_atapi_status(ide_driv printk("}\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return error.all; } Index: linux/drivers/ide/ide-probe.c =================================================================== --- linux.orig/drivers/ide/ide-probe.c +++ linux/drivers/ide/ide-probe.c @@ -184,7 +184,7 @@ static inline void do_identify (ide_driv hwif->ata_input_data(drive, id, SECTOR_WORDS); drive->id_read = 1; - local_irq_enable(); + local_irq_enable_nort(); ide_fix_driveid(id); #if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) @@ -362,14 +362,14 @@ static int actual_try_to_identify (ide_d unsigned long flags; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* drive returned ID */ do_identify(drive, cmd); /* drive responded with ID */ rc = 0; /* clear drive IRQ */ (void) hwif->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { /* drive refused ID */ rc = 2; @@ -842,7 +842,7 @@ static void probe_hwif(ide_hwif_t *hwif) } while ((stat & BUSY_STAT) && time_after(timeout, jiffies)); } - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * Use cached IRQ number. It might be (and is...) changed by probe * code above Index: linux/drivers/ide/ide-taskfile.c =================================================================== --- linux.orig/drivers/ide/ide-taskfile.c +++ linux/drivers/ide/ide-taskfile.c @@ -227,7 +227,7 @@ ide_startstop_t task_no_data_intr (ide_d ide_hwif_t *hwif = HWIF(drive); u8 stat; - local_irq_enable(); + local_irq_enable_nort(); if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ @@ -279,7 +279,7 @@ static void ide_pio_sector(ide_drive_t * offset %= PAGE_SIZE; #ifdef CONFIG_HIGHMEM - local_irq_save(flags); + local_irq_save_nort(flags); #endif buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -299,7 +299,7 @@ static void ide_pio_sector(ide_drive_t * kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef CONFIG_HIGHMEM - local_irq_restore(flags); + local_irq_restore_nort(flags); #endif } @@ -457,7 +457,7 @@ ide_startstop_t pre_task_out_intr (ide_d } if (!drive->unmask) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL); ide_pio_datablock(drive, rq, 1); Index: linux/drivers/ide/ide.c =================================================================== --- linux.orig/drivers/ide/ide.c +++ linux/drivers/ide/ide.c @@ -1049,15 +1049,13 @@ int ide_spin_wait_hwgroup (ide_drive_t * spin_lock_irq(&ide_lock); while (hwgroup->busy) { - unsigned long lflags; spin_unlock_irq(&ide_lock); - local_irq_set(lflags); + if (time_after(jiffies, timeout)) { - local_irq_restore(lflags); printk(KERN_ERR "%s: channel busy\n", drive->name); return -EBUSY; } - local_irq_restore(lflags); + spin_lock_irq(&ide_lock); } return 0; Index: linux/drivers/ieee1394/ieee1394_types.h =================================================================== --- linux.orig/drivers/ieee1394/ieee1394_types.h +++ linux/drivers/ieee1394/ieee1394_types.h @@ -19,7 +19,7 @@ struct hpsb_tlabel_pool { spinlock_t lock; u8 next; u32 allocations; - struct semaphore count; + struct compat_semaphore count; }; #define HPSB_TPOOL_INIT(_tp) \ @@ -28,7 +28,7 @@ do { \ spin_lock_init(&(_tp)->lock); \ (_tp)->next = 0; \ (_tp)->allocations = 0; \ - sema_init(&(_tp)->count, 63); \ + sema_init(&(_tp)->count, 63); \ } while (0) Index: linux/drivers/ieee1394/nodemgr.c =================================================================== --- linux.orig/drivers/ieee1394/nodemgr.c +++ linux/drivers/ieee1394/nodemgr.c @@ -114,7 +114,7 @@ struct host_info { struct hpsb_host *host; struct list_head list; struct completion exited; - struct semaphore reset_sem; + struct compat_semaphore reset_sem; int pid; char daemon_name[15]; int kill_me; Index: linux/drivers/ieee1394/raw1394-private.h =================================================================== --- linux.orig/drivers/ieee1394/raw1394-private.h +++ linux/drivers/ieee1394/raw1394-private.h @@ -29,7 +29,7 @@ struct file_info { struct list_head req_pending; struct list_head req_complete; - struct semaphore complete_sem; + struct compat_semaphore complete_sem; spinlock_t reqlists_lock; wait_queue_head_t poll_wait_complete; Index: linux/drivers/media/dvb/dvb-core/dvb_frontend.c =================================================================== --- linux.orig/drivers/media/dvb/dvb-core/dvb_frontend.c +++ linux/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -97,7 +97,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct compat_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; pid_t thread_pid; @@ -556,7 +556,7 @@ static void dvb_frontend_stop(struct dvb printk("dvb_frontend_stop: thread PID %d already died\n", fepriv->thread_pid); /* make sure the mutex was not held by the thread */ - init_MUTEX (&fepriv->sem); + sema_init (&fepriv->sem, 1); return; } @@ -908,10 +908,10 @@ int dvb_register_frontend(struct dvb_ada fepriv = fe->frontend_priv; memset(fe->frontend_priv, 0, sizeof(struct dvb_frontend_private)); - init_MUTEX (&fepriv->sem); + sema_init (&fepriv->sem, 1); init_waitqueue_head (&fepriv->wait_queue); init_waitqueue_head (&fepriv->events.wait_queue); - init_MUTEX (&fepriv->events.sem); + sema_init (&fepriv->events.sem, 1); fe->dvb = dvb; fepriv->inversion = INVERSION_OFF; fepriv->tone = SEC_TONE_OFF; Index: linux/drivers/media/dvb/dvb-core/dvb_frontend.h =================================================================== --- linux.orig/drivers/media/dvb/dvb-core/dvb_frontend.h +++ linux/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -86,7 +86,7 @@ struct dvb_fe_events { int eventr; int overflow; wait_queue_head_t wait_queue; - struct semaphore sem; + struct compat_semaphore sem; }; struct dvb_frontend { Index: linux/drivers/media/dvb/ttpci/av7110_ir.c =================================================================== --- linux.orig/drivers/media/dvb/ttpci/av7110_ir.c +++ linux/drivers/media/dvb/ttpci/av7110_ir.c @@ -50,7 +50,7 @@ static void av7110_emit_keyup(unsigned l } -static struct timer_list keyup_timer = { .function = av7110_emit_keyup }; +static DEFINE_TIMER(keyup_timer, av7110_emit_keyup, NULL, NULL); static void av7110_emit_key(u32 ircom) Index: linux/drivers/media/video/zr36120_i2c.c =================================================================== --- linux.orig/drivers/media/video/zr36120_i2c.c +++ linux/drivers/media/video/zr36120_i2c.c @@ -120,7 +120,7 @@ struct i2c_bus zoran_i2c_bus_template = I2C_BUSID_ZORAN, NULL, - SPIN_LOCK_UNLOCKED, + SPIN_LOCK_UNLOCKED(zoran_i2c_bus_template.lock), attach_inform, detach_inform, Index: linux/drivers/message/i2o/exec-osm.c =================================================================== --- linux.orig/drivers/message/i2o/exec-osm.c +++ linux/drivers/message/i2o/exec-osm.c @@ -204,7 +204,7 @@ static int i2o_msg_post_wait_complete(st { struct i2o_exec_wait *wait, *tmp; unsigned long flags; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); int rc = 1; /* Index: linux/drivers/misc/ibmasm/module.c =================================================================== --- linux.orig/drivers/misc/ibmasm/module.c +++ linux/drivers/misc/ibmasm/module.c @@ -85,7 +85,7 @@ static int __devinit ibmasm_init_one(str } memset(sp, 0, sizeof(struct service_processor)); - sp->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sp->lock); INIT_LIST_HEAD(&sp->command_queue); pci_set_drvdata(pdev, (void *)sp); Index: linux/drivers/net/3c59x.c =================================================================== --- linux.orig/drivers/net/3c59x.c +++ linux/drivers/net/3c59x.c @@ -956,9 +956,9 @@ static void poll_vortex(struct net_devic struct vortex_private *vp = netdev_priv(dev); unsigned long flags; local_save_flags(flags); - local_irq_disable(); + local_irq_disable_nort(); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev,NULL); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1987,13 +1987,17 @@ static void vortex_tx_timeout(struct net /* * Block interrupts because vortex_interrupt does a bare spin_lock() */ +#ifndef CONFIG_PREEMPT_RT unsigned long flags; local_irq_save(flags); +#endif if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev, NULL); else vortex_interrupt(dev->irq, dev, NULL); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } } Index: linux/drivers/net/8139too.c =================================================================== --- linux.orig/drivers/net/8139too.c +++ linux/drivers/net/8139too.c @@ -2128,10 +2128,10 @@ static int rtl8139_poll(struct net_devic * Order is important since data can get interrupted * again when we think we are done. */ - local_irq_disable(); + raw_local_irq_disable(); RTL_W16_F(IntrMask, rtl8139_intr_mask); __netif_rx_complete(dev); - local_irq_enable(); + raw_local_irq_enable(); } spin_unlock(&tp->rx_lock); Index: linux/drivers/net/atari_bionet.c =================================================================== --- linux.orig/drivers/net/atari_bionet.c +++ linux/drivers/net/atari_bionet.c @@ -155,7 +155,7 @@ static int bionet_close(struct net_devic static struct net_device_stats *net_get_stats(struct net_device *dev); static void bionet_tick(unsigned long); -static struct timer_list bionet_timer = TIMER_INITIALIZER(bionet_tick, 0, 0); +static DEFINE_TIMER(bionet_timer, bionet_tick, 0, 0); #define STRAM_ADDR(a) (((a) & 0xff000000) == 0) Index: linux/drivers/net/atari_pamsnet.c =================================================================== --- linux.orig/drivers/net/atari_pamsnet.c +++ linux/drivers/net/atari_pamsnet.c @@ -165,7 +165,7 @@ static void pamsnet_tick(unsigned long); static irqreturn_t pamsnet_intr(int irq, void *data, struct pt_regs *fp); -static struct timer_list pamsnet_timer = TIMER_INITIALIZER(pamsnet_tick, 0, 0); +static DEFINE_TIMER(pamsnet_timer, pamsnet_tick, 0, 0); #define STRAM_ADDR(a) (((a) & 0xff000000) == 0) Index: linux/drivers/net/cris/eth_v10.c =================================================================== --- linux.orig/drivers/net/cris/eth_v10.c +++ linux/drivers/net/cris/eth_v10.c @@ -384,8 +384,8 @@ static unsigned int mdio_phy_addr; /* Tr static unsigned int network_tr_ctrl_shadow = 0; /* Network speed indication. */ -static struct timer_list speed_timer = TIMER_INITIALIZER(NULL, 0, 0); -static struct timer_list clear_led_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(speed_timer, NULL, 0, 0); +static DEFINE_TIMER(clear_led_timer, NULL, 0, 0); static int current_speed; /* Speed read from transceiver */ static int current_speed_selection; /* Speed selected by user */ static unsigned long led_next_time; @@ -393,7 +393,7 @@ static int led_active; static int rx_queue_len; /* Duplex */ -static struct timer_list duplex_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(duplex_timer, NULL, 0, 0); static int full_duplex; static enum duplex current_duplex; Index: linux/drivers/net/e1000/e1000_main.c =================================================================== --- linux.orig/drivers/net/e1000/e1000_main.c +++ linux/drivers/net/e1000/e1000_main.c @@ -2262,10 +2262,10 @@ e1000_xmit_frame(struct sk_buff *skb, st if(adapter->pcix_82544) count += nr_frags; - local_irq_save(flags); + local_irq_save_nort(flags); if (!spin_trylock(&adapter->tx_lock)) { /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); + local_irq_restore_nort(flags); return NETDEV_TX_LOCKED; } if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) Index: linux/drivers/net/hamradio/yam.c =================================================================== --- linux.orig/drivers/net/hamradio/yam.c +++ linux/drivers/net/hamradio/yam.c @@ -170,7 +170,7 @@ static char ax25_bcast[7] = static char ax25_test[7] = {'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, '1' << 1}; -static struct timer_list yam_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(yam_timer, NULL, 0, 0); /* --------------------------------------------------------------------- */ Index: linux/drivers/net/mv643xx_eth.c =================================================================== --- linux.orig/drivers/net/mv643xx_eth.c +++ linux/drivers/net/mv643xx_eth.c @@ -95,7 +95,7 @@ static char mv643xx_driver_version[] = " static void __iomem *mv643xx_eth_shared_base; /* used to protect MV643XX_ETH_SMI_REG, which is shared across ports */ -static spinlock_t mv643xx_eth_phy_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(mv643xx_eth_phy_lock); static inline u32 mv_read(int offset) { Index: linux/drivers/net/netconsole.c =================================================================== --- linux.orig/drivers/net/netconsole.c +++ linux/drivers/net/netconsole.c @@ -75,10 +75,19 @@ static void write_msg(struct console *co return; local_irq_save(flags); +#ifdef CONFIG_PREEMPT_RT + /* + * A bit hairy. Netconsole uses mutexes (indirectly) and + * thus must have interrupts enabled: + */ + local_irq_enable(); +#endif for(left = len; left; ) { frag = min(left, MAX_PRINT_CHUNK); + WARN_ON_RT(irqs_disabled()); netpoll_send_udp(&np, msg, frag); + WARN_ON_RT(irqs_disabled()); msg += frag; left -= frag; } Index: linux/drivers/net/ns83820.c =================================================================== --- linux.orig/drivers/net/ns83820.c +++ linux/drivers/net/ns83820.c @@ -1014,8 +1014,6 @@ static void do_tx_done(struct net_device struct ns83820 *dev = PRIV(ndev); u32 cmdsts, tx_done_idx, *desc; - spin_lock_irq(&dev->tx_lock); - dprintk("do_tx_done(%p)\n", ndev); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1071,7 +1069,6 @@ static void do_tx_done(struct net_device netif_start_queue(ndev); netif_wake_queue(ndev); } - spin_unlock_irq(&dev->tx_lock); } static void ns83820_cleanup_tx(struct ns83820 *dev) @@ -1372,7 +1369,9 @@ static void ns83820_do_isr(struct net_de * work has accumulated */ if ((ISR_TXDESC | ISR_TXIDLE | ISR_TXOK | ISR_TXERR) & isr) { + spin_lock_irq(&dev->tx_lock); do_tx_done(ndev); + spin_unlock_irq(&dev->tx_lock); /* Disable TxOk if there are no outstanding tx packets. */ @@ -1457,7 +1456,7 @@ static void ns83820_tx_timeout(struct ne u32 tx_done_idx, *desc; unsigned long flags; - local_irq_save(flags); + spin_lock_irqsave(&dev->tx_lock, flags); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1484,7 +1483,7 @@ static void ns83820_tx_timeout(struct ne ndev->name, tx_done_idx, dev->tx_free_idx, le32_to_cpu(desc[DESC_CMDSTS])); - local_irq_restore(flags); + spin_unlock_irqrestore(&dev->tx_lock, flags); } static void ns83820_tx_watch(unsigned long data) Index: linux/drivers/net/plip.c =================================================================== --- linux.orig/drivers/net/plip.c +++ linux/drivers/net/plip.c @@ -229,7 +229,10 @@ struct net_local { struct hh_cache *hh); spinlock_t lock; atomic_t kill_timer; - struct semaphore killed_timer_sem; + /* + * PREEMPT_RT: this isnt a mutex, it should be struct completion. + */ + struct compat_semaphore killed_timer_sem; }; static inline void enable_parport_interrupts (struct net_device *dev) Index: linux/drivers/net/ppp_async.c =================================================================== --- linux.orig/drivers/net/ppp_async.c +++ linux/drivers/net/ppp_async.c @@ -65,7 +65,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; Index: linux/drivers/net/ppp_synctty.c =================================================================== --- linux.orig/drivers/net/ppp_synctty.c +++ linux/drivers/net/ppp_synctty.c @@ -70,7 +70,7 @@ struct syncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ }; Index: linux/drivers/net/tulip/tulip_core.c =================================================================== --- linux.orig/drivers/net/tulip/tulip_core.c +++ linux/drivers/net/tulip/tulip_core.c @@ -1822,6 +1822,7 @@ static void __devexit tulip_remove_one ( pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ Index: linux/drivers/oprofile/oprofilefs.c =================================================================== --- linux.orig/drivers/oprofile/oprofilefs.c +++ linux/drivers/oprofile/oprofilefs.c @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) { Index: linux/drivers/parisc/iosapic.c =================================================================== --- linux.orig/drivers/parisc/iosapic.c +++ linux/drivers/parisc/iosapic.c @@ -215,7 +215,7 @@ static inline void iosapic_write(void __ #define IOSAPIC_IRDT_ID_EID_SHIFT 0x10 -static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(iosapic_lock); static inline void iosapic_eoi(void __iomem *addr, unsigned int data) { Index: linux/drivers/pci/hotplug/cpci_hotplug_core.c =================================================================== --- linux.orig/drivers/pci/hotplug/cpci_hotplug_core.c +++ linux/drivers/pci/hotplug/cpci_hotplug_core.c @@ -60,8 +60,8 @@ static int slots; static atomic_t extracting; int cpci_debug; static struct cpci_hp_controller *controller; -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ static int thread_finished = 1; static int enable_slot(struct hotplug_slot *slot); Index: linux/drivers/pci/hotplug/cpqphp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/cpqphp_ctrl.c +++ linux/drivers/pci/hotplug/cpqphp_ctrl.c @@ -45,8 +45,8 @@ static int configure_new_function(struct u8 behind_bridge, struct resource_lists *resources); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux/drivers/pci/hotplug/ibmphp_hpc.c =================================================================== --- linux.orig/drivers/pci/hotplug/ibmphp_hpc.c +++ linux/drivers/pci/hotplug/ibmphp_hpc.c @@ -104,7 +104,7 @@ static int tid_poll; static struct semaphore sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct compat_semaphore sem_exit; // make sure polling thread goes away //---------------------------------------------------------------------------- // local function prototypes //---------------------------------------------------------------------------- Index: linux/drivers/pci/hotplug/pciehp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/pciehp_ctrl.c +++ linux/drivers/pci/hotplug/pciehp_ctrl.c @@ -48,8 +48,8 @@ static int configure_new_function( struc u8 behind_bridge, struct resource_lists *resources, u8 bridge_bus, u8 bridge_dev); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ static unsigned long surprise_rm_pending; /* = 0 */ Index: linux/drivers/pci/hotplug/shpchp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/shpchp_ctrl.c +++ linux/drivers/pci/hotplug/shpchp_ctrl.c @@ -47,8 +47,8 @@ static int configure_new_function( struc u8 behind_bridge, struct resource_lists *resources, u8 bridge_bus, u8 bridge_dev); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux/drivers/pci/quirks.c =================================================================== --- linux.orig/drivers/pci/quirks.c +++ linux/drivers/pci/quirks.c @@ -422,6 +422,25 @@ static void __devinit quirk_via_ioapic(s DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic ); /* + * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit. + * This leads to doubled level interrupt rates. + * Set this bit to get rid of cycle wastage. + * Otherwise uncritical. + */ +static void __devinit quirk_via_vt8237_bypass_apic_deassert(struct pci_dev *dev) +{ + u8 misc_control2; +#define BYPASS_APIC_DEASSERT 8 + + pci_read_config_byte(dev, 0x5B, &misc_control2); + if (!(misc_control2 & BYPASS_APIC_DEASSERT)) { + printk(KERN_INFO "PCI: Bypassing VIA 8237 APIC De-Assert Message\n"); + pci_write_config_byte(dev, 0x5B, misc_control2|BYPASS_APIC_DEASSERT); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); + +/* * The AMD io apic can hang the box when an apic irq is masked. * We check all revs >= B0 (yet not in the pre production!) as the bug * is currently marked NoFix @@ -507,6 +526,47 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_acpi ); /* + * Devices part of the VIA VT8237 don't need quirk_via_irq(). + * They also don't get confused by it, but dmesg gets quiter + * with this 'anti'-quirk. + * Here we are overly paranoic: + * we assume there might also exist via devices not part of the VT8237 + * needing quirk_via_irq(). + * This might never be the case in reality, when there is a VT8237. + */ +static unsigned int vt8237_devfn[] = { + PCI_DEVFN(15, 0), + PCI_DEVFN(15, 1), + PCI_DEVFN(16, 0), + PCI_DEVFN(16, 1), + PCI_DEVFN(16, 2), + PCI_DEVFN(16, 3), + PCI_DEVFN(16, 4), + PCI_DEVFN(16, 5), + PCI_DEVFN(17, 5), + PCI_DEVFN(17, 6), + PCI_DEVFN(18, 0) +}; +static struct pci_dev *quirk_via_irq_not[ARRAY_SIZE(vt8237_devfn)]; +static void quirk_via_irq_not_for_8237(struct pci_dev *dev) +{ + // Make sure we do this only once + if (quirk_via_irq_not[0] != NULL) + return; + + if (dev->devfn == PCI_DEVFN(0x11, 0)) { + int i, j; + for (i = 0, j = 0; i < ARRAY_SIZE(vt8237_devfn); i++) { + struct pci_dev * d; + d = pci_find_slot(dev->bus->number, vt8237_devfn[i]); + if (d != NULL) + quirk_via_irq_not[j++] = d; + } + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_irq_not_for_8237); + +/* * Via 686A/B: The PCI_INTERRUPT_LINE register for the on-chip * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature: * when written, it makes an internal connection to the PIC. @@ -519,6 +579,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V static void quirk_via_irq(struct pci_dev *dev) { u8 irq, new_irq; + int i; + + for (i = 0; i < ARRAY_SIZE(vt8237_devfn); i++) + if (quirk_via_irq_not[i] == dev) + return; new_irq = dev->irq & 0xf; pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); Index: linux/drivers/pcmcia/ds.c =================================================================== --- linux.orig/drivers/pcmcia/ds.c +++ linux/drivers/pcmcia/ds.c @@ -424,9 +424,13 @@ static int pcmcia_device_query(struct pc { cistpl_manfid_t manf_id; cistpl_funcid_t func_id; - cistpl_vers_1_t vers1; + cistpl_vers_1_t *vers1; unsigned int i; + vers1 = kmalloc(sizeof(*vers1), GFP_KERNEL); + if (!vers1) + return -ENOMEM; + if (!pccard_read_tuple(p_dev->socket, p_dev->func, CISTPL_MANFID, &manf_id)) { p_dev->manf_id = manf_id.manf; @@ -443,23 +447,30 @@ static int pcmcia_device_query(struct pc /* rule of thumb: cards with no FUNCID, but with * common memory device geometry information, are * probably memory cards (from pcmcia-cs) */ - cistpl_device_geo_t devgeo; + cistpl_device_geo_t *devgeo; + + devgeo = kmalloc(sizeof(*devgeo), GFP_KERNEL); + if (!devgeo) { + kfree(vers1); + return -ENOMEM; + } if (!pccard_read_tuple(p_dev->socket, p_dev->func, - CISTPL_DEVICE_GEO, &devgeo)) { + CISTPL_DEVICE_GEO, devgeo)) { ds_dbg(0, "mem device geometry probably means " "FUNCID_MEMORY\n"); p_dev->func_id = CISTPL_FUNCID_MEMORY; p_dev->has_func_id = 1; } + kfree(devgeo); } if (!pccard_read_tuple(p_dev->socket, p_dev->func, CISTPL_VERS_1, - &vers1)) { - for (i=0; i < vers1.ns; i++) { + vers1)) { + for (i=0; i < vers1->ns; i++) { char *tmp; unsigned int length; - tmp = vers1.str + vers1.ofs[i]; + tmp = vers1->str + vers1->ofs[i]; length = strlen(tmp) + 1; if ((length < 3) || (length > 255)) @@ -475,6 +486,7 @@ static int pcmcia_device_query(struct pc } } + kfree(vers1); return 0; } Index: linux/drivers/s390/char/vmlogrdr.c =================================================================== --- linux.orig/drivers/s390/char/vmlogrdr.c +++ linux/drivers/s390/char/vmlogrdr.c @@ -145,7 +145,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "EREP", .minor_num = 0, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[0].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -154,7 +154,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "ACCOUNT", .minor_num = 1, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[1].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -163,7 +163,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "SYMPTOM", .minor_num = 2, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[2].priv_lock), .autorecording = 1, .autopurge = 1, } Index: linux/drivers/s390/cio/cmf.c =================================================================== --- linux.orig/drivers/s390/cio/cmf.c +++ linux/drivers/s390/cio/cmf.c @@ -297,7 +297,7 @@ struct cmb_area { }; static struct cmb_area cmb_area = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cmb_area.lock), .list = LIST_HEAD_INIT(cmb_area.list), .num_channels = 1024, }; Index: linux/drivers/sbus/char/aurora.c =================================================================== --- linux.orig/drivers/sbus/char/aurora.c +++ linux/drivers/sbus/char/aurora.c @@ -871,8 +871,7 @@ static irqreturn_t aurora_interrupt(int #ifdef AURORA_INT_DEBUG static void aurora_timer (unsigned long ignored); -static struct timer_list aurora_poll_timer = - TIMER_INITIALIZER(aurora_timer, 0, 0); +static DEFINE_TIMER(aurora_poll_timer, aurora_timer, 0, 0); static void aurora_timer (unsigned long ignored) Index: linux/drivers/sbus/char/cpwatchdog.c =================================================================== --- linux.orig/drivers/sbus/char/cpwatchdog.c +++ linux/drivers/sbus/char/cpwatchdog.c @@ -155,7 +155,7 @@ struct wd_device { }; static struct wd_device wd_dev = { - 0, SPIN_LOCK_UNLOCKED, 0, 0, 0, 0, + 0, SPIN_LOCK_UNLOCKED(wd_dev.lock), 0, 0, 0, 0, }; static struct timer_list wd_timer; Index: linux/drivers/scsi/aacraid/aacraid.h =================================================================== --- linux.orig/drivers/scsi/aacraid/aacraid.h +++ linux/drivers/scsi/aacraid/aacraid.h @@ -702,7 +702,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct compat_semaphore wait_sem; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -769,7 +769,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. */ - struct semaphore event_wait; + struct compat_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ Index: linux/drivers/scsi/aic7xxx/aic79xx_osm.h =================================================================== --- linux.orig/drivers/scsi/aic7xxx/aic79xx_osm.h +++ linux/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -512,9 +512,9 @@ struct ahd_platform_data { struct timer_list completeq_timer; struct timer_list reset_timer; struct timer_list stats_timer; - struct semaphore eh_sem; - struct semaphore dv_sem; - struct semaphore dv_cmd_sem; /* XXX This needs to be in + struct compat_semaphore eh_sem; + struct compat_semaphore dv_sem; + struct compat_semaphore dv_cmd_sem; /* XXX This needs to be in * the target struct */ struct scsi_device *dv_scsi_dev; Index: linux/drivers/scsi/aic7xxx/aic7xxx_osm.h =================================================================== --- linux.orig/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -395,7 +395,7 @@ struct ahc_platform_data { spinlock_t spin_lock; u_int qfrozen; struct timer_list reset_timer; - struct semaphore eh_sem; + struct compat_semaphore eh_sem; struct Scsi_Host *host; /* pointer to scsi host */ #define AHC_LINUX_NOIRQ ((uint32_t)~0) uint32_t irq; /* IRQ for this adapter */ Index: linux/drivers/scsi/ch.c =================================================================== --- linux.orig/drivers/scsi/ch.c +++ linux/drivers/scsi/ch.c @@ -117,7 +117,7 @@ typedef struct { } scsi_changer; static LIST_HEAD(ch_devlist); -static spinlock_t ch_devlist_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(ch_devlist_lock); static int ch_devcount; static struct scsi_driver ch_template = Index: linux/drivers/scsi/pluto.c =================================================================== --- linux.orig/drivers/scsi/pluto.c +++ linux/drivers/scsi/pluto.c @@ -95,8 +95,7 @@ int __init pluto_detect(Scsi_Host_Templa int i, retry, nplutos; fc_channel *fc; Scsi_Device dev; - struct timer_list fc_timer = - TIMER_INITIALIZER(pluto_detect_timeout, 0, 0); + struct DEFINE_TIMER(fc_timer, pluto_detect_timeout, 0, 0); tpnt->proc_name = "pluto"; fcscount = 0; Index: linux/drivers/scsi/qla2xxx/qla_def.h =================================================================== --- linux.orig/drivers/scsi/qla2xxx/qla_def.h +++ linux/drivers/scsi/qla2xxx/qla_def.h @@ -2273,7 +2273,7 @@ typedef struct scsi_qla_host { spinlock_t mbx_reg_lock; /* Mbx Cmd Register Lock */ struct semaphore mbx_cmd_sem; /* Serialialize mbx access */ - struct semaphore mbx_intr_sem; /* Used for completion notification */ + struct compat_semaphore mbx_intr_sem; /* Used for completion notification */ uint32_t mbx_flags; #define MBX_IN_PROGRESS BIT_0 Index: linux/drivers/scsi/qla2xxx/qla_os.c =================================================================== --- linux.orig/drivers/scsi/qla2xxx/qla_os.c +++ linux/drivers/scsi/qla2xxx/qla_os.c @@ -2097,12 +2097,13 @@ qla2x00_free_sp_pool( scsi_qla_host_t *h static int qla2x00_do_dpc(void *data) { - DECLARE_MUTEX_LOCKED(sem); + DECLARE_MUTEX(sem); scsi_qla_host_t *ha; fc_port_t *fcport; uint8_t status; uint16_t next_loopid; + down(&sem); ha = (scsi_qla_host_t *)data; lock_kernel(); Index: linux/drivers/scsi/scsi.c =================================================================== --- linux.orig/drivers/scsi/scsi.c +++ linux/drivers/scsi/scsi.c @@ -771,10 +771,10 @@ void __scsi_done(struct scsi_cmnd *cmd) * It is a per-CPU queue, so we just disable local interrupts * and need no spinlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -791,9 +791,9 @@ static void scsi_softirq(struct softirq_ int disposition; LIST_HEAD(local_q); - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); - local_irq_enable(); + raw_local_irq_enable(); while (!list_empty(&local_q)) { struct scsi_cmnd *cmd = list_entry(local_q.next, @@ -1272,11 +1272,11 @@ static int scsi_cpu_notify(struct notifi switch(action) { case CPU_DEAD: /* Drain scsi_done_q. */ - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&per_cpu(scsi_done_q, cpu), &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_enable(); + raw_local_irq_enable(); break; default: break; Index: linux/drivers/serial/cpm_uart/cpm_uart_core.c =================================================================== --- linux.orig/drivers/serial/cpm_uart/cpm_uart_core.c +++ linux/drivers/serial/cpm_uart/cpm_uart_core.c @@ -908,7 +908,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC1].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -922,7 +922,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC2].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -939,7 +939,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC1].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -953,7 +953,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC2].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -967,7 +967,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC3_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC3].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -981,7 +981,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC4_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC4].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, Index: linux/drivers/serial/s3c2410.c =================================================================== --- linux.orig/drivers/serial/s3c2410.c +++ linux/drivers/serial/s3c2410.c @@ -970,7 +970,7 @@ static struct uart_driver s3c24xx_uart_d static struct s3c24xx_uart_port s3c24xx_serial_ports[NR_PORTS] = { [0] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[0].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX0, .uartclk = 0, @@ -982,7 +982,7 @@ static struct s3c24xx_uart_port s3c24xx_ }, [1] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[1].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX1, .uartclk = 0, @@ -996,7 +996,7 @@ static struct s3c24xx_uart_port s3c24xx_ [2] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[2].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX2, .uartclk = 0, Index: linux/drivers/usb/core/devio.c =================================================================== --- linux.orig/drivers/usb/core/devio.c +++ linux/drivers/usb/core/devio.c @@ -274,10 +274,11 @@ static void async_completed(struct urb * struct async *as = (struct async *)urb->context; struct dev_state *ps = as->ps; struct siginfo sinfo; + unsigned long flags; - spin_lock(&ps->lock); - list_move_tail(&as->asynclist, &ps->async_completed); - spin_unlock(&ps->lock); + spin_lock_irqsave(&ps->lock, flags); + list_move_tail(&as->asynclist, &ps->async_completed); + spin_unlock_irqrestore(&ps->lock, flags); if (as->signr) { sinfo.si_signo = as->signr; sinfo.si_errno = as->urb->status; Index: linux/drivers/usb/core/hcd.c =================================================================== --- linux.orig/drivers/usb/core/hcd.c +++ linux/drivers/usb/core/hcd.c @@ -506,13 +506,11 @@ error: } /* any errors get returned through the urb completion */ - local_irq_save (flags); - spin_lock (&urb->lock); + spin_lock_irqsave(&urb->lock, flags); if (urb->status == -EINPROGRESS) urb->status = status; - spin_unlock (&urb->lock); + spin_unlock_irqrestore(&urb->lock, flags); usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_restore (flags); return 0; } @@ -540,8 +538,7 @@ void usb_hcd_poll_rh_status(struct usb_h if (length > 0) { /* try to complete the status urb */ - local_irq_save (flags); - spin_lock(&hcd_root_hub_lock); + spin_lock_irqsave(&hcd_root_hub_lock, flags); urb = hcd->status_urb; if (urb) { spin_lock(&urb->lock); @@ -557,14 +554,13 @@ void usb_hcd_poll_rh_status(struct usb_h spin_unlock(&urb->lock); } else length = 0; - spin_unlock(&hcd_root_hub_lock); + spin_unlock_irqrestore(&hcd_root_hub_lock, flags); /* local irqs are always blocked in completions */ if (length > 0) usb_hcd_giveback_urb (hcd, urb, NULL); else hcd->poll_pending = 1; - local_irq_restore (flags); } /* The USB 2.0 spec says 256 ms. This is close enough and won't @@ -647,17 +643,15 @@ static int usb_rh_urb_dequeue (struct us } else { /* Status URB */ if (!hcd->uses_new_polling) del_timer_sync (&hcd->rh_timer); - local_irq_disable (); - spin_lock (&hcd_root_hub_lock); + spin_lock_irq(&hcd_root_hub_lock); if (urb == hcd->status_urb) { hcd->status_urb = NULL; urb->hcpriv = NULL; } else urb = NULL; /* wasn't fully queued */ - spin_unlock (&hcd_root_hub_lock); + spin_unlock_irq(&hcd_root_hub_lock); if (urb) usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_enable (); } return 0; @@ -1367,15 +1361,13 @@ hcd_endpoint_disable (struct usb_device WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT && udev->state != USB_STATE_NOTATTACHED); - local_irq_disable (); - /* FIXME move most of this into message.c as part of its * endpoint disable logic */ /* ep is already gone from udev->ep_{in,out}[]; no more submits */ rescan: - spin_lock (&hcd_data_lock); + spin_lock_irq(&hcd_data_lock); list_for_each_entry (urb, &ep->urb_list, urb_list) { int tmp; @@ -1388,13 +1380,13 @@ rescan: if (urb->status != -EINPROGRESS) continue; usb_get_urb (urb); - spin_unlock (&hcd_data_lock); + spin_unlock_irq(&hcd_data_lock); - spin_lock (&urb->lock); + spin_lock_irq(&urb->lock); tmp = urb->status; if (tmp == -EINPROGRESS) urb->status = -ESHUTDOWN; - spin_unlock (&urb->lock); + spin_unlock_irq(&urb->lock); /* kick hcd unless it's already returning this */ if (tmp == -EINPROGRESS) { @@ -1417,8 +1409,7 @@ rescan: /* list contents may have changed */ goto rescan; } - spin_unlock (&hcd_data_lock); - local_irq_enable (); + spin_unlock_irq(&hcd_data_lock); /* synchronize with the hardware, so old configuration state * clears out immediately (and will be freed). Index: linux/drivers/usb/core/message.c =================================================================== --- linux.orig/drivers/usb/core/message.c +++ linux/drivers/usb/core/message.c @@ -225,8 +225,9 @@ static void sg_clean (struct usb_sg_requ static void sg_complete (struct urb *urb, struct pt_regs *regs) { struct usb_sg_request *io = (struct usb_sg_request *) urb->context; + unsigned long flags; - spin_lock (&io->lock); + spin_lock_irqsave (&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets @@ -260,7 +261,7 @@ static void sg_complete (struct urb *urb * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... */ - spin_unlock (&io->lock); + spin_unlock_irqrestore (&io->lock, flags); for (i = 0, found = 0; i < io->entries; i++) { if (!io->urbs [i] || !io->urbs [i]->dev) continue; @@ -273,7 +274,7 @@ static void sg_complete (struct urb *urb } else if (urb == io->urbs [i]) found = 1; } - spin_lock (&io->lock); + spin_lock_irqsave (&io->lock, flags); } urb->dev = NULL; @@ -283,7 +284,7 @@ static void sg_complete (struct urb *urb if (!io->count) complete (&io->complete); - spin_unlock (&io->lock); + spin_unlock_irqrestore (&io->lock, flags); } Index: linux/drivers/usb/host/hc_crisv10.c =================================================================== --- linux.orig/drivers/usb/host/hc_crisv10.c +++ linux/drivers/usb/host/hc_crisv10.c @@ -178,8 +178,8 @@ static __u8 root_hub_hub_des[] = 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */ }; -static struct timer_list bulk_start_timer = TIMER_INITIALIZER(NULL, 0, 0); -static struct timer_list bulk_eot_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(bulk_start_timer, NULL, 0, 0); +static DEFINE_TIMER(bulk_eot_timer, NULL, 0, 0); /* We want the start timer to expire before the eot timer, because the former might start traffic, thus making it unnecessary for the latter to time out. */ Index: linux/drivers/usb/net/usbnet.c =================================================================== --- linux.orig/drivers/usb/net/usbnet.c +++ linux/drivers/usb/net/usbnet.c @@ -3490,6 +3490,8 @@ static void tx_complete (struct urb *urb urb->dev = NULL; entry->state = tx_done; + spin_lock_rt(&dev->txq.lock); + spin_unlock_rt(&dev->txq.lock); defer_bh (dev, skb); } Index: linux/drivers/usb/storage/usb.c =================================================================== --- linux.orig/drivers/usb/storage/usb.c +++ linux/drivers/usb/storage/usb.c @@ -317,6 +317,7 @@ static int usb_stor_control_thread(void if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) { US_DEBUGP("-- exiting\n"); up(&(us->dev_semaphore)); + up(&us->sema); break; } Index: linux/drivers/usb/storage/usb.h =================================================================== --- linux.orig/drivers/usb/storage/usb.h +++ linux/drivers/usb/storage/usb.h @@ -171,7 +171,7 @@ struct us_data { dma_addr_t iobuf_dma; /* mutual exclusion and synchronization structures */ - struct semaphore sema; /* to sleep thread on */ + struct compat_semaphore sema; /* to sleep thread on */ struct completion notify; /* thread begin/end */ wait_queue_head_t delay_wait; /* wait during scan, reset */ Index: linux/drivers/video/backlight/corgi_bl.c =================================================================== --- linux.orig/drivers/video/backlight/corgi_bl.c +++ linux/drivers/video/backlight/corgi_bl.c @@ -29,7 +29,7 @@ static int corgibl_powermode = FB_BLANK_UNBLANK; static int current_intensity = 0; static int corgibl_limit = 0; -static spinlock_t bl_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(bl_lock); static void corgibl_send_intensity(int intensity) { Index: linux/drivers/video/console/fbcon.c =================================================================== --- linux.orig/drivers/video/console/fbcon.c +++ linux/drivers/video/console/fbcon.c @@ -1047,7 +1047,6 @@ static void fbcon_clear(struct vc_data * { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; u_int y_break; @@ -1076,10 +1075,11 @@ static void fbcon_putcs(struct vc_data * struct display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (!fbcon_is_inactive(vc, info)) { ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); + } } static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) @@ -2763,6 +2763,7 @@ static const struct consw fb_con = { .con_screen_pos = fbcon_screen_pos, .con_getxy = fbcon_getxy, .con_resize = fbcon_resize, + .con_preemptible = 1, }; static struct notifier_block fbcon_event_notifier = { Index: linux/drivers/video/console/vgacon.c =================================================================== --- linux.orig/drivers/video/console/vgacon.c +++ linux/drivers/video/console/vgacon.c @@ -53,7 +53,7 @@ #include