Index: linux/arch/arm/mach-pxa/corgi_ssp.c =================================================================== --- linux.orig/arch/arm/mach-pxa/corgi_ssp.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/arm/mach-pxa/corgi_ssp.c 2005-07-29 12:06:11.000000000 -0300 @@ -22,7 +22,7 @@ #include #include -static spinlock_t corgi_ssp_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(corgi_ssp_lock); static struct ssp_dev corgi_ssp_dev; static struct ssp_state corgi_ssp_state; Index: linux/arch/i386/Kconfig =================================================================== --- linux.orig/arch/i386/Kconfig 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/Kconfig 2005-07-29 13:26:39.000000000 -0300 @@ -368,16 +368,6 @@ default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 default "6" if MK7 || MK8 || MPENTIUMM -config RWSEM_GENERIC_SPINLOCK - bool - depends on M386 - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -434,7 +424,7 @@ config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on (MCYRIXIII || MK7) && !PREEMPT_RT default y config X86_OOSTORE @@ -512,6 +502,20 @@ source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -547,6 +551,16 @@ depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) default y +config X86_IOAPIC_FAST + bool "enhanced IO-APIC support" + depends on X86_IO_APIC + default y + help + this option will activate further optimizations in the IO-APIC + code. 
NOTE: this is experimental code, even though it is enabled by default. + Symptoms of non-working systems are boot-time lockups, stray or + screaming interrupts and other interrupt related weirdnesses. + config X86_VISWS_APIC bool depends on X86_VISWS @@ -912,7 +926,7 @@ config REGPARM bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && !MCOUNT default n help Compile the kernel with -mregparm=3. This uses a different ABI Index: linux/arch/i386/Kconfig.debug =================================================================== --- linux.orig/arch/i386/Kconfig.debug 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/Kconfig.debug 2005-07-29 12:06:11.000000000 -0300 @@ -18,6 +18,7 @@ config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y config KPROBES bool "Kprobes" @@ -32,6 +33,7 @@ config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. 
Index: linux/arch/i386/boot/compressed/misc.c =================================================================== --- linux.orig/arch/i386/boot/compressed/misc.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/boot/compressed/misc.c 2005-07-29 12:06:11.000000000 -0300 @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; Index: linux/arch/i386/kernel/Makefile =================================================================== --- linux.orig/arch/i386/kernel/Makefile 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/Makefile 2005-07-29 12:06:11.000000000 -0300 @@ -4,11 +4,12 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ doublefault.o quirks.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ @@ -20,6 +21,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o Index: linux/arch/i386/kernel/apic.c =================================================================== --- linux.orig/arch/i386/kernel/apic.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/apic.c 2005-07-29 12:06:11.000000000 -0300 @@ -566,9 +566,9 @@ if 
(!cpu_has_apic || !enabled_via_apicbase) return; - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_PM @@ -612,9 +612,9 @@ apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -626,7 +626,7 @@ if (!apic_pm_state.active) return 0; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Make sure the APICBASE points to the right address @@ -657,7 +657,7 @@ apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -854,10 +854,10 @@ ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); + set_fixmap_nocache(idx, ioapic_phys); + printk(KERN_DEBUG "faked IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); idx++; } } @@ -956,7 +956,7 @@ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Wait for IRQ0's slice: @@ -965,7 +965,7 @@ __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -1054,7 +1054,7 @@ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -1062,7 +1062,7 @@ */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __devinit setup_secondary_APIC_clock(void) @@ -1134,7 +1134,10 @@ { int cpu = smp_processor_id(); +#if 0 profile_tick(CPU_PROFILING, regs); +#endif + if (--per_cpu(prof_counter, cpu) <= 0) { /* * The multiplier may have changed 
since the last time we got @@ -1180,7 +1183,7 @@ * interrupt as well. Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); @@ -1189,6 +1192,8 @@ */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. Index: linux/arch/i386/kernel/apm.c =================================================================== --- linux.orig/arch/i386/kernel/apm.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/apm.c 2005-07-29 12:06:11.000000000 -0300 @@ -552,9 +552,9 @@ */ #define APM_DO_CLI \ if (apm_info.allow_ints) \ - local_irq_enable(); \ + raw_local_irq_enable(); \ else \ - local_irq_disable(); + raw_local_irq_disable(); #ifdef APM_ZERO_SEGS # define APM_DECL_SEGS \ @@ -604,12 +604,12 @@ save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -647,12 +647,12 @@ save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -1201,7 +1201,7 @@ } device_suspend(PMSG_SUSPEND); - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize 
with the timer interrupt */ @@ -1217,14 +1217,14 @@ */ spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); ignore_normal_resume = 1; restore_processor_state(); - local_irq_disable(); + raw_local_irq_disable(); write_seqlock(&xtime_lock); spin_lock(&i8253_lock); reinit_timer(); @@ -1239,7 +1239,7 @@ apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); @@ -1258,22 +1258,22 @@ { int err; - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ write_seqlock(&xtime_lock); /* If needed, notify drivers here */ get_time_diff(); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) apm_error("standby", err); - local_irq_disable(); + raw_local_irq_disable(); device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); } static apm_event_t get_event(void) Index: linux/arch/i386/kernel/cpu/mtrr/cyrix.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/cyrix.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/cpu/mtrr/cyrix.c 2005-07-29 12:06:11.000000000 -0300 @@ -17,7 +17,7 @@ arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ /* Save flags and disable interrupts */ - local_irq_save(flags); + raw_local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -28,7 +28,7 @@ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); + raw_local_irq_restore(flags); shift = ((unsigned 
char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; Index: linux/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/generic.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/cpu/mtrr/generic.c 2005-07-29 12:06:11.000000000 -0300 @@ -241,7 +241,7 @@ static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -303,14 +303,14 @@ unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); /* Actually set the state */ mask = set_mtrr_state(deftype_lo,deftype_hi); post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { @@ -335,7 +335,7 @@ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); if (size == 0) { @@ -350,7 +350,7 @@ } post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) Index: linux/arch/i386/kernel/cpu/mtrr/main.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/main.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/cpu/mtrr/main.c 2005-07-29 12:06:11.000000000 -0300 @@ -146,7 +146,7 @@ struct set_mtrr_data *data = info; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); atomic_dec(&data->count); while(!atomic_read(&data->gate)) @@ -164,7 +164,7 @@ cpu_relax(); atomic_dec(&data->count); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -225,7 +225,7 @@ if (smp_call_function(ipi_handler, &data, 1, 0) != 0) panic("mtrr: timed out waiting for other 
CPUs\n"); - local_irq_save(flags); + raw_local_irq_save(flags); while(atomic_read(&data.count)) cpu_relax(); @@ -259,7 +259,7 @@ while(atomic_read(&data.count)) cpu_relax(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** Index: linux/arch/i386/kernel/cpu/mtrr/state.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/state.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/cpu/mtrr/state.c 2005-07-29 12:06:11.000000000 -0300 @@ -12,7 +12,7 @@ unsigned int cr0; /* Disable interrupts locally */ - local_irq_save(ctxt->flags); + raw_local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { @@ -73,6 +73,6 @@ write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); + raw_local_irq_restore(ctxt->flags); } Index: linux/arch/i386/kernel/entry.S =================================================================== --- linux.orig/arch/i386/kernel/entry.S 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/entry.S 2005-07-29 12:06:11.000000000 -0300 @@ -76,10 +76,10 @@ VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +# define preempt_stop cli #else -#define preempt_stop -#define resume_kernel restore_nocheck +# define preempt_stop +# define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -160,14 +160,17 @@ #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? 
- jz restore_all + jz restore_nocheck + cli call preempt_schedule_irq jmp need_resched #endif @@ -200,6 +203,11 @@ pushl %eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -213,6 +221,11 @@ movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -225,6 +238,11 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -254,6 +272,17 @@ cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE) + pushl %eax +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + call trace_irqs_on +#endif +#ifdef CONFIG_LATENCY_TRACE + call sys_ret +#endif + popl %eax +#endif +restore_nocheck_nmi: RESTORE_REGS addl $4, %esp 1: iret @@ -297,18 +326,22 @@ # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testb $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %cl jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + call __schedule +#ifdef CONFIG_PREEMPT_RT + call local_irq_enable_noresched +#endif + # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work 
to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testb $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %cl jnz work_resched work_notifysig: # deal with pending signals and @@ -348,6 +381,11 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + pushl %eax + call trace_irqs_on + popl %eax +#endif sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -409,9 +447,16 @@ vector=vector+1 .endr +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel; +#else +# define TRACE_IRQS_OFF +#endif + ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -420,6 +465,7 @@ ENTRY(name) \ pushl $nr-256; \ SAVE_ALL \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; @@ -549,7 +595,7 @@ xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) Index: linux/arch/i386/kernel/i386_ksyms.c =================================================================== --- linux.orig/arch/i386/kernel/i386_ksyms.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/i386_ksyms.c 2005-07-29 12:17:55.000000000 -0300 @@ -6,10 +6,12 @@ /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -28,8 +30,10 @@ #ifdef CONFIG_SMP extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); +#ifdef CONFIG_ASM_SEMAPHORES EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); #endif +#endif EXPORT_SYMBOL(csum_partial); Index: linux/arch/i386/kernel/i8259.c =================================================================== --- linux.orig/arch/i386/kernel/i8259.c 2005-07-29 12:02:23.000000000 -0300 +++ linux/arch/i386/kernel/i8259.c 2005-07-29 12:06:11.000000000 -0300 @@ -38,7 +38,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -369,7 +369,7 @@ * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL }; void __init init_ISA_irqs (void) { Index: linux/arch/i386/kernel/init_task.c =================================================================== --- linux.orig/arch/i386/kernel/init_task.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/init_task.c 2005-07-29 12:06:11.000000000 -0300 @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/i386/kernel/io_apic.c =================================================================== --- linux.orig/arch/i386/kernel/io_apic.c 2005-07-29 12:02:24.000000000 -0300 +++ 
linux/arch/i386/kernel/io_apic.c 2005-07-29 12:06:11.000000000 -0300 @@ -46,7 +46,7 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * Is the SiS APIC rmw bug present ? @@ -55,11 +55,6 @@ int sis_apic_bug = -1; /* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ @@ -128,19 +123,131 @@ } } +#ifdef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_CACHE +#endif + + + +struct ioapic_data_struct { + struct sys_device dev; + int nr_registers; // # of IRQ routing registers + volatile unsigned int *base; + struct IO_APIC_route_entry *entry; +#ifdef IOAPIC_CACHE + unsigned int reg_set; + u32 cached_val[0]; +#endif +}; + +static struct ioapic_data_struct *ioapic_data[MAX_IO_APICS]; + + +static inline unsigned int __raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + return ioapic->base[4]; +} + + +# ifdef IOAPIC_CACHE +static void __init ioapic_cache_init(struct ioapic_data_struct *ioapic) +{ + int reg; + for (reg = 0; reg < (0x10 + 2 * ioapic->nr_registers); reg++) + ioapic->cached_val[reg] = __raw_io_apic_read(ioapic, reg); +} +# endif + + +static unsigned int raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ + unsigned int val = __raw_io_apic_read(ioapic, reg); + +# ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; +# endif + return val; +} + +static unsigned int io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + if (likely(!sis_apic_bug)) { + ioapic->reg_set = -1; + return ioapic->cached_val[reg]; + } +# endif + return raw_io_apic_read(ioapic, reg); +} + +static void io_apic_write(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +# ifdef IOAPIC_CACHE + 
ioapic->cached_val[reg] = val; + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + ioapic->base[4] = val; +} + + +/* + * Some systems need a POST flush or else level-triggered interrupts + * generate lots of spurious interrupts due to the POST-ed write not + * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC. + * + * It seems most systems need this - disable the optimization for now. + */ +//#ifndef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_POSTFLUSH +//#endif + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register + */ +static void io_apic_modify(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; + if (ioapic->reg_set != reg || sis_apic_bug) { + ioapic->reg_set = reg; +#else + if (unlikely(sis_apic_bug)) { +#endif + ioapic->base[0] = reg; + } + ioapic->base[4] = val; +#ifndef IOAPIC_POSTFLUSH + if (unlikely(sis_apic_bug)) +#endif + /* + * Force POST flush by reading: + */ + val = ioapic->base[4]; +} + static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; + unsigned int pin, val; + struct ioapic_data_struct *ioapic; for (;;) { pin = entry->pin; if (pin == -1) break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); + ioapic = ioapic_data[entry->apic]; + val = io_apic_read(ioapic, 0x10 + pin*2); + val &= ~disable; + val |= enable; + io_apic_modify(ioapic, 0x10 + pin*2, val); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -148,29 +255,17 @@ } /* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) +static inline void __mask_IO_APIC_irq (unsigned int irq) { __modify_IO_APIC_irq(irq, 0x00010000, 0); } /* mask = 0 */ -static 
void __unmask_IO_APIC_irq (unsigned int irq) +static inline void __unmask_IO_APIC_irq (unsigned int irq) { __modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -189,15 +284,15 @@ spin_unlock_irqrestore(&ioapic_lock, flags); } -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +static void clear_IO_APIC_pin(struct ioapic_data_struct *ioapic, unsigned int pin) { struct IO_APIC_route_entry entry; unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int*)&entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * pin); + *(((int*)&entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); if (entry.delivery_mode == dest_SMI) return; @@ -208,8 +303,8 @@ memset(&entry, 0, sizeof(entry)); entry.mask = 1; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + io_apic_write(ioapic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(ioapic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -217,9 +312,14 @@ { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); + for (apic = 0; apic < nr_ioapics; apic++) { + struct ioapic_data_struct *ioapic = ioapic_data[apic]; +#ifdef IOAPIC_CACHE + ioapic->reg_set = -1; +#endif + for 
(pin = 0; pin < ioapic->nr_registers; pin++) + clear_IO_APIC_pin(ioapic, pin); + } } static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) @@ -237,7 +337,7 @@ pin = entry->pin; if (pin == -1) break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); + io_apic_write(ioapic_data[entry->apic], 0x10 + 1 + pin*2, apicid_value); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -828,7 +928,7 @@ return; for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + for (pin = 0; pin < ioapic_data[ioapic]->nr_registers; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; @@ -1071,7 +1171,7 @@ */ i = irq = 0; while (i < apic) - irq += nr_ioapic_registers[i++]; + irq += ioapic_data[i++]->nr_registers; irq += pin; /* @@ -1114,7 +1214,7 @@ int apic, idx, pin; for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapic_data[apic]->nr_registers; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) return irq_trigger(idx); @@ -1186,11 +1286,13 @@ struct IO_APIC_route_entry entry; int apic, pin, idx, irq, first_notcon = 1, vector; unsigned long flags; + struct ioapic_data_struct *ioapic; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + ioapic = ioapic_data[apic]; + for (pin = 0; pin < ioapic->nr_registers; pin++) { /* * add it to the IO-APIC irq-routing table: @@ -1247,8 +1349,8 @@ disable_8259A_irq(irq); } spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ 
-1294,8 +1396,8 @@ * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1305,7 +1407,7 @@ { } -void __init print_IO_APIC(void) +void /*__init*/ print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1313,6 +1415,7 @@ union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; + struct ioapic_data_struct *ioapic; if (apic_verbosity == APIC_QUIET) return; @@ -1320,7 +1423,7 @@ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mpc_apicid, ioapic_data[i]->nr_registers); /* * We are a bit conservative about what we expect. 
We have to @@ -1329,14 +1432,14 @@ printk(KERN_INFO "testing the IO APIC.......................\n"); for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); + reg_00.raw = io_apic_read(ioapic, 0); + reg_01.raw = io_apic_read(ioapic, 1); if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); + reg_02.raw = io_apic_read(ioapic, 2); if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + reg_03.raw = io_apic_read(ioapic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); @@ -1407,8 +1510,8 @@ struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(ioapic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(ioapic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1454,7 +1557,7 @@ return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1601,9 +1704,7 @@ static void __init enable_IO_APIC(void) { - union IO_APIC_reg_01 reg_01; int i; - unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { irq_2_pin[i].pin = -1; @@ -1614,16 +1715,6 @@ pirq_entries[i] = -1; /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (i = 0; i < nr_ioapics; i++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; - } - - /* * Do not trust the IO-APIC being empty at bootup */ clear_IO_APIC(); @@ -1689,6 +1780,7 @@ int i; unsigned char old_id; unsigned long flags; + struct ioapic_data_struct *ioapic; /* * Don't check I/O APIC IDs for xAPIC systems. 
They have @@ -1706,10 +1798,10 @@ * Set the IOAPIC ID to the value stored in the MPC table. */ for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic].mpc_apicid; @@ -1770,14 +1862,14 @@ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(ioapic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) printk("could not set ID!\n"); @@ -1801,7 +1893,7 @@ { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1865,9 +1957,11 @@ static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); +#if 0 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); +#endif ack_APIC_irq(); } @@ -1892,6 +1986,30 @@ return 0; /* don't check for pending */ } +#ifdef CONFIG_PREEMPT_HARDIRQS + +/* + * in the PREEMPT_HARDIRQS case we don't want to keep the local + * APIC unacked, because that prevents further interrupts from + * being handled - and with IRQ threads being delayed arbitrarily, + * that's unacceptable. So we first mask the IRQ, then ack it. + * The hardirq thread will then unmask it. 
+ */ +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +#else + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + +#endif + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1926,8 +2044,10 @@ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } @@ -1954,6 +2074,13 @@ return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1993,25 +2120,25 @@ * races. 
*/ static struct hw_interrupt_type ioapic_edge_type = { - .typename = "IO-APIC-edge", + .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, .enable = enable_edge_ioapic, .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity, }; static struct hw_interrupt_type ioapic_level_type = { - .typename = "IO-APIC-level", + .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, .enable = enable_level_ioapic, .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity, }; static inline void init_IO_APIC_traps(void) @@ -2075,13 +2202,13 @@ static void end_lapic_irq (unsigned int i) { /* nothing */ } static struct hw_interrupt_type lapic_irq_type = { - .typename = "local-APIC-edge", - .startup = NULL, /* startup_irq() not used for IRQ0 */ - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ - .enable = enable_lapic_irq, - .disable = disable_lapic_irq, - .ack = ack_lapic_irq, - .end = end_lapic_irq + .typename = "local-APIC-edge", + .startup = NULL, /* startup_irq() not used for IRQ0 */ + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ + .enable = enable_lapic_irq, + .disable = disable_lapic_irq, + .ack = ack_lapic_irq, + .end = end_lapic_irq }; static void setup_nmi (void) @@ -2109,22 +2236,23 @@ * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. 
--macro */ -static inline void unlock_ExtINT_logic(void) +static void __init unlock_ExtINT_logic(void) { int pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; pin = find_isa_irq_pin(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(ioapic0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(ioapic0, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2137,8 +2265,8 @@ entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2156,11 +2284,11 @@ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2170,10 +2298,11 @@ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. 
*/ -static inline void check_timer(void) +static void __init check_timer(void) { int pin1, pin2; int vector; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; /* * get/set the timer IRQ vector: @@ -2191,7 +2320,10 @@ */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; +#ifdef CONFIG_PREEMPT_RT + if (nmi_watchdog) +#endif + timer_ack = 1; enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); @@ -2212,7 +2344,7 @@ } return; } - clear_IO_APIC_pin(0, pin1); + clear_IO_APIC_pin(ioapic0, pin1); printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); } @@ -2237,7 +2369,7 @@ /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(ioapic0, pin2); } printk(" failed.\n"); @@ -2278,6 +2410,46 @@ "report. Then try booting with the 'noapic' option"); } +void __init setup_IO_APIC_early(int _ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int size, nr_ioapic_registers; + volatile int *ioapic; + if (ioapic_data[_ioapic]) { + printk("been in %s before !!!!!\n", __FUNCTION__); + return; + } + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + _ioapic, mp_ioapics[_ioapic].mpc_apicaddr); + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(FIX_IO_APIC_BASE_0 + _ioapic), mp_ioapics[_ioapic].mpc_apicaddr); + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + ioapic = IO_APIC_BASE(_ioapic); + spin_lock_irqsave(&ioapic_lock, flags); + ioapic[0] = 1; + reg_01.raw = ioapic[4]; + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers = reg_01.bits.entries+1; + + /* + * Initialize ioapic_data struct: + */ + size = sizeof(struct ioapic_data_struct); +#ifdef IOAPIC_CACHE + size += 0x10 * sizeof(u32) + nr_ioapic_registers * sizeof(struct IO_APIC_route_entry); +#endif + ioapic_data[_ioapic] = alloc_bootmem(size); + memset(ioapic_data[_ioapic], 0, size); + ioapic_data[_ioapic]->nr_registers = nr_ioapic_registers; + ioapic_data[_ioapic]->base = 
ioapic; +#ifdef IOAPIC_CACHE + ioapic_cache_init(ioapic_data[_ioapic]); +#endif +} + /* * * IRQ's that are handled by the PIC in the MPS IOAPIC case. @@ -2325,25 +2497,22 @@ late_initcall(io_apic_bug_finalize); -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; int i; + struct ioapic_data_struct *ioapic; - data = container_of(dev, struct sysfs_ioapic_data, dev); + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + *(((int *)entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * i); + *(((int *)entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * i); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2353,23 +2522,25 @@ static int ioapic_resume(struct sys_device *dev) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); + struct ioapic_data_struct *ioapic; + + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); + reg_00.raw = io_apic_read(ioapic, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; - io_apic_write(dev->id, 0, reg_00.raw); + 
io_apic_write(ioapic, 0, reg_00.raw); } - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + io_apic_write(ioapic, 0x11+2*i, *(((int *)entry)+1)); + io_apic_write(ioapic, 0x10+2*i, *(((int *)entry)+0)); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2392,21 +2563,20 @@ return error; for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { + size = ioapic_data[i]->nr_registers * sizeof(struct IO_APIC_route_entry); + ioapic_data[i]->entry = kmalloc(size, GFP_KERNEL); + if (!ioapic_data[i]->entry) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); - dev = &mp_ioapic_data[i]->dev; + memset(ioapic_data[i]->entry, 0, size); + dev = &ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; error = sysdev_register(dev); if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; + kfree(ioapic_data[i]->entry); + ioapic_data[i]->entry = NULL; printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } @@ -2423,13 +2593,14 @@ #ifdef CONFIG_ACPI_BOOT -int __init io_apic_get_unique_id (int ioapic, int apic_id) +int __init io_apic_get_unique_id (int apic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; physid_mask_t tmp; unsigned long flags; int i = 0; + struct ioapic_data_struct *ioapic = ioapic_data[apic]; /* * The P4 platform supports up to 256 APIC IDs on two separate APIC @@ -2449,7 +2620,7 @@ if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); + "%d\n", apic, apic_id, reg_00.bits.ID); 
apic_id = reg_00.bits.ID; } @@ -2468,7 +2639,7 @@ panic("Max apic_id exceeded!\n"); printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); + "trying %d\n", apic, apic_id, i); apic_id = i; } @@ -2486,50 +2657,50 @@ /* Sanity check */ if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + panic("IOAPIC[%d]: Unable change apic_id!\n", apic); } apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + "IOAPIC[%d]: Assigned apic_id %d\n", apic, apic_id); return apic_id; } -int __init io_apic_get_version (int ioapic) +int __init io_apic_get_version (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } -int __init io_apic_get_redir_entries (int ioapic) +int __init io_apic_get_redir_entries (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing (int apic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; unsigned long flags; - + struct ioapic_data_struct *ioapic = ioapic_data[apic]; if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); + apic); return -EINVAL; } @@ -2552,18 +2723,18 @@ * IRQs < 16 are already in the irq_2_pin[] map */ if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + add_pin_to_irq(irq, apic, pin); entry.vector = assign_irq_vector(irq); apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - 
"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", apic, + mp_ioapics[apic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); - if (!ioapic && (irq < 16)) + if (!apic && (irq < 16)) disable_8259A_irq(irq); spin_lock_irqsave(&ioapic_lock, flags); Index: linux/arch/i386/kernel/irq.c =================================================================== --- linux.orig/arch/i386/kernel/irq.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/irq.c 2005-07-29 12:06:11.000000000 -0300 @@ -51,7 +51,7 @@ * SMP cross-CPU interrupts have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { /* high bits used in ret_from_ code */ int irq = regs->orig_eax & 0xff; @@ -59,8 +59,12 @@ union irq_ctx *curctx, *irqctx; u32 *isp; #endif - irq_enter(); +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? 
*/ { @@ -69,7 +73,7 @@ __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } @@ -173,7 +177,7 @@ if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); if (local_softirq_pending()) { curctx = current_thread_info(); @@ -194,7 +198,7 @@ ); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); @@ -224,8 +228,10 @@ } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -235,15 +241,28 @@ for_each_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %-14s", desc->handler->typename); +#define F(x,c) ((desc->status & x) ? 
c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_PER_CPU, 'C'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_cpu(j) Index: linux/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/arch/i386/kernel/mcount-wrapper.S 2005-07-29 12:06:11.000000000 -0300 @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux/arch/i386/kernel/microcode.c =================================================================== --- linux.orig/arch/i386/kernel/microcode.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/microcode.c 2005-07-29 12:06:11.000000000 -0300 @@ -109,7 +109,7 @@ #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); Index: linux/arch/i386/kernel/mpparse.c =================================================================== --- linux.orig/arch/i386/kernel/mpparse.c 
2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/mpparse.c 2005-07-29 12:29:18.000000000 -0300 @@ -261,6 +261,7 @@ return; } mp_ioapics[nr_ioapics] = *m; + setup_IO_APIC_early(nr_ioapics); nr_ioapics++; } @@ -911,7 +912,7 @@ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; mp_ioapics[idx].mpc_apicaddr = address; - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + setup_IO_APIC_early(idx); if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); else Index: linux/arch/i386/kernel/nmi.c =================================================================== --- linux.orig/arch/i386/kernel/nmi.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/nmi.c 2005-07-29 12:34:20.000000000 -0300 @@ -35,7 +35,7 @@ unsigned int nmi_watchdog = NMI_NONE; extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 10000; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); @@ -113,8 +113,8 @@ for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((10*1000)/nmi_hz); // wait 10 ticks + raw_local_irq_enable(); + mdelay((100*1000)/nmi_hz); // wait 100 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP @@ -135,7 +135,7 @@ /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; + nmi_hz = 10000; return 0; } @@ -478,13 +478,39 @@ */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + 
if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} +static DEFINE_RAW_SPINLOCK(nmi_print_lock); +void notrace nmi_watchdog_tick (struct pt_regs * regs) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -494,14 +520,39 @@ sum = per_cpu(irq_stat, cpu).apic_timer_irqs; + profile_tick(CPU_PROFILING, regs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk("NMI show regs on CPU#%d:\n", cpu); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } + if (last_irq_sums[cpu] == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; Index: linux/arch/i386/kernel/process.c =================================================================== --- linux.orig/arch/i386/kernel/process.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/process.c 2005-07-29 12:06:11.000000000 -0300 @@ -102,12 +102,13 @@ void default_idle(void) { if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + raw_local_irq_disable(); + if 
(!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } else { + raw_local_irq_enable(); cpu_relax(); } } @@ -124,7 +125,7 @@ { int oldval; - local_irq_enable(); + raw_local_irq_enable(); /* * Deal with another CPU just having chosen a thread to @@ -139,7 +140,7 @@ "testl %0, %1;" "rep; nop;" "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + : : "i"(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags)); clear_thread_flag(TIF_POLLING_NRFLAG); } else { @@ -185,7 +186,9 @@ /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -201,9 +204,13 @@ play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } @@ -244,16 +251,16 @@ */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { set_thread_flag(TIF_POLLING_NRFLAG); do { __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); - } while (!need_resched()); + } while (!need_resched() && !need_resched_delayed()); clear_thread_flag(TIF_POLLING_NRFLAG); } } @@ -384,11 +391,16 @@ /* The process may have allocated an io port bitmap... nuke it. 
*/ if (unlikely(NULL != t->io_bitmap_ptr)) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + int cpu; + struct tss_struct *tss; + void *io_bitmap_ptr = t->io_bitmap_ptr; - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + mb(); + kfree(io_bitmap_ptr); + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); /* * Careful, clear this in the TSS too: */ @@ -701,10 +713,10 @@ /* * Restore %fs and %gs if needed. */ - if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) { + if (prev->fs | next->fs) loadsegment(fs, next->fs); + if (prev->gs | next->gs) loadsegment(gs, next->gs); - } /* * Now maybe reload the debug registers Index: linux/arch/i386/kernel/reboot.c =================================================================== --- linux.orig/arch/i386/kernel/reboot.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/reboot.c 2005-07-29 12:06:11.000000000 -0300 @@ -194,7 +194,7 @@ { unsigned long flags; - local_irq_disable(); + raw_local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST routine will recognize as telling it to do a proper reboot. (Well Index: linux/arch/i386/kernel/semaphore.c =================================================================== --- linux.orig/arch/i386/kernel/semaphore.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/semaphore.c 2005-07-29 12:06:11.000000000 -0300 @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -49,12 +50,12 @@ * we cannot lose wakeup events. 
*/ -static fastcall void __attribute_used__ __up(struct semaphore *sem) +static fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } -static fastcall void __attribute_used__ __sched __down(struct semaphore * sem) +static fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -91,7 +92,7 @@ tsk->state = TASK_RUNNING; } -static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem) +static fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -154,7 +155,7 @@ * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem) +static fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -190,15 +191,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -211,15 +212,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -232,15 +233,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl 
__down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -253,45 +254,20 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); +int fastcall compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif Index: linux/arch/i386/kernel/signal.c =================================================================== --- linux.orig/arch/i386/kernel/signal.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/signal.c 2005-07-29 12:06:11.000000000 -0300 @@ -599,6 +599,13 @@ int signr; struct k_sigaction ka; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain 
cases get here from Index: linux/arch/i386/kernel/smp.c =================================================================== --- linux.orig/arch/i386/kernel/smp.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/smp.c 2005-07-29 12:35:16.000000000 -0300 @@ -164,7 +164,7 @@ unsigned long cfg; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. @@ -187,7 +187,7 @@ */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -201,7 +201,7 @@ * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + raw_local_irq_save(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -228,7 +228,7 @@ apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -246,7 +246,7 @@ static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -391,7 +391,7 @@ while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -482,10 +482,20 @@ } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -539,7 +549,7 @@ } /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -573,7 +583,7 @@ * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) __asm__("hlt"); @@ -588,19 +598,20 @@ { smp_call_function(stop_this_cpu, NULL, 1, 0); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux/arch/i386/kernel/smpboot.c =================================================================== --- linux.orig/arch/i386/kernel/smpboot.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/smpboot.c 2005-07-29 12:37:36.000000000 -0300 @@ -516,7 +516,7 @@ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". 
*/ - local_irq_enable(); + raw_local_irq_enable(); wmb(); cpu_idle(); @@ -1350,11 +1350,11 @@ /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); return -EIO; } - local_irq_enable(); + raw_local_irq_enable(); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); Index: linux/arch/i386/kernel/time.c =================================================================== --- linux.orig/arch/i386/kernel/time.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/time.c 2005-07-29 12:39:11.000000000 -0300 @@ -83,12 +83,12 @@ extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); struct timer_opts *cur_timer = &timer_none; @@ -236,7 +236,7 @@ EXPORT_SYMBOL(monotonic_clock); #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -257,17 +257,18 @@ { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { + unsigned long flags; /* * Subtle, when I/O APICs are used we have to ack timer IRQ * manually to reset the IRR bit for do_slow_gettimeoffset(). * This will also deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + spin_lock_irqsave(&i8259A_lock, flags); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. 
*/ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore(&i8259A_lock, flags); } #endif @@ -333,8 +334,7 @@ static void sync_cmos_clock(unsigned long dummy); -static struct timer_list sync_cmos_timer = - TIMER_INITIALIZER(sync_cmos_clock, 0, 0); +static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); static void sync_cmos_clock(unsigned long dummy) { @@ -412,6 +412,7 @@ write_sequnlock_irqrestore(&xtime_lock, flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } Index: linux/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_cyclone.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/timers/timer_cyclone.c 2005-07-29 12:06:11.000000000 -0300 @@ -36,7 +36,7 @@ static u32 last_cyclone_low; static u32 last_cyclone_high; static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* helper macro to atomically read both cyclone counter registers */ #define read_cyclone_counter(low,high) \ Index: linux/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_hpet.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/timers/timer_hpet.c 2005-07-29 12:06:11.000000000 -0300 @@ -24,7 +24,7 @@ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: Index: linux/arch/i386/kernel/timers/timer_pm.c =================================================================== --- 
linux.orig/arch/i386/kernel/timers/timer_pm.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/timers/timer_pm.c 2005-07-29 12:06:11.000000000 -0300 @@ -41,7 +41,7 @@ static u32 offset_delay; static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ Index: linux/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_tsc.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/timers/timer_tsc.c 2005-07-29 12:06:11.000000000 -0300 @@ -43,7 +43,7 @@ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; +static DECLARE_RAW_SEQLOCK(monotonic_lock); /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: @@ -142,7 +142,7 @@ * synchronized across all CPUs. 
*/ #ifndef CONFIG_NUMA - if (!use_tsc) + if (unlikely(!use_tsc)) #endif /* no locking but a rare wrong value is not a big deal */ return jiffies_64 * (1000000000 / HZ); @@ -170,9 +170,9 @@ static void mark_offset_tsc_hpet(void) { unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; + unsigned long offset, temp, hpet_current, flags; - write_seqlock(&monotonic_lock); + write_seqlock_irqsave(&monotonic_lock, flags); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; /* * It is important that these two operations happen almost at @@ -200,7 +200,7 @@ /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); + write_sequnlock_irqrestore(&monotonic_lock, flags); /* calculate delay_at_last_interrupt */ /* @@ -341,7 +341,10 @@ static void mark_offset_tsc(void) { - unsigned long lost,delay; + unsigned long lost,delay, flags; +#ifndef CONFIG_PREEMPT_RT + unsigned long flags2; +#endif unsigned long delta = last_tsc_low; int count; int countmp; @@ -349,7 +352,7 @@ unsigned long long this_offset, last_offset; static int lost_count = 0; - write_seqlock(&monotonic_lock); + write_seqlock_irqsave(&monotonic_lock, flags); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; /* * It is important that these two operations happen almost at @@ -367,24 +370,34 @@ rdtsc(last_tsc_low, last_tsc_high); - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ +#ifdef CONFIG_PREEMPT_RT + /* + * On PREEMPT_RT the timer IRQ never gets delayed by + * other interrupts, so we dont have to read the + * count: + */ + count = LATCH - 2; +#else + spin_lock_irqsave(&i8253_lock, flags2); + outb(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb(PIT_CH0); /* read the latched count */ count |= inb(PIT_CH0) << 8; +# 
undef VIA686A_WORKAROUND /* * VIA686a test code... reset the latch if count > max + 1 * from timer_pit.c - cjb */ +# ifdef VIA686A_WORKAROUND if (count > LATCH) { outb_p(0x34, PIT_MODE); outb_p(LATCH & 0xff, PIT_CH0); outb(LATCH >> 8, PIT_CH0); count = LATCH - 1; } - - spin_unlock(&i8253_lock); +# endif + spin_unlock_irqrestore(&i8253_lock, flags2); +#endif /* PREEMPT_RT */ if (pit_latch_buggy) { /* get center value of last 3 time lutch */ @@ -437,7 +450,7 @@ /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); + write_sequnlock_irqrestore(&monotonic_lock, flags); /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; Index: linux/arch/i386/kernel/traps.c =================================================================== --- linux.orig/arch/i386/kernel/traps.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/traps.c 2005-07-29 12:41:06.000000000 -0300 @@ -94,7 +94,7 @@ static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +static DEFINE_RAW_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -117,22 +117,27 @@ unsigned long *stack, unsigned long ebp) { unsigned long addr; +#ifndef CONFIG_FRAME_POINTER + unsigned long prev_frame; +#endif -#ifdef CONFIG_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printk(" [<%08lx>] ", addr); print_symbol("%s", addr); - printk("\n"); + printk(" (%ld)\n", *(unsigned long *)ebp - ebp); ebp = *(unsigned long *)ebp; } #else + prev_frame = (unsigned long)stack; while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) { printk(" [<%08lx>]", addr); print_symbol(" %s", addr); - printk("\n"); + printk(" (%ld)\n", (unsigned long)stack - 
prev_frame); + prev_frame = (unsigned long)stack; } } #endif @@ -164,6 +169,8 @@ break; printk(" =======================\n"); } + print_traces(task); + show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *esp) @@ -202,6 +209,12 @@ EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -226,10 +239,17 @@ regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk("ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); + printk("Process %s (pid: %d, threadinfo=%p task=%p", current->comm, current->pid, current_thread_info(), current); + + if (in_kernel) + printk(" stack_left=%ld worst_left=%ld)", + (esp & (THREAD_SIZE-1))-sizeof(struct thread_info), + worst_stack_left); + else + printk(")"); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. 
@@ -301,11 +321,11 @@ void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -382,6 +402,11 @@ if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -510,7 +535,7 @@ return; gp_in_vm86: - local_irq_enable(); + raw_local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; @@ -564,10 +589,11 @@ printk("Do you have a strange power saving mode enabled?\n"); } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + deadlock_trace_off(); spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try @@ -590,11 +616,11 @@ current->thread.trap_no = 2; crash_kexec(regs); } - + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -613,6 +639,7 @@ */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif @@ -632,18 +659,19 @@ reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); @@ -721,7 +749,7 @@ return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + 
raw_local_irq_enable(); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { Index: linux/arch/i386/kernel/vm86.c =================================================================== --- linux.orig/arch/i386/kernel/vm86.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/kernel/vm86.c 2005-07-29 12:06:11.000000000 -0300 @@ -105,9 +105,10 @@ * from process context. Enable interrupts here, before trying * to access user space. */ - local_irq_enable(); + raw_local_irq_enable(); if (!current->thread.vm86_info) { + raw_local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux/arch/i386/lib/bitops.c =================================================================== --- linux.orig/arch/i386/lib/bitops.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/lib/bitops.c 2005-07-29 12:06:11.000000000 -0300 @@ -68,3 +68,37 @@ return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +".section .sched.text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( +".section .sched.text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif Index: linux/arch/i386/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/i386/lib/dec_and_lock.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/lib/dec_and_lock.c 2005-07-29 12:27:22.000000000 -0300 @@ -11,7 +11,7 @@ #include #include -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) 
+int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -33,10 +33,11 @@ return 0; slow_path: - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); + Index: linux/arch/i386/mach-default/setup.c =================================================================== --- linux.orig/arch/i386/mach-default/setup.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mach-default/setup.c 2005-07-29 12:06:11.000000000 -0300 @@ -35,7 +35,7 @@ /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -79,7 +79,7 @@ { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; /** * time_init_hook - do any specific initialisations for the system timer. 
Index: linux/arch/i386/mach-visws/setup.c =================================================================== --- linux.orig/arch/i386/mach-visws/setup.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mach-visws/setup.c 2005-07-29 12:06:11.000000000 -0300 @@ -112,7 +112,7 @@ static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .name = "timer", }; Index: linux/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux.orig/arch/i386/mach-visws/visws_apic.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mach-visws/visws_apic.c 2005-07-29 12:06:11.000000000 -0300 @@ -261,11 +261,13 @@ static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = SA_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = SA_NODELAY, }; Index: linux/arch/i386/mach-voyager/setup.c =================================================================== --- linux.orig/arch/i386/mach-voyager/setup.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mach-voyager/setup.c 2005-07-29 12:06:11.000000000 -0300 @@ -17,7 +17,7 @@ /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -40,7 +40,7 @@ { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { Index: linux/arch/i386/mm/fault.c =================================================================== --- linux.orig/arch/i386/mm/fault.c 2005-07-29 12:02:24.000000000 -0300 +++ 
linux/arch/i386/mm/fault.c 2005-07-29 12:06:11.000000000 -0300 @@ -38,6 +38,8 @@ int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -199,6 +201,18 @@ return 0; } +static void force_sig_info_fault(int si_signo, int si_code, + unsigned long address, struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); +} + fastcall void do_invalid_op(struct pt_regs *, unsigned long); /* @@ -211,29 +225,29 @@ * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) +fastcall notrace void do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long page; - int write; - siginfo_t info; - + int write, si_code; + /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); + trace_special(regs->eip, error_code, address); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); + raw_local_irq_enable(); tsk = current; - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The @@ -313,7 +327,7 @@ * we can handle it.. 
*/ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; write = 0; switch (error_code & 3) { default: /* 3: write, present */ @@ -387,11 +401,7 @@ /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk); return; } @@ -440,9 +450,9 @@ } #endif if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + printk(KERN_ALERT "BUG: Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(KERN_ALERT "BUG: Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); @@ -500,11 +510,7 @@ tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); return; vmalloc_fault: Index: linux/arch/i386/mm/highmem.c =================================================================== --- linux.orig/arch/i386/mm/highmem.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mm/highmem.c 2005-07-29 12:28:02.000000000 -0300 @@ -18,6 +18,27 @@ kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = 
pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +47,7 @@ * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +69,7 @@ return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -93,7 +114,7 @@ return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +129,9 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kmap_to_page); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic_to_page); + Index: linux/arch/i386/mm/init.c =================================================================== --- linux.orig/arch/i386/mm/init.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mm/init.c 2005-07-29 12:06:11.000000000 -0300 @@ -42,7 +42,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); Index: linux/arch/i386/mm/pageattr.c =================================================================== --- linux.orig/arch/i386/mm/pageattr.c 2005-07-29 12:02:24.000000000 -0300 +++ 
linux/arch/i386/mm/pageattr.c 2005-07-29 12:06:11.000000000 -0300 @@ -206,6 +206,9 @@ { if (PageHighMem(page)) return; + if (!enable) + check_no_locks_freed(page_address(page), page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ Index: linux/arch/i386/mm/pgtable.c =================================================================== --- linux.orig/arch/i386/mm/pgtable.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/i386/mm/pgtable.c 2005-07-29 12:06:11.000000000 -0300 @@ -180,7 +180,7 @@ * recommendations and having no core impact whatsoever. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux/arch/ia64/kernel/perfmon.c =================================================================== --- linux.orig/arch/ia64/kernel/perfmon.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ia64/kernel/perfmon.c 2005-07-29 12:06:11.000000000 -0300 @@ -497,7 +497,7 @@ static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ -static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pfm_alt_install_check); static pfm_intr_handler_desc_t *pfm_alt_intr_handler; static struct proc_dir_entry *perfmon_dir; Index: linux/arch/ia64/sn/kernel/xpnet.c =================================================================== --- linux.orig/arch/ia64/sn/kernel/xpnet.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ia64/sn/kernel/xpnet.c 2005-07-29 12:06:11.000000000 -0300 @@ -130,7 +130,7 @@ */ static u64 xpnet_broadcast_partitions; /* protect above */ -static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(xpnet_broadcast_lock); /* * Since the Block Transfer Engine (BTE) is being used for the transfer Index: linux/arch/m68k/amiga/amisound.c 
=================================================================== --- linux.orig/arch/m68k/amiga/amisound.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/m68k/amiga/amisound.c 2005-07-29 12:06:11.000000000 -0300 @@ -63,7 +63,7 @@ } static void nosound( unsigned long ignored ); -static struct timer_list sound_timer = TIMER_INITIALIZER(nosound, 0, 0); +static DEFINE_TIMER(sound_timer, nosound, 0, 0); void amiga_mksound( unsigned int hz, unsigned int ticks ) { Index: linux/arch/m68k/mac/macboing.c =================================================================== --- linux.orig/arch/m68k/mac/macboing.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/m68k/mac/macboing.c 2005-07-29 12:06:11.000000000 -0300 @@ -56,8 +56,7 @@ /* * our timer to start/continue/stop the bell */ -static struct timer_list mac_sound_timer = - TIMER_INITIALIZER(mac_nosound, 0, 0); +static DEFINE_TIMER(mac_sound_timer, mac_nosound, 0, 0); /* * Sort of initialize the sound chip (called from mac_mksound on the first Index: linux/arch/mips/Kconfig =================================================================== --- linux.orig/arch/mips/Kconfig 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/Kconfig 2005-07-29 12:06:11.000000000 -0300 @@ -308,6 +308,7 @@ config MOMENCO_OCELOT bool "Support for Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -660,6 +661,7 @@ depends on EXPERIMENTAL select BOOT_ELF32 select DMA_COHERENT + select NO_SPINLOCK select SWAP_IO_SPACE choice @@ -903,12 +905,21 @@ bool "FPCIB0 Backplane Support" depends on TOSHIBA_RBTX4927 +source "lib/Kconfig.RT" + config RWSEM_GENERIC_SPINLOCK bool + depends on !PREEMPT_RT default y config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool + depends on !PREEMPT_RT + default y config GENERIC_CALIBRATE_DELAY bool @@ -929,6 +940,9 @@ config DMA_COHERENT bool +config NO_SPINLOCK + bool + config DMA_IP27 bool @@ 
-1459,15 +1473,6 @@ This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - config RTC_DS1742 bool "DS1742 BRAM/RTC support" depends on TOSHIBA_JMR3927 || TOSHIBA_RBTX4927 @@ -1482,10 +1487,6 @@ This will result in additional memory usage, so it is not recommended for normal users. -config RWSEM_GENERIC_SPINLOCK - bool - default y - endmenu menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" Index: linux/arch/mips/kernel/Makefile =================================================================== --- linux.orig/arch/mips/kernel/Makefile 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/Makefile 2005-07-29 12:06:11.000000000 -0300 @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -17,6 +17,8 @@ obj-$(CONFIG_MIPS64) += module-elf64.o endif +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o Index: linux/arch/mips/kernel/entry.S =================================================================== --- linux.orig/arch/mips/kernel/entry.S 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/entry.S 2005-07-29 12:06:11.000000000 -0300 @@ -48,6 +48,8 @@ #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) + lw t0, kernel_preemption + beqz t0, restore_all 
lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -57,12 +59,9 @@ LONG_L t0, PT_STATUS(sp) # Interrupts off? andi t0, 1 beqz t0, restore_all - li t0, PREEMPT_ACTIVE - sw t0, TI_PRE_COUNT($28) - local_irq_enable t0 - jal schedule - sw zero, TI_PRE_COUNT($28) local_irq_disable t0 + jal preempt_schedule_irq + sw zero, TI_PRE_COUNT($28) b need_resched #endif @@ -92,6 +91,7 @@ andi t0, a2, _TIF_NEED_RESCHED beqz t0, work_notifysig work_resched: + local_irq_enable t0 jal schedule local_irq_disable t0 # make sure need_resched and Index: linux/arch/mips/kernel/gdb-stub.c =================================================================== --- linux.orig/arch/mips/kernel/gdb-stub.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/gdb-stub.c 2005-07-29 12:06:11.000000000 -0300 @@ -176,7 +176,7 @@ /* * spin locks for smp case */ -static spinlock_t kgdb_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(kgdb_lock); static spinlock_t kgdb_cpulock[NR_CPUS] = { [0 ... NR_CPUS-1] = SPIN_LOCK_UNLOCKED}; /* Index: linux/arch/mips/kernel/genrtc.c =================================================================== --- linux.orig/arch/mips/kernel/genrtc.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/genrtc.c 2005-07-29 12:06:11.000000000 -0300 @@ -14,7 +14,7 @@ #include #include -static spinlock_t mips_rtc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(mips_rtc_lock); unsigned int get_rtc_time(struct rtc_time *time) { Index: linux/arch/mips/kernel/i8259.c =================================================================== --- linux.orig/arch/mips/kernel/i8259.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/i8259.c 2005-07-29 12:06:11.000000000 -0300 @@ -31,7 +31,7 @@ * moves to arch independent land */ -spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { Index: linux/arch/mips/kernel/irq.c 
=================================================================== --- linux.orig/arch/mips/kernel/irq.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/irq.c 2005-07-29 12:06:11.000000000 -0300 @@ -125,7 +125,10 @@ irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].handler = &no_irq_type; - spin_lock_init(&irq_desc[i].lock); + raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif } arch_init_irq(); Index: linux/arch/mips/kernel/module.c =================================================================== --- linux.orig/arch/mips/kernel/module.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/module.c 2005-07-29 12:06:11.000000000 -0300 @@ -2,7 +2,7 @@ #include static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_dbetables(unsigned long addr) Index: linux/arch/mips/kernel/process.c =================================================================== --- linux.orig/arch/mips/kernel/process.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/process.c 2005-07-29 12:06:11.000000000 -0300 @@ -58,6 +58,7 @@ while (!need_resched()) if (cpu_wait) (*cpu_wait)(); + local_irq_enable(); schedule(); } } Index: linux/arch/mips/kernel/semaphore.c =================================================================== --- linux.orig/arch/mips/kernel/semaphore.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/semaphore.c 2005-07-29 12:06:11.000000000 -0300 @@ -63,7 +63,7 @@ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); Index: linux/arch/mips/kernel/signal.c 
=================================================================== --- linux.orig/arch/mips/kernel/signal.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/signal.c 2005-07-29 12:06:11.000000000 -0300 @@ -449,6 +449,10 @@ } #endif +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux/arch/mips/kernel/signal32.c =================================================================== --- linux.orig/arch/mips/kernel/signal32.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/signal32.c 2005-07-29 12:06:11.000000000 -0300 @@ -766,6 +766,10 @@ siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux/arch/mips/kernel/smp.c =================================================================== --- linux.orig/arch/mips/kernel/smp.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/smp.c 2005-07-29 12:06:11.000000000 -0300 @@ -105,7 +105,22 @@ cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. 
+ */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -284,6 +299,8 @@ return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -315,6 +332,7 @@ void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1); @@ -324,6 +342,7 @@ if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -347,6 +366,8 @@ struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -360,6 +381,7 @@ if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -390,6 +412,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -402,6 +426,7 @@ if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux/arch/mips/kernel/time.c =================================================================== --- linux.orig/arch/mips/kernel/time.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/time.c 2005-07-29 12:06:11.000000000 -0300 @@ -52,7 +52,7 @@ */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); /* * By default we provide the null RTC ops @@ -555,7 +555,7 @@ static struct 
irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_NODELAY | SA_INTERRUPT, .name = "timer", }; Index: linux/arch/mips/kernel/traps.c =================================================================== --- linux.orig/arch/mips/kernel/traps.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/kernel/traps.c 2005-07-29 12:06:11.000000000 -0300 @@ -250,7 +250,7 @@ printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void __die(const char * str, struct pt_regs * regs, const char * file, const char * func, unsigned long line) Index: linux/arch/mips/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/mips/lib/dec_and_lock.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/lib/dec_and_lock.c 2005-07-29 12:06:11.000000000 -0300 @@ -28,7 +28,7 @@ */ #ifndef ATOMIC_DEC_AND_LOCK -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -44,12 +44,12 @@ return 0; } - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); #endif /* ATOMIC_DEC_AND_LOCK */ Index: linux/arch/mips/math-emu/cp1emu.c =================================================================== --- linux.orig/arch/mips/math-emu/cp1emu.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/math-emu/cp1emu.c 2005-07-29 12:06:11.000000000 -0300 @@ -1310,7 +1310,9 @@ if (sig) break; + preempt_enable(); cond_resched(); + preempt_disable(); } while (xcp->cp0_epc > prevepc); /* SIGILL indicates a non-fpu instruction */ Index: linux/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/irq.c 
2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/sibyte/sb1250/irq.c 2005-07-29 12:06:11.000000000 -0300 @@ -88,7 +88,7 @@ /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -276,7 +276,7 @@ static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = SA_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, Index: linux/arch/mips/sibyte/sb1250/time.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/time.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/mips/sibyte/sb1250/time.c 2005-07-29 12:06:11.000000000 -0300 @@ -115,10 +115,12 @@ ll_timer_interrupt(irq, regs); } - /* - * every CPU should do profiling and process accouting - */ - ll_local_timer_interrupt(irq, regs); + if (cpu != 0) { + /* + * every CPU should do profiling and process accouting + */ + ll_local_timer_interrupt(irq, regs); + } } /* Index: linux/arch/ppc/8260_io/enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/enet.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/8260_io/enet.c 2005-07-29 12:06:11.000000000 -0300 @@ -116,7 +116,7 @@ scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/fcc_enet.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/8260_io/fcc_enet.c 2005-07-29 12:06:11.000000000 -0300 @@ -377,7 +377,7 @@ volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/8xx_io/commproc.c 
=================================================================== --- linux.orig/arch/ppc/8xx_io/commproc.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/8xx_io/commproc.c 2005-07-29 12:06:11.000000000 -0300 @@ -372,7 +372,7 @@ /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... Index: linux/arch/ppc/8xx_io/cs4218_tdm.c =================================================================== --- linux.orig/arch/ppc/8xx_io/cs4218_tdm.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/8xx_io/cs4218_tdm.c 2005-07-29 12:06:11.000000000 -0300 @@ -1380,7 +1380,7 @@ spin_unlock_irqrestore(&cs4218_lock, flags); } -static struct timer_list beep_timer = TIMER_INITIALIZER(cs_nosound, 0, 0); +static DEFINE_TIMER(beep_timer, cs_nosound, 0, 0); }; static void cs_mksound(unsigned int hz, unsigned int ticks) Index: linux/arch/ppc/8xx_io/enet.c =================================================================== --- linux.orig/arch/ppc/8xx_io/enet.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/8xx_io/enet.c 2005-07-29 12:06:11.000000000 -0300 @@ -144,7 +144,7 @@ unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8xx_io/fec.c =================================================================== --- linux.orig/arch/ppc/8xx_io/fec.c 2005-07-29 12:03:13.000000000 -0300 +++ linux/arch/ppc/8xx_io/fec.c 2005-07-29 12:06:11.000000000 -0300 @@ -165,7 +165,7 @@ struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/Kconfig =================================================================== --- linux.orig/arch/ppc/Kconfig 2005-07-29 12:03:11.000000000 -0300 +++ linux/arch/ppc/Kconfig 
2005-07-29 12:06:11.000000000 -0300 @@ -15,13 +15,6 @@ bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -918,15 +911,21 @@ depends on SMP default "4" -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. +source "kernel/Kconfig.preempt" - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +config RWSEM_GENERIC_SPINLOCK + bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool + depends on !PREEMPT_RT + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y config HIGHMEM bool "High memory support" Index: linux/arch/ppc/boot/Makefile =================================================================== --- linux.orig/arch/ppc/boot/Makefile 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/boot/Makefile 2005-07-29 12:06:11.000000000 -0300 @@ -11,6 +11,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd Index: linux/arch/ppc/kernel/Makefile =================================================================== --- linux.orig/arch/ppc/kernel/Makefile 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/Makefile 2005-07-29 12:06:11.000000000 -0300 @@ -14,8 +14,9 @@ obj-y := entry.o traps.o irq.o idle.o time.o misc.o \ process.o signal.o ptrace.o align.o \ - semaphore.o syscalls.o setup.o \ + 
syscalls.o setup.o \ cputable.o ppc_htab.o perfmon.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_POWER4) += cpu_setup_power4.o Index: linux/arch/ppc/kernel/dma-mapping.c =================================================================== --- linux.orig/arch/ppc/kernel/dma-mapping.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/dma-mapping.c 2005-07-29 12:06:11.000000000 -0300 @@ -71,7 +71,7 @@ * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -407,7 +407,7 @@ int nr_segs = PAGE_ALIGN(size + (PAGE_SIZE - offset))/PAGE_SIZE; int seg_nr = 0; - local_irq_save(flags); + raw_local_irq_save(flags); do { start = (unsigned long)kmap_atomic(page + seg_nr, @@ -426,7 +426,7 @@ seg_offset = 0; } while (seg_nr < nr_segs); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_HIGHMEM */ Index: linux/arch/ppc/kernel/entry.S =================================================================== --- linux.orig/arch/ppc/kernel/entry.S 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/entry.S 2005-07-29 13:28:14.000000000 -0300 @@ -239,7 +239,7 @@ SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -317,7 +317,7 @@ rlwinm r12,r1,0,0,18 /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. 
r5,r5,MSR_PR @@ -657,7 +657,7 @@ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -875,7 +875,7 @@ #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -889,7 +889,7 @@ MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user @@ -999,3 +999,85 @@ /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_OF */ + +#ifdef CONFIG_MCOUNT + +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function preamble, + * before the stack frame is created. An example of this preamble code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. 
*/ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux/arch/ppc/kernel/idle.c =================================================================== --- linux.orig/arch/ppc/kernel/idle.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/idle.c 2005-07-29 12:06:11.000000000 -0300 @@ -38,7 +38,7 @@ powersave = ppc_md.power_save; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { if (powersave != NULL) powersave(); #ifdef CONFIG_SMP @@ -50,8 +50,11 @@ } #endif } - if (need_resched()) - schedule(); + if (need_resched()) { + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); + } } /* @@ -59,11 +62,15 @@ */ void cpu_idle(void) { - for (;;) + for (;;) { + BUG_ON(raw_irqs_disabled()); + stop_critical_timing(); + propagate_preempt_locks_value(); if (ppc_md.idle != NULL) ppc_md.idle(); else default_idle(); + } } #if defined(CONFIG_SYSCTL) && defined(CONFIG_6xx) Index: linux/arch/ppc/kernel/irq.c =================================================================== --- linux.orig/arch/ppc/kernel/irq.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/irq.c 2005-07-29 12:06:11.000000000 -0300 @@ -138,6 +138,7 @@ void do_IRQ(struct pt_regs *regs) { int irq, first = 1; + irq_enter(); /* @@ -149,6 +150,7 @@ * 
has already been handled. -- Tom */ while ((irq = ppc_md.get_irq(regs)) >= 0) { + trace_special(regs->nip, irq, 0); __do_IRQ(irq, regs); first = 0; } Index: linux/arch/ppc/kernel/misc.S =================================================================== --- linux.orig/arch/ppc/kernel/misc.S 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/misc.S 2005-07-29 12:06:11.000000000 -0300 @@ -302,8 +302,8 @@ nop _GLOBAL(local_save_flags_ptr_end) -/* void local_irq_restore(unsigned long flags) */ -_GLOBAL(local_irq_restore) +/* void __raw_local_irq_restore(unsigned long flags) */ +_GLOBAL(__raw_local_irq_restore) /* * Just set/clear the MSR_EE bit through restore/flags but do not * change anything else. This is needed by the RT system and makes @@ -341,9 +341,9 @@ nop nop nop -_GLOBAL(local_irq_restore_end) +_GLOBAL(__raw_local_irq_restore_end) -_GLOBAL(local_irq_disable) +_GLOBAL(__raw_local_irq_disable) mfmsr r0 /* Get current interrupt state */ rlwinm r3,r0,16+1,32-1,31 /* Extract old value of 'EE' */ rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ @@ -370,9 +370,9 @@ nop nop nop -_GLOBAL(local_irq_disable_end) +_GLOBAL(__raw_local_irq_disable_end) -_GLOBAL(local_irq_enable) +_GLOBAL(__raw_local_irq_enable) mfmsr r3 /* Get current state */ ori r3,r3,MSR_EE /* Turn on 'EE' bit */ SYNC /* Some chip revs have problems here... */ @@ -399,7 +399,7 @@ nop nop nop -_GLOBAL(local_irq_enable_end) +_GLOBAL(__raw_local_irq_enable_end) /* * complement mask on the msr then "or" some values on. 
Index: linux/arch/ppc/kernel/ppc_ksyms.c =================================================================== --- linux.orig/arch/ppc/kernel/ppc_ksyms.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/ppc_ksyms.c 2005-07-29 12:06:11.000000000 -0300 @@ -293,9 +293,11 @@ EXPORT_SYMBOL(xmon); EXPORT_SYMBOL(xmon_printf); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_up); +EXPORT_SYMBOL(__compat_down); +EXPORT_SYMBOL(__compat_down_interruptible); +#endif #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) extern void (*debugger)(struct pt_regs *regs); Index: linux/arch/ppc/kernel/process.c =================================================================== --- linux.orig/arch/ppc/kernel/process.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/process.c 2005-07-29 12:06:11.000000000 -0300 @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -52,8 +54,8 @@ struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); @@ -241,7 +243,7 @@ unsigned long s; struct task_struct *last; - local_irq_save(s); + raw_local_irq_save(s); #ifdef CHECK_STACK check_stack(prev); check_stack(new); @@ -302,7 +304,7 @@ new_thread = &new->thread; old_thread = ¤t->thread; last = _switch(old_thread, new_thread); - local_irq_restore(s); + raw_local_irq_restore(s); return last; } Index: linux/arch/ppc/kernel/semaphore.c =================================================================== --- 
linux.orig/arch/ppc/kernel/semaphore.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/semaphore.c 2005-07-29 12:06:11.000000000 -0300 @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} Index: linux/arch/ppc/kernel/smp-tbsync.c =================================================================== --- linux.orig/arch/ppc/kernel/smp-tbsync.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/smp-tbsync.c 2005-07-29 12:06:11.000000000 -0300 @@ -49,7 +49,7 @@ { int cmd, tbl, tbu; - local_irq_disable(); + raw_local_irq_disable(); while( !running ) ; rmb(); @@ -78,7 +78,7 @@ } enter_contest( tbsync->mark, -1 ); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit @@ -88,7 +88,7 @@ tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for( i=-3; itbu = tbu = get_tbu(); @@ -114,7 +114,7 @@ if( i++ > 0 ) score += tbsync->race_result; } - local_irq_enable(); + 
raw_local_irq_enable(); return score; } Index: linux/arch/ppc/kernel/smp.c =================================================================== --- linux.orig/arch/ppc/kernel/smp.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/smp.c 2005-07-29 12:06:11.000000000 -0300 @@ -137,6 +137,16 @@ smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -146,7 +156,7 @@ static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -161,7 +171,7 @@ * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -196,7 +206,7 @@ if (num_online_cpus() <= 1) return 0; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); return __smp_call_function(func, info, wait, MSG_ALL_BUT_SELF); } @@ -337,7 +347,7 @@ printk("CPU %i done callin...\n", cpu); smp_ops->setup_cpu(cpu); printk("CPU %i done setup...\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); smp_ops->take_timebase(); printk("CPU %i done timebase take...\n", cpu); Index: linux/arch/ppc/kernel/temp.c =================================================================== --- linux.orig/arch/ppc/kernel/temp.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/temp.c 2005-07-29 12:06:11.000000000 -0300 @@ -143,7 +143,7 @@ int shrink; /* disabling interrupts *should* be okay */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); #ifndef CONFIG_TAU_INT @@ -186,7 +186,7 @@ */ 
mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tau_timeout_smp(unsigned long unused) Index: linux/arch/ppc/kernel/time.c =================================================================== --- linux.orig/arch/ppc/kernel/time.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/time.c 2005-07-29 12:06:11.000000000 -0300 @@ -86,10 +86,11 @@ unsigned tb_to_us; unsigned tb_last_stamp; unsigned long tb_to_ns_scale; +unsigned long cpu_khz; extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -107,7 +108,7 @@ } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -293,6 +294,7 @@ tb_ticks_per_jiffy = DECREMENTER_COUNT_601; /* mulhwu_scale_factor(1000000000, 1000000) is 0x418937 */ tb_to_us = 0x418937; + cpu_khz = 1000000000 / 1000; } else { ppc_md.calibrate_decr(); tb_to_ns_scale = mulhwu(tb_to_us, 1000 << 10); Index: linux/arch/ppc/kernel/traps.c =================================================================== --- linux.orig/arch/ppc/kernel/traps.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/kernel/traps.c 2005-07-29 12:06:11.000000000 -0300 @@ -72,7 +72,7 @@ * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * fp, long err) { @@ -113,6 +113,10 @@ debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux/arch/ppc/lib/dec_and_lock.c =================================================================== --- linux.orig/arch/ppc/lib/dec_and_lock.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/lib/dec_and_lock.c 2005-07-29 
12:06:11.000000000 -0300 @@ -19,7 +19,7 @@ */ #ifndef ATOMIC_DEC_AND_LOCK -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_raw_spin_lock(atomic_t *atomic, raw_spinlock_t *lock) { int counter; int newcount; @@ -35,12 +35,12 @@ return 0; } - spin_lock(lock); + _raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + _raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_raw_spin_lock); #endif /* ATOMIC_DEC_AND_LOCK */ Index: linux/arch/ppc/lib/locks.c =================================================================== --- linux.orig/arch/ppc/lib/locks.c 2005-07-29 12:02:24.000000000 -0300 +++ linux/arch/ppc/lib/locks.c 2005-07-29 12:06:11.000000000 -0300 @@ -43,7 +43,7 @@ return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) {