diff -urN ./linux-2.6.18.1/Documentation/kernel-parameters.txt linux-2.6.18.1-cabi-20070529-RT_HRT/Documentation/kernel-parameters.txt --- ./linux-2.6.18.1/Documentation/kernel-parameters.txt 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/Documentation/kernel-parameters.txt 2007-05-19 23:58:35.000000000 +0900 @@ -1637,6 +1637,12 @@ time Show timing data prefixed to each printk message line + timeout_granularity= + [KNL] + Timeout granularity: process timer wheel timers every + timeout_granularity jiffies. Defaults to 1 (process + timers HZ times per second - most finegrained). + clocksource= [GENERIC_TIME] Override the default clocksource Override the default clocksource and use the clocksource with the name specified. diff -urN ./linux-2.6.18.1/Documentation/rt-mutex-design.txt linux-2.6.18.1-cabi-20070529-RT_HRT/Documentation/rt-mutex-design.txt --- ./linux-2.6.18.1/Documentation/rt-mutex-design.txt 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/Documentation/rt-mutex-design.txt 2007-05-19 23:58:35.000000000 +0900 @@ -333,11 +333,11 @@ unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C) { - unsigned long T = *A; - if (*A == *B) { - *A = *C; - } - return T; + unsigned long T = *A; + if (*A == *B) { + *A = *C; + } + return T; } #define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c) @@ -582,7 +582,7 @@ try_to_take_rt_mutex is used every time the task tries to grab a mutex in the slow path. The first thing that is done here is an atomic setting of the "Has Waiters" flag of the mutex's owner field. Yes, this could really -be false, because if the the mutex has no owner, there are no waiters and +be false, because if the mutex has no owner, there are no waiters and the current task also won't have any waiters. But we don't have the lock yet, so we assume we are going to be a waiter. The reason for this is to play nice for those architectures that do have CMPXCHG. By setting this flag @@ -735,7 +735,7 @@ in the slow path too. If a waiter of a mutex woke up because of a signal or timeout between the time the owner failed the fast path CMPXCHG check and the grabbing of the wait_lock, the mutex may not have any waiters, thus the -owner still needs to make this check. If there are no waiters than the mutex +owner still needs to make this check. If there are no waiters then the mutex owner field is set to NULL, the wait_lock is released and nothing more is needed. diff -urN ./linux-2.6.18.1/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/Makefile --- ./linux-2.6.18.1/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/Makefile 2007-05-28 20:35:09.000000000 +0900 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 18 -EXTRAVERSION = .1 +EXTRAVERSION = -rt5-RT-100 NAME=Avast! A bilge rat! # *DOCUMENTATION* @@ -485,10 +485,14 @@ include $(srctree)/arch/$(ARCH)/Makefile -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_UNWIND_INFO @@ -522,7 +526,7 @@ # # INSTALL_PATH specifies where to place the updated kernel and system map # images. 
Default is /boot, but you can set it to other values -export INSTALL_PATH ?= /boot +export INSTALL_PATH = /boot # # INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory @@ -598,6 +602,10 @@ # # System.map is generated to document addresses of all kernel symbols +ifeq ($(strip $(CONFIG_CABI)),y) +libgcc=$(shell $(CC) -print-libgcc-file-name) +endif + vmlinux-init := $(head-y) $(init-y) vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) vmlinux-all := $(vmlinux-init) $(vmlinux-main) diff -urN ./linux-2.6.18.1/arch/arm/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/Kconfig --- ./linux-2.6.18.1/arch/arm/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -17,6 +17,10 @@ Europe. There is an ARM Linux project with a web page at . +config GENERIC_TIME + bool + default y + config MMU bool default y @@ -51,6 +55,18 @@ bool default y +config STACKTRACE_SUPPORT + bool + default y + +config LOCKDEP_SUPPORT + bool + default y + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + config HARDIRQS_SW_RESEND bool default y @@ -344,6 +360,15 @@ source "arch/arm/mach-netx/Kconfig" +config IS_TICK_BASED + bool + depends on GENERIC_TIME + default y + help + This is used on platforms that have not added a clocksource to + support GENERIC_TIME. Platforms which have a clocksource + should set this to 'n' in their mach-*/Kconfig. + # Definitions to make life easier config ARCH_ACORN bool @@ -419,6 +444,8 @@ menu "Kernel Features" +source "kernel/time/Kconfig" + config SMP bool "Symmetric Multi-Processing (EXPERIMENTAL)" depends on EXPERIMENTAL && REALVIEW_MPCORE @@ -463,38 +490,7 @@ accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -config NO_IDLE_HZ - bool "Dynamic tick timer" - help - Select this option if you want to disable continuous timer ticks - and have them programmed to occur as required. This option saves - power as the system can remain in idle state for longer. - - By default dynamic tick is disabled during the boot, and can be - manually enabled with: - - echo 1 > /sys/devices/system/timer/timer0/dyn_tick - - Alternatively, if you want dynamic tick automatically enabled - during boot, pass "dyntick=enable" via the kernel command string. - - Please note that dynamic tick may affect the accuracy of - timekeeping on some platforms depending on the implementation. - Currently at least OMAP, PXA2xx and SA11x0 platforms are known - to have accurate timekeeping with dynamic tick. 
+source kernel/Kconfig.preempt config HZ int diff -urN ./linux-2.6.18.1/arch/arm/boot/compressed/head.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/boot/compressed/head.S --- ./linux-2.6.18.1/arch/arm/boot/compressed/head.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/boot/compressed/head.S 2007-05-19 23:58:35.000000000 +0900 @@ -231,7 +231,8 @@ */ cmp r4, r2 bhs wont_overwrite - add r0, r4, #4096*1024 @ 4MB largest kernel size + sub r3, sp, r5 @ > compressed kernel image + add r0, r4, r3, lsl #2 @ allow for 4x expansion cmp r0, r5 bls wont_overwrite @@ -822,6 +823,19 @@ mov pc, r10 #endif +#ifdef CONFIG_MCOUNT +/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this + * trampoline + */ + .text + .align 0 + .type mcount %function + .global mcount +mcount: + mov pc, lr @ just return +#endif + + reloc_end: .align diff -urN ./linux-2.6.18.1/arch/arm/common/time-acorn.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/common/time-acorn.c --- ./linux-2.6.18.1/arch/arm/common/time-acorn.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/common/time-acorn.c 2007-05-19 23:58:35.000000000 +0900 @@ -77,7 +77,7 @@ static struct irqaction ioc_timer_irq = { .name = "timer", - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NODELAY, .handler = ioc_timer_interrupt }; diff -urN ./linux-2.6.18.1/arch/arm/kernel/dma.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/dma.c --- ./linux-2.6.18.1/arch/arm/kernel/dma.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/dma.c 2007-05-19 23:58:35.000000000 +0900 @@ -20,7 +20,7 @@ #include -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); EXPORT_SYMBOL(dma_spin_lock); static dma_t dma_chan[MAX_DMA_CHANNELS]; diff -urN ./linux-2.6.18.1/arch/arm/kernel/entry-armv.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/entry-armv.S --- ./linux-2.6.18.1/arch/arm/kernel/entry-armv.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/entry-armv.S 2007-05-19 23:58:35.000000000 +0900 @@ -191,6 +191,9 @@ __irq_svc: svc_entry +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif #ifdef CONFIG_PREEMPT get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -201,7 +204,7 @@ irq_handler #ifdef CONFIG_PREEMPT ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED blne svc_preempt preempt_return: ldr r0, [tsk, #TI_PREEMPT] @ read preempt value @@ -211,6 +214,10 @@ #endif ldr r0, [sp, #S_PSR] @ irqs are already disabled msr spsr_cxsf, r0 +#ifdef CONFIG_TRACE_IRQFLAGS + tst r0, #PSR_I_BIT + bleq trace_hardirqs_on +#endif ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr .ltorg @@ -228,7 +235,7 @@ str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED beq preempt_return @ go again b 1b #endif @@ -398,6 +405,9 @@ __irq_usr: usr_entry +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif get_thread_info tsk #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -412,6 +422,9 @@ teq r0, r7 strne r0, [r0, -r0] #endif +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on +#endif mov why, #0 b ret_to_user diff -urN ./linux-2.6.18.1/arch/arm/kernel/entry-common.S 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/entry-common.S --- ./linux-2.6.18.1/arch/arm/kernel/entry-common.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/entry-common.S 2007-05-19 23:58:35.000000000 +0900 @@ -3,6 +3,8 @@ * * Copyright (C) 2000 Russell King * + * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -40,7 +42,7 @@ fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED + tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED bne work_resched tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING beq no_work_pending @@ -50,7 +52,8 @@ b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. "why" tells us if this was a real syscall. */ @@ -387,6 +390,112 @@ #include "calls.S" #undef ABI #undef OBSOLETE +#endif + +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_MCOUNT +/* + * At the point where we are in mcount() we maintain the + * frame of the prologue code and keep the call to mcount() + * out of the stack frame list: + + saved pc <---\ caller of instrumented routine + saved lr | + ip/prev_sp | + fp -----^ | + : | + | + -> saved pc | instrumented routine + | saved lr | + | ip/prev_sp | + | fp ---------/ + | : + | + | mcount + | saved pc + | saved lr + | ip/prev sp + -- fp + r3 + r2 + r1 + sp-> r0 + : + */ + + .text + .align 0 + .type mcount %function + .global mcount + +/* gcc -pg generated FUNCTION_PROLOGUE references mcount() + * and has already created the stack frame invocation for + * the routine we have been called to instrument. We create + * a complete frame nevertheless, as we want to use the same + * call to mcount() from c code. + */ +mcount: + + ldr ip, =mcount_enabled @ leave early, if disabled + ldr ip, [ip] + cmp ip, #0 + moveq pc, lr + + mov ip, sp + stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame + + ldr r1, [fp, #-4] @ get lr (the return address + @ of the caller of the + @ instrumented function) + mov r0, lr @ get lr - (the return address + @ of the instrumented function) + + sub fp, ip, #4 @ point fp at this frame + + bl __trace +1: + ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return + +#endif + +/* ARM replacement for unsupported gcc __builtin_return_address(n) + * where 0 < n. n == 0 is supported here as well. + * + * Walk up the stack frame until the desired frame is found or a NULL + * fp is encountered, return NULL in the latter case. + * + * Note: it is possible under code optimization for the stack invocation + * of an ancestor function (level N) to be removed before calling a + * descendant function (level N+1). No easy means is available to deduce + * this scenario with the result being [for example] caller_addr(0) when + * called from level N+1 returning level N-1 rather than the expected + * level N. This optimization issue appears isolated to the case of + * a call to a level N+1 routine made at the tail end of a level N + * routine -- the level N frame is deleted and a simple branch is made + * to the level N+1 routine. 
+ */ + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr #endif diff -urN ./linux-2.6.18.1/arch/arm/kernel/fiq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/fiq.c --- ./linux-2.6.18.1/arch/arm/kernel/fiq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/fiq.c 2007-05-19 23:58:35.000000000 +0900 @@ -89,7 +89,7 @@ * disable irqs for the duration. Note - these functions are almost * entirely coded in assembly. */ -void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( @@ -107,7 +107,7 @@ : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); } -void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( diff -urN ./linux-2.6.18.1/arch/arm/kernel/head.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/head.S --- ./linux-2.6.18.1/arch/arm/kernel/head.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/head.S 2007-05-19 23:58:35.000000000 +0900 @@ -234,18 +234,19 @@ /* * Now setup the pagetables for our kernel direct - * mapped region. We round TEXTADDR down to the - * nearest megabyte boundary. It is assumed that - * the kernel fits within 4 contigous 1MB sections. + * mapped region. */ add r0, r4, #(TEXTADDR & 0xff000000) >> 18 @ start of kernel str r3, [r0, #(TEXTADDR & 0x00f00000) >> 18]! - add r3, r3, #1 << 20 - str r3, [r0, #4]! @ KERNEL + 1MB - add r3, r3, #1 << 20 - str r3, [r0, #4]! @ KERNEL + 2MB - add r3, r3, #1 << 20 - str r3, [r0, #4] @ KERNEL + 3MB + + ldr r6, =(_end - PAGE_OFFSET) + sub r6, r6, #1 @ r6 = number of sections + mov r6, r6, lsr #20 @ needed for kernel minus 1 + +1: add r3, r3, #1 << 20 + str r3, [r0, #4]! + subs r6, r6, #1 + bgt 1b /* * Then map first 1MB of ram in case it contains our boot params. diff -urN ./linux-2.6.18.1/arch/arm/kernel/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/irq.c --- ./linux-2.6.18.1/arch/arm/kernel/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -101,7 +101,7 @@ /* Handle bad interrupts */ static struct irq_desc bad_irq_desc = { .handle_irq = handle_bad_irq, - .lock = SPIN_LOCK_UNLOCKED + .lock = RAW_SPIN_LOCK_UNLOCKED(bad_irq_desc.lock) }; /* @@ -109,10 +109,12 @@ * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct irqdesc *desc = irq_desc + irq; + trace_special(instruction_pointer(regs), irq, 0); + /* * Some hardware gives randomly wrong interrupts. Rather * than crashing, do something sensible. 
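
(The following C rendering of the frame walk performed by arm_return_addr
above is an illustrative sketch only, not part of the patch. The structure
mirrors the saved {fp, sp, lr, pc} quad described in the entry-common.S
comment, and matches struct stackframe in arch/arm/lib/stacktrace.c, added
further down in this patch; the function name frame_return_addr is invented
here. Like the assembly, and unlike walk_stackframe() in stacktrace.c, it
performs no bounds checking on the frame-pointer chain.)

	struct apcs_frame {
		unsigned long fp;	/* at fp-12: caller's frame pointer */
		unsigned long sp;	/* at fp-8:  ip/prev_sp */
		unsigned long lr;	/* at fp-4:  return address into caller */
		unsigned long pc;	/* at fp-0:  saved pc */
	};

	/* C equivalent of arm_return_addr: follow the APCS frame-pointer
	 * chain 'level' frames up and return the saved lr found there,
	 * or 0 if the chain ends first. */
	static unsigned long frame_return_addr(unsigned long fp,
					       unsigned int level)
	{
		while (fp) {
			struct apcs_frame *frame =
				(struct apcs_frame *)(fp - 12);

			if (level-- == 0)
				return frame->lr;
			fp = frame->fp;		/* walk one frame up */
		}
		return 0;	/* frame list hit end, as at label 1: above */
	}
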
diff -urN ./linux-2.6.18.1/arch/arm/kernel/process.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/process.c --- ./linux-2.6.18.1/arch/arm/kernel/process.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/process.c 2007-05-19 23:58:35.000000000 +0900 @@ -123,7 +123,7 @@ cpu_relax(); else { local_irq_disable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { timer_dyn_reprogram(); arch_idle(); } @@ -154,12 +154,20 @@ if (!idle) idle = default_idle; leds_event(led_idle_start); - while (!need_resched()) - idle(); + + if (!need_resched() && !need_resched_delayed() && + !hrtimer_stop_sched_tick()) { + while (!need_resched() && !need_resched_delayed()) + idle(); + } + hrtimer_restart_sched_tick(); + leds_event(led_idle_end); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff -urN ./linux-2.6.18.1/arch/arm/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/semaphore.c --- ./linux-2.6.18.1/arch/arm/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -49,14 +49,16 @@ * we cannot lose wakeup events. */ -void __up(struct semaphore *sem) +fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } +EXPORT_SYMBOL(__compat_up); + static DEFINE_SPINLOCK(semaphore_lock); -void __sched __down(struct semaphore * sem) +fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -89,7 +91,9 @@ wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +EXPORT_SYMBOL(__compat_down); + +fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -140,6 +144,8 @@ return retval; } +EXPORT_SYMBOL(__compat_down_interruptible); + /* * Trylock failed - make sure we correct for * having decremented the count. @@ -148,7 +154,7 @@ * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -168,6 +174,15 @@ return 1; } +EXPORT_SYMBOL(__compat_down_trylock); + +fastcall int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); + /* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. 
These routines @@ -185,7 +200,7 @@ __down_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down \n\ + bl __compat_down \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ .align 5 \n\ @@ -193,7 +208,7 @@ __down_interruptible_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down_interruptible \n\ + bl __compat_down_interruptible \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ @@ -202,7 +217,7 @@ __down_trylock_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down_trylock \n\ + bl __compat_down_trylock \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ @@ -211,7 +226,7 @@ __up_wakeup: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __up \n\ + bl __compat_up \n\ ldmfd sp!, {r0 - r4, pc} \n\ "); diff -urN ./linux-2.6.18.1/arch/arm/kernel/signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/signal.c --- ./linux-2.6.18.1/arch/arm/kernel/signal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/signal.c 2007-05-19 23:58:35.000000000 +0900 @@ -630,6 +630,14 @@ siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from diff -urN ./linux-2.6.18.1/arch/arm/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/smp.c --- ./linux-2.6.18.1/arch/arm/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -515,7 +515,7 @@ cpu_clear(cpu, data->unfinished); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() diff -urN ./linux-2.6.18.1/arch/arm/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/time.c --- ./linux-2.6.18.1/arch/arm/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -69,10 +69,12 @@ */ int (*set_rtc)(void); +#ifdef CONFIG_IS_TICK_BASED static unsigned long dummy_gettimeoffset(void) { return 0; } +#endif /* * Scheduler clock - returns current time in nanosec units. @@ -84,34 +86,10 @@ return (unsigned long long)jiffies * (1000000000 / HZ); } -static unsigned long next_rtc_update; - -/* - * If we have an externally synchronized linux clock, then update - * CMOS clock accordingly every ~11 minutes. set_rtc() has to be - * called as close as possible to 500 ms before the new second - * starts. - */ -static inline void do_set_rtc(void) +void sync_persistent_clock(struct timespec ts) { - if (!ntp_synced() || set_rtc == NULL) - return; - - if (next_rtc_update && - time_before((unsigned long)xtime.tv_sec, next_rtc_update)) - return; - - if (xtime.tv_nsec < 500000000 - ((unsigned) tick_nsec >> 1) && - xtime.tv_nsec >= 500000000 + ((unsigned) tick_nsec >> 1)) - return; - - if (set_rtc()) - /* - * rtc update failed. 
Try again in 60s - */ - next_rtc_update = xtime.tv_sec + 60; - else - next_rtc_update = xtime.tv_sec + 660; + if (set_rtc) + set_rtc(); } #ifdef CONFIG_LEDS @@ -230,68 +208,6 @@ #define do_leds() #endif -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags; - unsigned long seq; - unsigned long usec, sec, lost; - - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - usec = system_timer->offset(); - - lost = jiffies - wall_jiffies; - if (lost) - usec += lost * USECS_PER_JIFFY; - - sec = xtime.tv_sec; - usec += xtime.tv_nsec / 1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - - /* usec may have gone up a lot: be safe */ - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * done, and then undo it! - */ - nsec -= system_timer->offset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - /** * save_time_delta - Save the offset between system time and RTC time * @delta: pointer to timespec to store delta @@ -332,7 +248,6 @@ { profile_tick(CPU_PROFILING, regs); do_leds(); - do_set_rtc(); do_timer(regs); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); @@ -500,8 +415,10 @@ void __init time_init(void) { +#ifdef CONFIG_IS_TICK_BASED if (system_timer->offset == NULL) system_timer->offset = dummy_gettimeoffset; +#endif system_timer->init(); #ifdef CONFIG_NO_IDLE_HZ diff -urN ./linux-2.6.18.1/arch/arm/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/traps.c --- ./linux-2.6.18.1/arch/arm/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -176,6 +176,7 @@ { #ifdef CONFIG_DEBUG_ERRORS __backtrace(); + print_traces(current); #endif } @@ -191,7 +192,7 @@ if (tsk != current) fp = thread_saved_fp(tsk); else - asm("mov%? %0, fp" : "=r" (fp)); + asm("mov %0, fp" : "=r" (fp) : : "cc"); c_backtrace(fp, 0x10); barrier(); @@ -216,7 +217,7 @@ } } -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
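
(A note on the DEFINE_RAW_SPINLOCK conversions that recur throughout this
patch: under PREEMPT_RT a plain spinlock_t is backed by a sleeping,
priority-inheriting rt-mutex, so locks taken in contexts that must not
schedule -- hard interrupt handlers, the die path, low-level DMA and GPIO
accesses -- are declared raw to retain true spinning semantics. In this
2.6.18-rt tree the spin_lock_*() macros dispatch on the lock type, which
is why the call sites need no change. The sketch below is illustrative
only and not part of the patch; hw_lock, data_lock and touch_hw_register
are invented names.)

	static DEFINE_RAW_SPINLOCK(hw_lock);	/* spins even on PREEMPT_RT */
	static DEFINE_SPINLOCK(data_lock);	/* may sleep on PREEMPT_RT */

	static void touch_hw_register(void)
	{
		unsigned long flags;

		/* Safe in any context: never schedules, and IRQs stay
		 * off for the duration of the critical section. */
		spin_lock_irqsave(&hw_lock, flags);
		/* ... program hardware registers ... */
		spin_unlock_irqrestore(&hw_lock, flags);
	}
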
@@ -252,7 +253,7 @@ } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { diff -urN ./linux-2.6.18.1/arch/arm/lib/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/lib/Makefile --- ./linux-2.6.18.1/arch/arm/lib/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/lib/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -41,6 +41,7 @@ lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o lib-$(CONFIG_ARCH_L7200) += io-acorn.o lib-$(CONFIG_ARCH_SHARK) += io-shark.o +lib-$(CONFIG_STACKTRACE) += stacktrace.o $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S diff -urN ./linux-2.6.18.1/arch/arm/lib/stacktrace.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/lib/stacktrace.c --- ./linux-2.6.18.1/arch/arm/lib/stacktrace.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/lib/stacktrace.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,77 @@ +#include +#include + +struct stackframe { + unsigned long fp; + unsigned long sp; + unsigned long lr; + unsigned long pc; +}; + +int walk_stackframe(unsigned long fp, unsigned long low, unsigned long high, + int (*fn)(struct stackframe *, void *), void *data) +{ + struct stackframe *frame; + + do { + /* + * Check current frame pointer is within bounds + */ + if ((fp - 12) < low || fp + 4 >= high) + break; + + frame = (struct stackframe *)(fp - 12); + + if (fn(frame, data)) + break; + + /* + * Update the low bound - the next frame must always + * be at a higher address than the current frame. + */ + low = fp + 4; + fp = frame->fp; + } while (fp); + + return 0; +} + +struct stack_trace_data { + struct stack_trace *trace; + unsigned int skip; +}; + +static int save_trace(struct stackframe *frame, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + + if (data->skip) { + data->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = frame->lr; + + return trace->nr_entries >= trace->max_entries; +} + +void save_stack_trace(struct stack_trace *trace, struct task_struct *task, + int all_contexts, unsigned int skip) +{ + struct stack_trace_data data; + unsigned long fp, base; + + data.trace = trace; + data.skip = skip; + + if (task) { + base = (unsigned long)task_stack_page(task); + fp = 0; + } else { + base = (unsigned long)task_stack_page(current); + asm("mov %0, fp" : "=r" (fp)); + } + + walk_stackframe(fp, base, base + THREAD_SIZE, save_trace, &data); +} diff -urN ./linux-2.6.18.1/arch/arm/mach-footbridge/netwinder-hw.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-footbridge/netwinder-hw.c --- ./linux-2.6.18.1/arch/arm/mach-footbridge/netwinder-hw.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-footbridge/netwinder-hw.c 2007-05-19 23:58:35.000000000 +0900 @@ -67,7 +67,7 @@ /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(gpio_lock); +DEFINE_RAW_SPINLOCK(gpio_lock); static unsigned int current_gpio_op; static unsigned int current_gpio_io; diff -urN ./linux-2.6.18.1/arch/arm/mach-footbridge/netwinder-leds.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-footbridge/netwinder-leds.c --- ./linux-2.6.18.1/arch/arm/mach-footbridge/netwinder-leds.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-footbridge/netwinder-leds.c 2007-05-19 23:58:35.000000000 +0900 
@@ -32,7 +32,7 @@ static char hw_led_state; static DEFINE_SPINLOCK(leds_lock); -extern spinlock_t gpio_lock; +extern raw_spinlock_t gpio_lock; static void netwinder_leds_event(led_event_t evt) { diff -urN ./linux-2.6.18.1/arch/arm/mach-integrator/core.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/core.c --- ./linux-2.6.18.1/arch/arm/mach-integrator/core.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/core.c 2007-05-19 23:58:35.000000000 +0900 @@ -164,7 +164,7 @@ #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_RAW_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. diff -urN ./linux-2.6.18.1/arch/arm/mach-integrator/pci_v3.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/pci_v3.c --- ./linux-2.6.18.1/arch/arm/mach-integrator/pci_v3.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/pci_v3.c 2007-05-19 23:58:35.000000000 +0900 @@ -162,7 +162,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_RAW_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M diff -urN ./linux-2.6.18.1/arch/arm/mach-integrator/platsmp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/platsmp.c --- ./linux-2.6.18.1/arch/arm/mach-integrator/platsmp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-integrator/platsmp.c 2007-05-19 23:58:35.000000000 +0900 @@ -31,7 +31,7 @@ volatile int __cpuinitdata pen_release = -1; unsigned long __cpuinitdata phys_pen_release = 0; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { diff -urN ./linux-2.6.18.1/arch/arm/mach-ixp4xx/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/Kconfig --- ./linux-2.6.18.1/arch/arm/mach-ixp4xx/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -1,5 +1,9 @@ if ARCH_IXP4XX +config IS_TICK_BASED + bool + default n + config ARCH_SUPPORTS_BIG_ENDIAN bool default y diff -urN ./linux-2.6.18.1/arch/arm/mach-ixp4xx/common-pci.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/common-pci.c --- ./linux-2.6.18.1/arch/arm/mach-ixp4xx/common-pci.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/common-pci.c 2007-05-19 23:58:35.000000000 +0900 @@ -53,7 +53,7 @@ * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. 
*/ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space diff -urN ./linux-2.6.18.1/arch/arm/mach-ixp4xx/common.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/common.c --- ./linux-2.6.18.1/arch/arm/mach-ixp4xx/common.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-ixp4xx/common.c 2007-05-19 23:58:35.000000000 +0900 @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -38,6 +40,11 @@ #include #include +#ifdef CONFIG_HIGH_RES_TIMERS +static int __init ixp4xx_clockevent_init(void); +static struct clock_event clockevent_ixp4xx; +#endif + /************************************************************************* * IXP4xx chipset I/O mapping *************************************************************************/ @@ -253,25 +260,17 @@ static unsigned volatile last_jiffy_time; -#define CLOCK_TICKS_PER_USEC ((CLOCK_TICK_RATE + USEC_PER_SEC/2) / USEC_PER_SEC) - -/* IRQs are disabled before entering here from do_gettimeofday() */ -static unsigned long ixp4xx_gettimeoffset(void) -{ - u32 elapsed; - - elapsed = *IXP4XX_OSTS - last_jiffy_time; - - return elapsed / CLOCK_TICKS_PER_USEC; -} - static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - write_seqlock(&xtime_lock); - /* Clear Pending Interrupt by writing '1' to it */ *IXP4XX_OSST = IXP4XX_OSST_TIMER_1_PEND; +#ifdef CONFIG_HIGH_RES_TIMERS + if (clockevent_ixp4xx.event_handler) + clockevent_ixp4xx.event_handler(regs); +#else + write_seqlock(&xtime_lock); + /* * Catch up with the real idea of time */ @@ -281,6 +280,7 @@ } write_sequnlock(&xtime_lock); +#endif return IRQ_HANDLED; } @@ -299,17 +299,18 @@ /* Setup the Timer counter value */ *IXP4XX_OSRT1 = (LATCH & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE; - /* Reset time-stamp counter */ - *IXP4XX_OSTS = 0; last_jiffy_time = 0; /* Connect the interrupt handler and enable the interrupt */ setup_irq(IRQ_IXP4XX_TIMER1, &ixp4xx_timer_irq); + +#ifdef CONFIG_HIGH_RES_TIMERS + ixp4xx_clockevent_init(); +#endif } struct sys_timer ixp4xx_timer = { .init = ixp4xx_timer_init, - .offset = ixp4xx_gettimeoffset, }; static struct resource ixp46x_i2c_resources[] = { @@ -365,3 +366,70 @@ ixp4xx_exp_bus_size >> 20); } +cycle_t ixp4xx_get_cycles(void) +{ + return *IXP4XX_OSTS; +} + +static struct clocksource clocksource_ixp4xx = { + .name = "OSTS", + .rating = 200, + .read = ixp4xx_get_cycles, + .mask = 0xFFFFFFFF, + .shift = 20, + .is_continuous = 1, +}; + +static int __init ixp4xx_clocksource_init(void) +{ + /* Reset time-stamp counter */ + *IXP4XX_OSTS = 0; + + clocksource_ixp4xx.mult = + clocksource_khz2mult(66660, clocksource_ixp4xx.shift); + clocksource_register(&clocksource_ixp4xx); + + return 0; +} +device_initcall(ixp4xx_clocksource_init); + +#ifdef CONFIG_HIGH_RES_TIMERS +static u32 clockevent_mode = 0; + +static void ixp4xx_set_next_event(unsigned long evt, + struct clock_event *unused) +{ + u32 oneshot = (clockevent_mode == CLOCK_EVT_ONESHOT) ? 
+ IXP4XX_OST_ONE_SHOT : 0; + + *IXP4XX_OSRT1 = (evt & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE | + oneshot; +} + +static void ixp4xx_set_mode(int mode, struct clock_event *evt) +{ + clockevent_mode = mode; +} + +static struct clock_event clockevent_ixp4xx = { + .name = "ixp4xx timer1", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_TICK | + CLOCK_CAP_UPDATE | CLOCK_CAP_PROFILE, + .shift = 32, + .set_mode = ixp4xx_set_mode, + .set_next_event = ixp4xx_set_next_event, +}; + +static int __init ixp4xx_clockevent_init(void) +{ + clockevent_ixp4xx.mult = div_sc(FREQ, NSEC_PER_SEC, + clockevent_ixp4xx.shift); + clockevent_ixp4xx.max_delta_ns = + clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx); + clockevent_ixp4xx.min_delta_ns = + clockevent_delta2ns(0xf, &clockevent_ixp4xx); + register_local_clockevent(&clockevent_ixp4xx); + + return 0; +} +#endif diff -urN ./linux-2.6.18.1/arch/arm/mach-omap1/pm.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-omap1/pm.c --- ./linux-2.6.18.1/arch/arm/mach-omap1/pm.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-omap1/pm.c 2007-05-19 23:58:35.000000000 +0900 @@ -120,7 +120,7 @@ local_irq_disable(); local_fiq_disable(); - if (need_resched()) { + if (need_resched() || need_resched_delayed()) { local_fiq_enable(); local_irq_enable(); return; diff -urN ./linux-2.6.18.1/arch/arm/mach-omap2/pm.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-omap2/pm.c --- ./linux-2.6.18.1/arch/arm/mach-omap2/pm.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-omap2/pm.c 2007-05-19 23:58:35.000000000 +0900 @@ -53,7 +53,7 @@ { local_irq_disable(); local_fiq_disable(); - if (need_resched()) { + if (need_resched() || need_resched_delayed()) { local_fiq_enable(); local_irq_enable(); return; diff -urN ./linux-2.6.18.1/arch/arm/mach-sa1100/badge4.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-sa1100/badge4.c --- ./linux-2.6.18.1/arch/arm/mach-sa1100/badge4.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-sa1100/badge4.c 2007-05-19 23:58:35.000000000 +0900 @@ -240,15 +240,22 @@ /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); GPCR = BADGE4_GPIO_PCMEN5V; } local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); diff -urN ./linux-2.6.18.1/arch/arm/mach-shark/leds.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-shark/leds.c --- ./linux-2.6.18.1/arch/arm/mach-shark/leds.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-shark/leds.c 2007-05-19 23:58:35.000000000 +0900 @@ -32,7 +32,7 @@ static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); diff -urN ./linux-2.6.18.1/arch/arm/mach-versatile/Kconfig 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-versatile/Kconfig --- ./linux-2.6.18.1/arch/arm/mach-versatile/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-versatile/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -1,6 +1,10 @@ menu "Versatile platform type" depends on ARCH_VERSATILE +config IS_TICK_BASED + bool + default n + config ARCH_VERSATILE_PB bool "Support Versatile/PB platform" default y diff -urN ./linux-2.6.18.1/arch/arm/mach-versatile/core.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-versatile/core.c --- ./linux-2.6.18.1/arch/arm/mach-versatile/core.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mach-versatile/core.c 2007-05-19 23:58:35.000000000 +0900 @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -808,59 +810,50 @@ #define TICKS2USECS(x) ((x) / TICKS_PER_uSEC) #endif -/* - * Returns number of ms since last clock interrupt. Note that interrupts - * will have been disabled by do_gettimeoffset() - */ -static unsigned long versatile_gettimeoffset(void) +#ifdef CONFIG_HIGH_RES_TIMERS +static void timer_set_mode(int mode, struct clock_event *clk) { - unsigned long ticks1, ticks2, status; - - /* - * Get the current number of ticks. Note that there is a race - * condition between us reading the timer and checking for - * an interrupt. We get around this by ensuring that the - * counter has not reloaded between our two reads. - */ - ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff; - do { - ticks1 = ticks2; - status = __raw_readl(VA_IC_BASE + VIC_RAW_STATUS); - ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff; - } while (ticks2 > ticks1); - - /* - * Number of ticks since last interrupt. - */ - ticks1 = TIMER_RELOAD - ticks2; - - /* - * Interrupt pending? If so, we've reloaded once already. 
- * - * FIXME: Need to check this is effectively timer 0 that expires - */ - if (status & IRQMASK_TIMERINT0_1) - ticks1 += TIMER_RELOAD; + if (mode == CLOCK_EVT_PERIODIC) { + writel(TIMER_CTRL_PERIODIC | TIMER_CTRL_32BIT | TIMER_CTRL_IE | + TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL); + } else { + writel(TIMER_CTRL_ONESHOT | TIMER_CTRL_32BIT | TIMER_CTRL_IE | + TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL); + } +} - /* - * Convert the ticks to usecs - */ - return TICKS2USECS(ticks1); +static void timer_set_next_event(unsigned long evt, struct clock_event *unused) +{ + BUG_ON(!evt); + writel(evt, TIMER0_VA_BASE + TIMER_LOAD); } +static struct clock_event timer0_clock = { + .name = "timer0", + .shift = 32, + .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_UPDATE | + CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE, + .set_mode = timer_set_mode, + .set_next_event = timer_set_next_event, +}; +#endif + /* * IRQ handler for the timer */ static irqreturn_t versatile_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - write_seqlock(&xtime_lock); - // ...clear the interrupt writel(1, TIMER0_VA_BASE + TIMER_INTCLR); +#ifdef CONFIG_HIGH_RES_TIMERS + if (timer0_clock.event_handler) + timer0_clock.event_handler(regs); +#else + write_seqlock(&xtime_lock); timer_tick(regs); - write_sequnlock(&xtime_lock); +#endif return IRQ_HANDLED; } @@ -893,11 +886,20 @@ /* * Initialise to a known state (all timers off) */ - writel(0, TIMER0_VA_BASE + TIMER_CTRL); + writel(0, TIMER0_VA_BASE + TIMER_CTRL); writel(0, TIMER1_VA_BASE + TIMER_CTRL); writel(0, TIMER2_VA_BASE + TIMER_CTRL); writel(0, TIMER3_VA_BASE + TIMER_CTRL); +#ifdef CONFIG_HIGH_RES_TIMERS + timer0_clock.mult = div_sc(1000000, NSEC_PER_SEC, timer0_clock.shift); + timer0_clock.max_delta_ns = + clockevent_delta2ns(0xffffffff, &timer0_clock); + timer0_clock.min_delta_ns = + clockevent_delta2ns(0xf, &timer0_clock); + register_global_clockevent(&timer0_clock); +#endif + writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD); writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_VALUE); writel(TIMER_DIVISOR | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC | @@ -911,5 +913,36 @@ struct sys_timer versatile_timer = { .init = versatile_timer_init, - .offset = versatile_gettimeoffset, }; + +cycle_t versatile_get_cycles(void) +{ + return ~readl(TIMER3_VA_BASE + TIMER_VALUE); +} + +static struct clocksource clocksource_versatile = { + .name = "timer3", + .rating = 200, + .read = versatile_get_cycles, + .mask = 0xFFFFFFFF, + .shift = 20, + .is_continuous = 1, +}; + +static int __init versatile_clocksource_init(void) +{ + writel(0, TIMER3_VA_BASE + TIMER_CTRL); + writel(0xffffffff, TIMER3_VA_BASE + TIMER_LOAD); + writel(0xffffffff, TIMER3_VA_BASE + TIMER_VALUE); + writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, + TIMER3_VA_BASE + TIMER_CTRL); + + clocksource_versatile.mult = + clocksource_khz2mult(1000, clocksource_versatile.shift); + clocksource_register(&clocksource_versatile); + + return 0; +} + +device_initcall(versatile_clocksource_init); + diff -urN ./linux-2.6.18.1/arch/arm/mm/consistent.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/consistent.c --- ./linux-2.6.18.1/arch/arm/mm/consistent.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/consistent.c 2007-05-19 23:58:35.000000000 +0900 @@ -40,7 +40,7 @@ * These are the page tables (2MB each) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; -static DEFINE_SPINLOCK(consistent_lock); +static 
DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. diff -urN ./linux-2.6.18.1/arch/arm/mm/copypage-v4mc.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-v4mc.c --- ./linux-2.6.18.1/arch/arm/mm/copypage-v4mc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-v4mc.c 2007-05-19 23:58:35.000000000 +0900 @@ -29,7 +29,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_page @@ -43,7 +43,7 @@ * instruction. If your processor does not supply this, you have to write your * own copy_user_page that does the right thing. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { asm volatile( @@ -82,7 +82,7 @@ /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( diff -urN ./linux-2.6.18.1/arch/arm/mm/copypage-v6.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-v6.c --- ./linux-2.6.18.1/arch/arm/mm/copypage-v6.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-v6.c 2007-05-19 23:58:35.000000000 +0900 @@ -26,7 +26,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just diff -urN ./linux-2.6.18.1/arch/arm/mm/copypage-xscale.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-xscale.c --- ./linux-2.6.18.1/arch/arm/mm/copypage-xscale.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/copypage-xscale.c 2007-05-19 23:58:35.000000000 +0900 @@ -31,7 +31,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_page @@ -41,7 +41,7 @@ * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { /* @@ -104,7 +104,7 @@ /* * XScale optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( diff -urN ./linux-2.6.18.1/arch/arm/mm/fault.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/fault.c --- ./linux-2.6.18.1/arch/arm/mm/fault.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/fault.c 2007-05-19 23:58:35.000000000 +0900 @@ -215,7 +215,7 @@ return fault; } -static int +static notrace int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; @@ -315,7 +315,7 @@ * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. */ -static int +static notrace int do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -361,7 +361,7 @@ * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. 
*/ -static int +static notrace int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk = current; @@ -372,7 +372,7 @@ /* * This abort handler always returns "fault". */ -static int +static notrace int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; @@ -427,7 +427,7 @@ { do_bad, SIGBUS, 0, "unknown 31" } }; -void __init +void __init notrace hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, const char *name) { @@ -441,7 +441,7 @@ /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void +asmlinkage notrace void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); @@ -460,7 +460,7 @@ notify_die("", regs, &info, fsr, 0); } -asmlinkage void +asmlinkage notrace void do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); diff -urN ./linux-2.6.18.1/arch/arm/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/init.c --- ./linux-2.6.18.1/arch/arm/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/mm/init.c 2007-05-19 23:58:35.000000000 +0900 @@ -25,7 +25,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; diff -urN ./linux-2.6.18.1/arch/arm/plat-omap/clock.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/clock.c --- ./linux-2.6.18.1/arch/arm/plat-omap/clock.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/clock.c 2007-05-19 23:58:35.000000000 +0900 @@ -29,7 +29,7 @@ static LIST_HEAD(clocks); static DEFINE_MUTEX(clocks_mutex); -static DEFINE_SPINLOCK(clockfw_lock); +static DEFINE_RAW_SPINLOCK(clockfw_lock); static struct clk_functions *arch_clock; diff -urN ./linux-2.6.18.1/arch/arm/plat-omap/dma.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/dma.c --- ./linux-2.6.18.1/arch/arm/plat-omap/dma.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/dma.c 2007-05-19 23:58:35.000000000 +0900 @@ -949,7 +949,7 @@ /*----------------------------------------------------------------------------*/ static struct lcd_dma_info { - spinlock_t lock; + raw_spinlock_t lock; int reserved; void (* callback)(u16 status, void *data); void *cb_data; diff -urN ./linux-2.6.18.1/arch/arm/plat-omap/gpio.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/gpio.c --- ./linux-2.6.18.1/arch/arm/plat-omap/gpio.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/gpio.c 2007-05-19 23:58:35.000000000 +0900 @@ -120,7 +120,7 @@ u32 reserved_map; u32 suspend_wakeup; u32 saved_wakeup; - spinlock_t lock; + raw_spinlock_t lock; }; #define METHOD_MPUIO 0 diff -urN ./linux-2.6.18.1/arch/arm/plat-omap/mux.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/mux.c --- ./linux-2.6.18.1/arch/arm/plat-omap/mux.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/mux.c 2007-05-19 23:58:35.000000000 +0900 @@ -56,7 +56,7 @@ */ int __init_or_module omap_cfg_reg(const unsigned long index) { - static DEFINE_SPINLOCK(mux_spin_lock); + static DEFINE_RAW_SPINLOCK(mux_spin_lock); unsigned long flags; struct pin_config *cfg; diff -urN 
./linux-2.6.18.1/arch/arm/plat-omap/pm.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/pm.c --- ./linux-2.6.18.1/arch/arm/plat-omap/pm.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/arm/plat-omap/pm.c 2007-05-19 23:58:35.000000000 +0900 @@ -84,7 +84,7 @@ local_irq_disable(); local_fiq_disable(); - if (need_resched()) { + if (need_resched() || need_resched_delayed()) { local_fiq_enable(); local_irq_enable(); return; diff -urN ./linux-2.6.18.1/arch/h8300/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/h8300/Kconfig --- ./linux-2.6.18.1/arch/h8300/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/h8300/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -41,6 +41,10 @@ bool default y +config GENERIC_TIME + bool + default y + config TIME_LOW_RES bool default y diff -urN ./linux-2.6.18.1/arch/h8300/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/h8300/kernel/time.c --- ./linux-2.6.18.1/arch/h8300/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/h8300/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -68,58 +68,6 @@ platform_timer_setup(timer_interrupt); } -/* - * This version of gettimeofday has near microsecond resolution. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags; - unsigned long usec, sec; - - read_lock_irqsave(&xtime_lock, flags); - usec = 0; - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - read_unlock_irqrestore(&xtime_lock, flags); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_lock_irq(&xtime_lock); - /* This is revolting. We need to set the xtime.tv_usec - * correctly. However, the value in this location is - * is value at the last tick. - * Discover what correction gettimeofday - * would have done, and then undo it! - */ - while (tv->tv_nsec < 0) { - tv->tv_nsec += NSEC_PER_SEC; - tv->tv_sec--; - } - - xtime.tv_sec = tv->tv_sec; - xtime.tv_nsec = tv->tv_nsec; - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - unsigned long long sched_clock(void) { return (unsigned long long)jiffies * (1000000000 / HZ); diff -urN ./linux-2.6.18.1/arch/i386/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig --- ./linux-2.6.18.1/arch/i386/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig 2007-05-20 14:14:28.000000000 +0900 @@ -65,6 +65,8 @@ menu "Processor type and features" +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -261,6 +263,19 @@ source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y if !RWSEM_GENERIC_SPINLOCK + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -708,6 +723,7 @@ config REGPARM bool "Use register arguments" + depends on !MCOUNT default y help Compile the kernel with -mregparm=3. This instructs gcc to use @@ -791,6 +807,10 @@ enable suspend on SMP systems. CPUs can be controlled through /sys/devices/system/cpu. 
+config GENERIC_TIME_VSYSCALL + depends on EXPERIMENTAL + bool "VSYSCALL gettimeofday() interface" + config COMPAT_VDSO bool "Compat VDSO support" default y @@ -803,8 +823,11 @@ If unsure, say Y. +source "drivers/cabi/Kconfig" + endmenu + config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on HIGHMEM @@ -966,6 +989,7 @@ source "arch/i386/kernel/cpu/cpufreq/Kconfig" + endmenu menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff -urN ./linux-2.6.18.1/arch/i386/Kconfig.cpu linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.cpu --- ./linux-2.6.18.1/arch/i386/Kconfig.cpu 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.cpu 2007-05-19 23:58:35.000000000 +0900 @@ -235,11 +235,6 @@ depends on M386 default y -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y diff -urN ./linux-2.6.18.1/arch/i386/Kconfig.debug linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.debug --- ./linux-2.6.18.1/arch/i386/Kconfig.debug 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.debug 2007-05-19 23:58:35.000000000 +0900 @@ -22,6 +22,7 @@ config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y help This option will cause messages to be printed if free stack space drops below a certain limit. @@ -29,6 +30,7 @@ config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -49,6 +51,7 @@ config DEBUG_RODATA bool "Write protect kernel read-only data structures" depends on DEBUG_KERNEL + default y help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const @@ -59,6 +62,7 @@ config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on DEBUG_KERNEL + default y help If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates diff -urN ./linux-2.6.18.1/arch/i386/Kconfig.orig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.orig --- ./linux-2.6.18.1/arch/i386/Kconfig.orig 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/Kconfig.orig 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,1209 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux Kernel Configuration" + +config X86_32 + bool + default y + help + This is Linux's home port. Linux was originally native to the Intel + 386, and runs on all the later x86 processors including the Intel + 486, 586, Pentiums, and various instruction-set-compatible chips by + AMD, Cyrix, and others. 
+ +config GENERIC_TIME + bool + default y + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + +config SEMAPHORE_SLEEPERS + bool + default y + +config X86 + bool + default y + +config MMU + bool + default y + +config SBUS + bool + +config GENERIC_ISA_DMA + bool + default y + +config GENERIC_IOMAP + bool + default y + +config GENERIC_HWEIGHT + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + default y + +config DMI + bool + default y + +source "init/Kconfig" + +menu "Processor type and features" + +source "kernel/time/Kconfig" + +config SMP + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + Note that if you say Y here and choose architecture "586" or + "Pentium" under "Processor family", the kernel will not work on 486 + architectures. Similarly, multiprocessor kernels for the "PPro" + architecture may not work on all Pentium based boards. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. The "Advanced Power + Management" code will be disabled if you say Y here. + + See also the , + , + and the SMP-HOWTO available at + . + + If you don't know what to do here, say N. + +choice + prompt "Subarchitecture Type" + default X86_PC + +config X86_PC + bool "PC-compatible" + help + Choose this option if your computer is a standard PC or compatible. + +config X86_ELAN + bool "AMD Elan" + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + +config X86_VOYAGER + bool "Voyager (NCR)" + help + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + select SMP + select NUMA + help + This option is used for getting Linux to run on a (IBM/Sequent) NUMA + multiquad box. This changes the way that processors are bootstrapped, + and uses Clustered Logical APIC addressing mode instead of Flat Logical. + You will need a new lynxer.elf file to flash your firmware with - send + email to . + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. + + If you don't have one of these computers, you should say N here. + If you want to build a NUMA kernel, you must select ACPI. + +config X86_BIGSMP + bool "Support for other sub-arch SMP systems with more than 8 CPUs" + depends on SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + + If you don't have such a system, you should say N here. 
+ +config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" + help + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + + Say Y here to create a kernel to run on the SGI 320 or 540. + + A kernel compiled for the Visual Workstation will not run on PCs + and vice versa. See for details. + +config X86_GENERICARCH + bool "Generic architecture (Summit, bigsmp, ES7000, default)" + depends on SMP + help + This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. + It is intended for a generic binary kernel. + If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + +config X86_ES7000 + bool "Support for Unisys ES7000 IA32 series" + depends on SMP + help + Support for Unisys ES7000 systems. Say 'Y' here if this kernel is + supposed to run on an IA32-based Unisys ES7000 system. + Only choose this option if you have such a system, otherwise you + should say N here. + +endchoice + +config ACPI_SRAT + bool + default y + depends on ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + select ACPI_NUMA + +config HAVE_ARCH_PARSE_SRAT + bool + default y + depends on ACPI_SRAT + +config X86_SUMMIT_NUMA + bool + default y + depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) + +config X86_CYCLONE_TIMER + bool + default y + depends on X86_SUMMIT || X86_GENERICARCH + +config ES7000_CLUSTERED_APIC + bool + default y + depends on SMP && X86_ES7000 && MPENTIUMIII + +source "arch/i386/Kconfig.cpu" + +config HPET_TIMER + bool "HPET Timer Support" + help + This enables the use of the HPET for the kernel's internal timer. + HPET is the next generation timer replacing legacy 8254s. + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. + + Choose N to continue using the legacy 8254 timer. + +config HPET_EMULATE_RTC + bool + depends on HPET_TIMER && RTC=y + default y + +config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 + depends on SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 255 and the + minimum value which makes sense is 2. + + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on X86_HT + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on X86_HT + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +source "kernel/Kconfig.preempt" + +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y if !RWSEM_GENERIC_SPINLOCK + +config X86_UP_APIC + bool "Local APIC support on uniprocessors" + depends on !SMP && !(X86_VISWS || X86_VOYAGER) + help + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. 
If you have a single-CPU + system which has a processor with a local APIC, you can say Y here to + enable and use it. If you say Y here even though your machine doesn't + have a local APIC, then the kernel will still run with no slowdown at + all. The local APIC supports CPU-generated self-interrupts (timer, + performance counters), and the NMI watchdog which detects hard + lockups. + +config X86_UP_IOAPIC + bool "IO-APIC support on uniprocessors" + depends on X86_UP_APIC + help + An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an + SMP-capable replacement for PC-style interrupt controllers. Most + SMP systems and many recent uniprocessor systems have one. + + If you have a single-CPU system with an IO-APIC, you can say Y here + to use it. If you say Y here even though your machine doesn't have + an IO-APIC, then the kernel will still run with no slowdown at all. + +config X86_LOCAL_APIC + bool + depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) + default y + +config X86_IO_APIC + bool + depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) + default y + +config X86_VISWS_APIC + bool + depends on X86_VISWS + default y + +config X86_MCE + bool "Machine Check Exception" + depends on !X86_VOYAGER + ---help--- + Machine Check Exception support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, component failure). + The action the kernel takes depends on the severity of the problem, + ranging from a warning message on the console, to halting the machine. + Your processor must be a Pentium or newer to support this - check the + flags in /proc/cpuinfo for mce. Note that some older Pentium systems + have a design flaw which leads to false MCE events - hence MCE is + disabled on all P5 processors, unless explicitly enabled with "mce" + as a boot argument. Similarly, if MCE is built in and creates a + problem on some new non-standard machine, you can boot with "nomce" + to disable it. MCE support simply ignores non-MCE processors like + the 386 and 486, so nearly everyone can say Y here. + +config X86_MCE_NONFATAL + tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" + depends on X86_MCE + help + Enabling this feature starts a timer that triggers every 5 seconds which + will look at the machine check registers to see if anything happened. + Non-fatal problems automatically get corrected (but still logged). + Disable this if you don't want to see these messages. + Seeing the messages this option prints out may be indicative of dying hardware, + or out-of-spec (ie, overclocked) hardware. + This option only does something on certain CPUs. + (AMD Athlon/Duron and Intel Pentium 4) + +config X86_MCE_P4THERMAL + bool "check for P4 thermal throttling interrupt." + depends on X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS + help + Enabling this feature will cause a message to be printed when the P4 + enters thermal throttling. + +config VM86 + default y + bool "Enable VM86 support" if EMBEDDED + help + This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. + +config TOSHIBA + tristate "Toshiba Laptop support" + ---help--- + This adds a driver to safely access the System Management Mode of + the CPU on Toshiba portables with a genuine Toshiba BIOS. It does + not work on models with a Phoenix BIOS. 
The System Management Mode + is used to set the BIOS and power saving options on Toshiba portables. + + For information on utilities to make use of this driver see the + Toshiba Linux utilities web site at: + . + + Say Y if you intend to run this kernel on a Toshiba portable. + Say N otherwise. + +config I8K + tristate "Dell laptop support" + ---help--- + This adds a driver to safely access the System Management Mode + of the CPU on the Dell Inspiron 8000. The System Management Mode + is used to read cpu temperature and cooling fan status and to + control the fans on the I8K portables. + + This driver has been tested only on the Inspiron 8000 but it may + also work with other Dell laptops. You can force loading on other + models by passing the parameter `force=1' to the module. Use at + your own risk. + + For information on utilities to make use of this driver see the + I8K Linux utilities web site at: + + + Say Y if you intend to run this kernel on a Dell Inspiron 8000. + Say N otherwise. + +config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" + depends on X86 + default n + ---help--- + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on + some combinations of hardware and BIOS. The symptom, for which + this config is intended, is when reboot ends with a stalled/hung + system. + + Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1. + combination. + + Say Y if you want to enable the fixup. Currently, it's safe to + enable this option even if you don't need it. + Say N otherwise. + +config MICROCODE + tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" + ---help--- + If you say Y here and also to "/dev file system support" in the + 'File systems' section, you will be able to update the microcode on + Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II, + Pentium III, Pentium 4, Xeon etc. You will obviously need the + actual microcode binary data itself which is not shipped with the + Linux kernel. + + For latest news and information on obtaining all the required + ingredients for this driver, check: + . + + To compile this driver as a module, choose M here: the + module will be called microcode. + +config X86_MSR + tristate "/dev/cpu/*/msr - Model-specific register support" + help + This device gives privileged processes access to the x86 + Model-Specific Registers (MSRs). It is a character device with + major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. + MSR accesses are directed to a specific CPU on multi-processor + systems. + +config X86_CPUID + tristate "/dev/cpu/*/cpuid - CPU information support" + help + This device gives processes access to the x86 CPUID instruction to + be executed on a specific processor. It is a character device + with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to + /dev/cpu/31/cpuid. + +source "drivers/firmware/Kconfig" + +choice + prompt "High Memory Support" + default NOHIGHMEM + +config NOHIGHMEM + bool "off" + depends on !X86_NUMAQ + ---help--- + Linux can use up to 64 Gigabytes of physical memory on x86 systems. + However, the address space of 32-bit x86 processors is only 4 + Gigabytes large. That means that, if you have a large amount of + physical memory, not all of it can be "permanently mapped" by the + kernel. The physical memory that's not permanently mapped is called + "high memory". 
+ + If you are compiling a kernel which will never run on a machine with + more than 1 Gigabyte total physical RAM, answer "off" here (default + choice and suitable for most users). This will result in a "3GB/1GB" + split: 3GB are mapped so that each process sees a 3GB virtual memory + space and the remaining part of the 4GB virtual memory space is used + by the kernel to permanently map as much physical memory as + possible. + + If the machine has between 1 and 4 Gigabytes physical RAM, then + answer "4GB" here. + + If more than 4 Gigabytes is used then answer "64GB" here. This + selection turns Intel PAE (Physical Address Extension) mode on. + PAE implements 3-level paging on IA32 processors. PAE is fully + supported by Linux, PAE mode is implemented on all recent Intel + processors (Pentium Pro and better). NOTE: If you say "64GB" here, + then the kernel will not boot on CPUs that don't support PAE! + + The actual amount of total physical memory will either be + auto detected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time.) + + If unsure, say "off". + +config HIGHMEM4G + bool "4GB" + depends on !X86_NUMAQ + help + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +config HIGHMEM64G + bool "64GB" + depends on X86_CMPXCHG64 + help + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +endchoice + +choice + depends on EXPERIMENTAL && !X86_PAE + prompt "Memory split" if EMBEDDED + default VMSPLIT_3G + help + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! 
+ + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x78000000 if VMSPLIT_2G + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + +config HIGHMEM + bool + depends on HIGHMEM64G || HIGHMEM4G + default y + +config X86_PAE + bool + depends on HIGHMEM64G + default y + select RESOURCES_64BIT + +# Common NUMA Features +config NUMA + bool "Numa Memory Allocation and Scheduler Support" + depends on SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) + default n if X86_PC + default y if (X86_NUMAQ || X86_SUMMIT) + +comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" + depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) + +config NODES_SHIFT + int + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES + +config HAVE_ARCH_BOOTMEM_NODE + bool + depends on NUMA + default y + +config ARCH_HAVE_MEMORY_PRESENT + bool + depends on DISCONTIGMEM + default y + +config NEED_NODE_MEMMAP_SIZE + bool + depends on DISCONTIGMEM || SPARSEMEM + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on (ARCH_SELECT_MEMORY_MODEL && X86_PC) + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on (NUMA || (X86_PC && EXPERIMENTAL)) + select SPARSEMEM_STATIC + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + bool + default y + depends on NUMA + +config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on HIGHMEM4G || HIGHMEM64G + help + The VM uses one page table entry for each page of physical memory. + For systems with a lot of RAM, this can be wasteful of precious + low memory. Setting this option will put user-space page table + entries in high memory. + +config MATH_EMULATION + bool "Math emulation" + ---help--- + Linux can emulate a math coprocessor (used for floating point + operations) if you don't have one. 486DX and Pentium processors have + a math coprocessor built in, 486SX and 386 do not, unless you added + a 487DX or 387, respectively. (The messages during boot time can + give you some hints here ["man dmesg"].) Everyone needs either a + coprocessor or this emulation. + + If you don't have a math coprocessor, you need to say Y here; if you + say Y here even though you have a coprocessor, the coprocessor will + be used nevertheless. (This behavior can be changed with the kernel + command line option "no387", which comes handy if your coprocessor + is broken. Try "man bootparam" or see the documentation of your boot + loader (lilo or loadlin) about how to pass options to the kernel at + boot time.) This means that it is a good idea to say Y here if you + intend to use this kernel on different machines. + + More information about the internals of the Linux math coprocessor + emulation can be found in . + + If you are not sure, say Y; apart from resulting in a 66 KB bigger + kernel, it won't hurt. 
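For reference, the PAGE_OFFSET value selected above fixes the user/kernel split: everything below it is user address space, everything above is the kernel's direct (lowmem) mapping. A sketch of the resulting arithmetic, not part of the patch, using the standard i386 linear-map conversion:

	/* with the default 3G/1G split, PAGE_OFFSET is 0xC0000000 */
	#define EXAMPLE_PAGE_OFFSET	0xC0000000UL
	#define example_pa(v)	((unsigned long)(v) - EXAMPLE_PAGE_OFFSET)
	#define example_va(p)	((void *)((unsigned long)(p) + EXAMPLE_PAGE_OFFSET))

	/*
	 * Physical 1 MB (0x00100000) is mapped at virtual 0xC0100000; under
	 * VMSPLIT_2G (PAGE_OFFSET 0x78000000) the same page would sit at
	 * 0x78100000, leaving roughly 2 GB of virtual space to user mode.
	 */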
+ +config MTRR + bool "MTRR (Memory Type Range Register) support" + ---help--- + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control + processor access to memory ranges. This is most useful if you have + a video (VGA) card on a PCI or AGP bus. Enabling write-combining + allows bus write transfers to be combined into a larger transfer + before bursting over the PCI/AGP bus. This can increase performance + of image write operations 2.5 times or more. Saying Y here creates a + /proc/mtrr file which may be used to manipulate your processor's + MTRRs. Typically the X server should use this. + + This code has a reasonably generic interface so that similar + control registers on other processors can be easily supported + as well: + + The Cyrix 6x86, 6x86MX and M II processors have Address Range + Registers (ARRs) which provide a similar functionality to MTRRs. For + these, the ARRs are used to emulate the MTRRs. + The AMD K6-2 (stepping 8 and above) and K6-3 processors have two + MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing + write-combining. All of these processors are supported by this code + and it makes sense to say Y here if you have one of them. + + Saying Y here also fixes a problem with buggy SMP BIOSes which only + set the MTRRs for the boot CPU and not for the secondary CPUs. This + can lead to all sorts of problems, so it's good to say Y here. + + You can safely say Y even if your machine doesn't have MTRRs, you'll + just add about 9 KB to your kernel. + + See for more information. + +config EFI + bool "Boot from EFI support" + depends on ACPI + default n + ---help--- + This enables the kernel to boot on EFI platforms using + system configuration information passed to it from the firmware. + This also enables the kernel to use any EFI runtime services that are + available (such as the EFI variable services). + + This option is only useful on systems that have EFI firmware + and will result in a kernel image that is ~8k larger. In addition, + you must use the latest ELILO loader available at + in order to take advantage of + kernel initialization using EFI information (neither GRUB nor LILO know + anything about EFI). However, even with this option, the resultant + kernel should continue to boot on existing non-EFI platforms. + +config IRQBALANCE + bool "Enable kernel irq balancing" + depends on SMP && X86_IO_APIC + default y + help + The default yes will allow the kernel to do irq load balancing. + Saying no will keep the kernel from doing irq load balancing. + +# turning this on wastes a bunch of space. +# Summit needs it only when NUMA is on +config BOOT_IOREMAP + bool + depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) + default y + +config REGPARM + bool "Use register arguments" + depends on !MCOUNT + default y + help + Compile the kernel with -mregparm=3. This instructs gcc to use + a more efficient function call ABI which passes the first three + arguments of a function call via registers, which results in denser + and faster code. + + If this option is disabled, then the default ABI of passing + arguments via the stack is used. + + If unsure, say Y. + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution.
By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc//seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +source kernel/Kconfig.hz + +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on HIGHMEM + help + Generate crash dump after being started by kexec. + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + + default "0x1000000" if CRASH_DUMP + default "0x100000" + help + This gives the physical address where the kernel is loaded. Normally + for regular kernels this value is 0x100000 (1MB). But in the case + of kexec on panic the fail safe kernel needs to run at a different + address than the panic-ed kernel. This option is used to set the load + address for kernels used to capture crash dump on being kexec'ed + after panic. The default value for crash dump kernels is + 0x1000000 (16MB). This can also be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Don't change this unless you know what you are doing. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + ---help--- + Say Y here to experiment with turning CPUs off and on, and to + enable suspend on SMP systems. CPUs can be controlled through + /sys/devices/system/cpu. + +config GENERIC_TIME_VSYSCALL + depends on EXPERIMENTAL + bool "VSYSCALL gettimeofday() interface" + +config COMPAT_VDSO + bool "Compat VDSO support" + default y + help + Map the VDSO to the predictable old-style address too. + ---help--- + Say N here if you are running a sufficiently recent glibc + version (2.3.3 or later), to remove the high-mapped + VDSO mapping and to exclusively use the randomized VDSO. + + If unsure, say Y. + +endmenu + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on HIGHMEM + +menu "Power management options (ACPI, APM)" + depends on !X86_VOYAGER + +source kernel/power/Kconfig + +source "drivers/acpi/Kconfig" + +menu "APM (Advanced Power Management) BIOS Support" +depends on PM && !X86_VISWS + +config APM + tristate "APM (Advanced Power Management) BIOS support" + depends on PM + ---help--- + APM is a BIOS specification for saving power using several different + techniques. 
This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + If you select "Y" here, you can disable actual use of the APM + BIOS by passing the "apm=off" option to the kernel at boot time. + + Note that the APM support is almost completely disabled for + machines with more than one CPU. + + In order to use APM, you will need supporting software. For location + and more information, read and the + Battery Powered Linux mini-HOWTO, available from + . + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + This driver does not support the TI 4000M TravelMate and the ACER + 486/DX4/75 because they don't have compliant BIOSes. Many "green" + desktop machines also don't have compliant BIOSes, and this driver + may cause those machines to panic during the boot phase. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + + Some other things you should try when experiencing seemingly random, + "weird" problems: + + 1) make sure that you have enough swap space and that it is + enabled. + 2) pass the "no-hlt" option to the kernel + 3) switch on floating point emulation in the kernel and pass + the "no387" option to the kernel + 4) pass the "floppy=nodma" option to the kernel + 5) pass the "mem=4M" option to the kernel (thereby disabling + all but the first 4 MB of RAM) + 6) make sure that the CPU is not over clocked. + 7) read the sig11 FAQ at + 8) disable the cache from your BIOS settings + 9) install a fan for the video card or exchange video RAM + 10) install a better fan for the CPU + 11) exchange RAM chips + 12) exchange the motherboard. + + To compile this driver as a module, choose M here: the + module will be called apm. + +config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" + depends on APM + help + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. + +config APM_DO_ENABLE + bool "Enable PM at boot time" + depends on APM + ---help--- + Enable APM features at boot time. From page 36 of the APM BIOS + specification: "When disabled, the APM BIOS does not automatically + power manage devices, enter the Standby State, enter the Suspend + State, or take power saving steps in response to CPU Idle calls." + This driver will make CPU Idle calls when Linux is idle (unless this + feature is turned off -- see "Do CPU IDLE calls", below). This + should always save battery power, but more complicated APM features + will be dependent on your BIOS implementation. You may need to turn + this option off if your computer hangs at boot time when using APM + support, or if it beeps continuously instead of suspending. Turn + this off if you have a NEC UltraLite Versa 33/C or a Toshiba + T400CDT. This is off by default since most machines do fine without + this feature. 
+ +config APM_CPU_IDLE + bool "Make CPU Idle calls when idle" + depends on APM + help + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. + On some machines, this can activate improved power savings, such as + a slowed CPU clock rate, when the machine is idle. These idle calls + are made after the idle loop has run for some length of time (e.g., + 333 mS). On some machines, this will cause a hang at boot time or + whenever the CPU becomes idle. (On machines with more than one CPU, + this option does nothing.) + +config APM_DISPLAY_BLANK + bool "Enable console blanking using APM" + depends on APM + help + Enable console blanking using the APM. Some laptops can use this to + turn off the LCD backlight when the screen blanker of the Linux + virtual console blanks the screen. Note that this is only used by + the virtual console screen blanker, and won't turn off the backlight + when using the X Window system. This also doesn't have anything to + do with your VESA-compliant power-saving monitor. Further, this + option doesn't work for all laptops -- it might not turn off your + backlight at all, or it might print a lot of errors to the console, + especially if you are using gpm. + +config APM_RTC_IS_GMT + bool "RTC stores time in GMT" + depends on APM + help + Say Y here if your RTC (Real Time Clock a.k.a. hardware clock) + stores the time in GMT (Greenwich Mean Time). Say N if your RTC + stores localtime. + + It is in fact recommended to store GMT in your RTC, because then you + don't have to worry about daylight savings time changes. The only + reason not to use GMT in your RTC is if you also run a broken OS + that doesn't understand GMT. + +config APM_ALLOW_INTS + bool "Allow interrupts during APM BIOS calls" + depends on APM + help + Normally we disable external interrupts while we are making calls to + the APM BIOS as a measure to lessen the effects of a badly behaving + BIOS implementation. The BIOS should reenable interrupts if it + needs to. Unfortunately, some BIOSes do not -- especially those in + many of the newer IBM Thinkpads. If you experience hangs when you + suspend, try setting this to Y. Otherwise, say N. + +config APM_REAL_MODE_POWER_OFF + bool "Use real mode APM BIOS call to power off" + depends on APM + help + Use real mode APM BIOS calls to switch off the computer. This is + a work-around for a number of buggy BIOSes. Switch this option on if + your computer crashes instead of powering off properly. + +endmenu + +source "arch/i386/kernel/cpu/cpufreq/Kconfig" + +endmenu + +menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" + +config PCI + bool "PCI support" if !X86_VISWS + depends on !X86_VOYAGER + default y if X86_VISWS + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. + + The PCI-HOWTO, available from + , contains valuable + information about which PCI hardware does work under Linux and which + doesn't. + +choice + prompt "PCI access mode" + depends on PCI && !X86_VISWS + default PCI_GOANY + ---help--- + On PCI systems, the BIOS can be used to detect the PCI devices and + determine their configuration. However, some old PCI motherboards + have BIOS bugs and may crash if this is done. Also, some embedded + PCI-based systems don't have any BIOS at all. Linux can also try to + detect the PCI hardware directly without using the BIOS. 
+ + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and fall back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOBIOS + bool "BIOS" + +config PCI_GOMMCONFIG + bool "MMConfig" + +config PCI_GODIRECT + bool "Direct" + +config PCI_GOANY + bool "Any" + +endchoice + +config PCI_BIOS + bool + depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + default y + +config PCI_DIRECT + bool + depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + default y + +config PCI_MMCONFIG + bool + depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) + default y + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +config ISA_DMA_API + bool + default y + +config ISA + bool "ISA support" + depends on !(X86_VOYAGER || X86_VISWS) + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config EISA + bool "EISA support" + depends on ISA + ---help--- + The Extended Industry Standard Architecture (EISA) bus was + developed as an open alternative to the IBM MicroChannel bus. + + The EISA bus provided some of the features of the IBM MicroChannel + bus while maintaining backward compatibility with cards made for + the older ISA bus. The EISA bus saw limited use between 1988 and + 1995 when it was made obsolete by the PCI bus. + + Say Y here if you are building a kernel for an EISA-based machine. + + Otherwise, say N. + +source "drivers/eisa/Kconfig" + +config MCA + bool "MCA support" if !(X86_VISWS || X86_VOYAGER) + default y if X86_VOYAGER + help + MicroChannel Architecture is found in some IBM PS/2 machines and + laptops. It is a bus system similar to PCI or ISA. See + (and especially the web page given + there) before attempting to build an MCA bus kernel. + +source "drivers/mca/Kconfig" + +config SCx200 + tristate "NatSemi SCx200 support" + depends on !X86_VOYAGER + help + This provides basic support for National Semiconductor's + (now AMD's) Geode processors. The driver probes for the + PCI-IDs of several on-chip devices, so it's a good dependency + for other scx200_* drivers. + + If compiled as a module, the driver is named scx200. + +config SCx200HR_TIMER + tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" + depends on SCx200 && GENERIC_TIME + default y + help + This driver provides a clocksource built upon the on-chip + 27MHz high-resolution timer. It's also a workaround for + NSC Geode SC-1100's buggy TSC, which loses time when the + processor goes idle (as is done by the scheduler). The + other workaround is the idle=poll boot option.
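For reference, a clocksource of the kind the SCx200HR_TIMER help text describes plugs into GENERIC_TIME roughly as below. This is an illustrative sketch, not the driver added by this series; the I/O port and rating are made up, and it assumes the 2.6.18 clocksource interface (clocksource_hz2mult() and clocksource_register()):

	#include <linux/clocksource.h>
	#include <asm/io.h>

	#define EXAMPLE_TIMER_PORT	0x0000	/* hypothetical I/O address */

	static cycle_t example_27mhz_read(void)
	{
		return inl(EXAMPLE_TIMER_PORT);	/* free-running 27 MHz count */
	}

	static struct clocksource example_cs = {
		.name		= "example27m",
		.rating		= 250,
		.read		= example_27mhz_read,
		.mask		= CLOCKSOURCE_MASK(32),
		.shift		= 16,
		.is_continuous	= 1,
	};

	static int __init example_cs_init(void)
	{
		/* ns = (cycles * mult) >> shift, scaled for a 27 MHz clock */
		example_cs.mult = clocksource_hz2mult(27000000, example_cs.shift);
		return clocksource_register(&example_cs);
	}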
+ +config K8_NB + def_bool y + depends on AGP_AMD64 + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +endmenu + +menu "Executable file formats" + +source "fs/Kconfig.binfmt" + +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +menu "Instrumentation Support" + depends on EXPERIMENTAL + +source "arch/i386/oprofile/Kconfig" + +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + +source "arch/i386/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +# +# Use the generic interrupt handling code in kernel/irq/: +# +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + +config X86_SMP + bool + depends on SMP && !X86_VOYAGER + default y + +config X86_HT + bool + depends on SMP && !(X86_VISWS || X86_VOYAGER) + default y + +config X86_BIOS_REBOOT + bool + depends on !(X86_VISWS || X86_VOYAGER) + default y + +config X86_TRAMPOLINE + bool + depends on X86_SMP || (X86_VOYAGER && SMP) + default y + +config KTIME_SCALAR + bool + default y diff -urN ./linux-2.6.18.1/arch/i386/boot/compressed/misc.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/boot/compressed/misc.c --- ./linux-2.6.18.1/arch/i386/boot/compressed/misc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/boot/compressed/misc.c 2007-05-19 23:58:35.000000000 +0900 @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -107,7 +113,7 @@ #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; diff -urN ./linux-2.6.18.1/arch/i386/kernel/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/Makefile --- ./linux-2.6.18.1/arch/i386/kernel/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -4,7 +4,7 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o @@ -12,6 +12,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ +obj-$(CONFIG_GENERIC_TIME_VSYSCALL) += vsyscall-gtod.o obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o @@ -20,6 +21,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o @@ -30,6 +32,7 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o obj-$(CONFIG_KPROBES) += kprobes.o 
obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o diff -urN ./linux-2.6.18.1/arch/i386/kernel/acpi/boot.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/acpi/boot.c --- ./linux-2.6.18.1/arch/i386/kernel/acpi/boot.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/acpi/boot.c 2007-05-19 23:58:35.000000000 +0900 @@ -53,8 +53,6 @@ #include #endif /* CONFIG_X86_LOCAL_APIC */ -static inline int gsi_irq_sharing(int gsi) { return gsi; } - #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -459,12 +457,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { -#ifdef CONFIG_X86_IO_APIC - if (use_pci_vector() && !platform_legacy_irq(gsi)) - *irq = IO_APIC_VECTOR(gsi); - else -#endif - *irq = gsi_irq_sharing(gsi); + *irq = gsi; return 0; } @@ -575,6 +568,7 @@ } #ifdef CONFIG_HPET_TIMER +#include static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { @@ -595,21 +589,13 @@ return -1; } #ifdef CONFIG_X86_64 - vxtime.hpet_address = hpet_tbl->addr.addrl | + hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); - +#else + hpet_address = hpet_tbl->addr.addrl; +#endif printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, vxtime.hpet_address); -#else /* X86 */ - { - extern unsigned long hpet_address; - - hpet_address = hpet_tbl->addr.addrl; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); - } -#endif /* X86 */ - + hpet_tbl->id, hpet_address); return 0; } #else diff -urN ./linux-2.6.18.1/arch/i386/kernel/apic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/apic.c --- ./linux-2.6.18.1/arch/i386/kernel/apic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/apic.c 2007-05-19 23:58:35.000000000 +0900 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,23 @@ */ int apic_verbosity; +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long delta, struct clock_event *evt); +static void lapic_timer_setup(int mode, struct clock_event *evt); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; +static DEFINE_PER_CPU(struct clock_event, lapic_events); static void apic_pm_activate(void); @@ -909,6 +927,11 @@ */ /* + * FIXME: Move this to i8253.h. There is no need to keep the access to + * the PIT scattered all around the place -tglx + */ + +/* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. 
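The apic.c changes above turn the local APIC timer into a clock_event device; the calibration hunks that follow fill in its mult factor with div_sc(tt1-tt2, TICK_NSEC * LOOPS, 32) so that programming the next event needs no division in the hot path. A standalone userspace sketch of that scaled math, with made-up numbers (not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* calibration: 12,500,000 bus cycles counted in a 10 ms window */
		uint64_t cycles = 12500000, window_ns = 10000000;
		unsigned int shift = 32;

		/* mult = (cycles << shift) / ns, as div_sc() computes */
		uint64_t mult = (cycles << shift) / window_ns;

		/* to fire 250 us from now: cycles = (ns * mult) >> shift */
		uint64_t delta = (250000 * mult) >> shift;

		/* 1.25 cycles/ns * 250,000 ns = 312,500 cycles */
		printf("mult=%llu delta=%llu\n",
		       (unsigned long long)mult, (unsigned long long)delta);
		return 0;
	}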
@@ -966,13 +989,15 @@ #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); @@ -989,23 +1014,31 @@ & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); +} + +static void lapic_next_event(unsigned long delta, struct clock_event *evt) +{ + apic_write_around(APIC_TMICT, delta); } -static void __devinit setup_APIC_timer(unsigned int clocks) +static void lapic_timer_setup(int mode, struct clock_event *evt) { unsigned long flags; local_irq_save(flags); + __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC); + local_irq_restore(flags); +} - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); +static void __devinit setup_APIC_timer(void) +{ + struct clock_event *levt = &__get_cpu_var(lapic_events); - __setup_APIC_LVTT(clocks); + memcpy(levt, &lapic_clockevent, sizeof(*levt)); - local_irq_restore(flags); + register_local_clockevent(levt); } /* @@ -1014,6 +1047,8 @@ * to calibrate, since some later bootup code depends on getting * the first irq? Ugh. * + * TODO: Fix this rather than saying "Ugh" -tglx + * * We want to do the calibration only once since we * want to have local timer irqs syncron. CPUs connected * by the same APIC bus have the very same bus frequency. @@ -1036,7 +1071,7 @@ * value into the APIC clock, we just want to get the * counter running for calibration. */ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); /* * The timer chip counts down to zero. Let's wait @@ -1073,6 +1108,14 @@ result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(tt1-tt2, TICK_NSEC * LOOPS, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + printk("lapic max_delta_ns: %ld\n", lapic_clockevent.max_delta_ns); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + if (cpu_has_tsc) apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", @@ -1087,8 +1130,6 @@ return result; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock(void) { unsigned long flags; @@ -1101,14 +1142,14 @@ /* * Now set up the timer for real. 
*/ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) { - setup_APIC_timer(calibration_result); + setup_APIC_timer(); } void disable_APIC_timer(void) @@ -1154,6 +1195,13 @@ !cpu_isset(cpu, timer_bcast_ipi)) { disable_APIC_timer(); cpu_set(cpu, timer_bcast_ipi); +#ifdef CONFIG_HIGH_RES_TIMERS + printk("Disabling NO_HZ and high resolution timers " + "due to timer broadcasting\n"); + for_each_possible_cpu(cpu) + per_cpu(lapic_events, cpu).capabilities &= + ~CLOCK_CAP_NEXTEVT; +#endif } } EXPORT_SYMBOL(switch_APIC_timer_to_ipi); @@ -1190,6 +1238,8 @@ update_process_times(user_mode_vm(regs)); #endif + trace_special(regs->eip, 0, 0); + /* * We take the 'long' return path, and there every subsystem * grabs the apropriate locks (kernel lock/ irq lock). @@ -1211,15 +1261,18 @@ * interrupt as well. Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); + struct clock_event *evt = &per_cpu(lapic_events, cpu); /* * the NMI deadlock-detector uses this. */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. @@ -1231,7 +1284,15 @@ * interrupt lock, which is the WrongThing (tm) to do. */ irq_enter(); - smp_local_timer_interrupt(regs); + /* + * If the task is currently running in user mode, don't + * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not + * configured, this should be optimized out. + */ + if (user_mode(regs)) + touch_softlockup_watchdog(); + + evt->event_handler(regs); irq_exit(); } @@ -1240,6 +1301,8 @@ { int cpu = smp_processor_id(); + trace_special(regs->eip, 1, 0); + /* * the NMI deadlock-detector uses this. */ @@ -1323,6 +1386,7 @@ */ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); + dump_stack(); irq_exit(); } diff -urN ./linux-2.6.18.1/arch/i386/kernel/apm.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/apm.c --- ./linux-2.6.18.1/arch/i386/kernel/apm.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/apm.c 2007-05-19 23:58:35.000000000 +0900 @@ -233,7 +233,6 @@ #include "io_ports.h" -extern unsigned long get_cmos_time(void); extern void machine_real_restart(unsigned char *, int); #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) @@ -1152,26 +1151,6 @@ spin_unlock(&user_list_lock); } -static void set_time(void) -{ - if (got_clock_diff) { /* Must know time zone in order to set clock */ - xtime.tv_sec = get_cmos_time() + clock_cmos_diff; - xtime.tv_nsec = 0; - } -} - -static void get_time_diff(void) -{ -#ifndef CONFIG_APM_RTC_IS_GMT - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - got_clock_diff = 1; -#endif -} - static void reinit_timer(void) { #ifdef INIT_TIMER_AFTER_SUSPEND @@ -1211,19 +1190,6 @@ local_irq_disable(); device_power_down(PMSG_SUSPEND); - /* serialize with the timer interrupt */ - write_seqlock(&xtime_lock); - - /* protect against access to timer chip registers */ - spin_lock(&i8253_lock); - - get_time_diff(); - /* - * Irq spinlock must be dropped around set_system_power_state. - * We'll undo any timer changes due to interrupts below. 
- */ - spin_unlock(&i8253_lock); - write_sequnlock(&xtime_lock); local_irq_enable(); save_processor_state(); @@ -1232,13 +1198,7 @@ restore_processor_state(); local_irq_disable(); - write_seqlock(&xtime_lock); - spin_lock(&i8253_lock); reinit_timer(); - set_time(); - - spin_unlock(&i8253_lock); - write_sequnlock(&xtime_lock); if (err == APM_NO_ERROR) err = APM_SUCCESS; @@ -1267,11 +1227,6 @@ local_irq_disable(); device_power_down(PMSG_SUSPEND); - /* serialize with the timer interrupt */ - write_seqlock(&xtime_lock); - /* If needed, notify drivers here */ - get_time_diff(); - write_sequnlock(&xtime_lock); local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); @@ -1365,9 +1320,6 @@ ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - write_seqlock_irq(&xtime_lock); - set_time(); - write_sequnlock_irq(&xtime_lock); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(event, NULL); @@ -1383,9 +1335,6 @@ break; case APM_UPDATE_TIME: - write_seqlock_irq(&xtime_lock); - set_time(); - write_sequnlock_irq(&xtime_lock); break; case APM_CRITICAL_SUSPEND: diff -urN ./linux-2.6.18.1/arch/i386/kernel/cpu/mtrr/generic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/mtrr/generic.c --- ./linux-2.6.18.1/arch/i386/kernel/cpu/mtrr/generic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/mtrr/generic.c 2007-05-19 23:58:35.000000000 +0900 @@ -234,7 +234,7 @@ static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they diff -urN ./linux-2.6.18.1/arch/i386/kernel/cpu/mtrr/main.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/mtrr/main.c --- ./linux-2.6.18.1/arch/i386/kernel/cpu/mtrr/main.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/mtrr/main.c 2007-05-19 23:58:35.000000000 +0900 @@ -135,8 +135,6 @@ mtrr_type smp_type; }; -#ifdef CONFIG_SMP - static void ipi_handler(void *info) /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. [RETURNS] Nothing. 
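The conversion of set_atomicity_lock to DEFINE_RAW_SPINLOCK above (and the now-unconditional ipi_handler here) reflects a core rule of this series: on PREEMPT_RT an ordinary spinlock_t may become a sleeping lock, so code that must run atomically with interrupts hard-disabled, such as reprogramming MTRRs with the caches off, has to use a raw spinlock, which keeps true spinning semantics. A sketch of the distinction under the -rt lock-type-switching API (names illustrative, not from the patch):

	static DEFINE_RAW_SPINLOCK(example_hw_lock);	/* always spins, irqs off */
	static DEFINE_SPINLOCK(example_data_lock);	/* may sleep on PREEMPT_RT */

	static void example_poke_hardware(void)
	{
		unsigned long flags;

		/* atomic section: no preemption, no sleeping allowed inside */
		spin_lock_irqsave(&example_hw_lock, flags);
		/* ... touch registers that must not be interrupted ... */
		spin_unlock_irqrestore(&example_hw_lock, flags);
	}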
@@ -166,8 +164,6 @@ local_irq_restore(flags); } -#endif - /** * set_mtrr - update mtrrs on all processors * @reg: mtrr in question diff -urN ./linux-2.6.18.1/arch/i386/kernel/cpu/transmeta.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/transmeta.c --- ./linux-2.6.18.1/arch/i386/kernel/cpu/transmeta.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/cpu/transmeta.c 2007-05-19 23:58:35.000000000 +0900 @@ -9,7 +9,8 @@ { unsigned int cap_mask, uk, max, dummy; unsigned int cms_rev1, cms_rev2; - unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; + unsigned int cpu_rev, cpu_freq = 0 /* shut up gcc warning */, + cpu_flags, new_cpu_rev; char cpu_info[65]; get_model_name(c); /* Same as AMD/Cyrix */ diff -urN ./linux-2.6.18.1/arch/i386/kernel/entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/entry.S --- ./linux-2.6.18.1/arch/i386/kernel/entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/entry.S 2007-05-23 20:41:33.000000000 +0900 @@ -235,7 +235,20 @@ movb CS(%esp), %al testl $(VM_MASK | 3), %eax jz resume_kernel + ENTRY(resume_userspace) +#ifdef CONFIG_CABI + sti + movl cabi_ret_with_reschedule_hook,%eax + testl %eax,%eax /* if (hook == 0) */ + je 1f /* yes, then skip it */ + call *%eax /* no, then call hook */ + testl %eax,%eax /* if (ret != 0) */ + jne resume_userspace /* yes, then jump back */ + /* jne work_pending */ +1: +#endif + cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -247,15 +260,29 @@ #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) +#ifdef CONFIG_CABI + movl cabi_ret_with_reschedule_hook,%eax + testl %eax,%eax /* if (hook == 0) */ + je 1f /* yes, then skip it */ + call *%eax /* no, then call hook */ + testl %eax,%eax /* if (ret != 0) */ + jne syscall_exit /* yes, then jump back */ + /* jne work_pending */ +1: +#endif cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? 
- jz restore_all + jz restore_nocheck + cli + TRACE_IRQS_OFF call preempt_schedule_irq jmp need_resched #endif @@ -311,6 +338,11 @@ pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -322,9 +354,15 @@ movl %eax,EAX(%esp) cli TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -341,6 +379,11 @@ pushl %eax # save orig_eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) testl $TF_MASK,EFLAGS(%esp) jz no_singlestep @@ -430,19 +473,20 @@ ALIGN RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + TRACE_IRQS_OFF + call __schedule + # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jnz work_resched work_notifysig: # deal with pending signals and diff -urN ./linux-2.6.18.1/arch/i386/kernel/entry.S.orig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/entry.S.orig --- ./linux-2.6.18.1/arch/i386/kernel/entry.S.orig 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/entry.S.orig 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,975 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. + * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. + * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq_vectors.h" + +#define nr_syscalls ((syscall_table_size)/4) + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +TF_MASK = 0x00000100 +IF_MASK = 0x00000200 +DF_MASK = 0x00000400 +NT_MASK = 0x00004000 +VM_MASK = 0x00020000 + +#ifdef CONFIG_PREEMPT +#define preempt_stop cli; TRACE_IRQS_OFF +#else +#define preempt_stop +#define resume_kernel restore_nocheck +#endif + +.macro TRACE_IRQS_IRET +#ifdef CONFIG_TRACE_IRQFLAGS + testl $IF_MASK,EFLAGS(%esp) # interrupts off? + jz 1f + TRACE_IRQS_ON +1: +#endif +.endm + +#ifdef CONFIG_VM86 +#define resume_userspace_sig check_userspace +#else +#define resume_userspace_sig resume_userspace +#endif + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET es, 0;*/\ + pushl %ds; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET ds, 0;*/\ + pushl %eax; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET eax, 0;\ + pushl %ebp; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebp, 0;\ + pushl %edi; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET edi, 0;\ + pushl %esi; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET esi, 0;\ + pushl %edx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET edx, 0;\ + pushl %ecx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ecx, 0;\ + pushl %ebx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebx, 0;\ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; + +#define RESTORE_INT_REGS \ + popl %ebx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ebx;\ + popl %ecx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ecx;\ + popl %edx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE edx;\ + popl %esi; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE esi;\ + popl %edi; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE edi;\ + popl %ebp; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ebp;\ + popl %eax; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE eax + +#define RESTORE_REGS \ + RESTORE_INT_REGS; \ +1: popl %ds; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE ds;*/\ +2: popl %es; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE es;*/\ +.section .fixup,"ax"; \ +3: movl $0,(%esp); \ + jmp 1b; \ +4: movl $0,(%esp); \ + jmp 2b; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,3b; \ + .long 2b,4b; \ +.previous + +#define RING0_INT_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, 3*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + +#define RING0_EC_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, 4*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + +#define RING0_PTREGS_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, OLDESP-EBX;\ + /*CFI_OFFSET cs, CS-OLDESP;*/\ + CFI_OFFSET eip, EIP-OLDESP;\ + /*CFI_OFFSET es, ES-OLDESP;*/\ + /*CFI_OFFSET ds, DS-OLDESP;*/\ + CFI_OFFSET eax, EAX-OLDESP;\ + CFI_OFFSET ebp, EBP-OLDESP;\ + CFI_OFFSET edi, EDI-OLDESP;\ + CFI_OFFSET esi, ESI-OLDESP;\ + CFI_OFFSET edx, EDX-OLDESP;\ + CFI_OFFSET ecx, ECX-OLDESP;\ + CFI_OFFSET ebx, EBX-OLDESP + +ENTRY(ret_from_fork) + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + call schedule_tail + GET_THREAD_INFO(%ebp) + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + pushl $0x0202 # Reset kernel eflags + CFI_ADJUST_CFA_OFFSET 4 + popfl + CFI_ADJUST_CFA_OFFSET -4 + jmp syscall_exit + CFI_ENDPROC + +/* + * Return to user mode is not as complex as all this 
looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. + */ + + # userspace resumption stub bypassing syscall exit tracing + ALIGN + RING0_PTREGS_FRAME +ret_from_exception: + preempt_stop +ret_from_intr: + GET_THREAD_INFO(%ebp) +check_userspace: + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernel +ENTRY(resume_userspace) + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret + movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done on + # int/exception return? + jne work_pending + jmp restore_all + +#ifdef CONFIG_PREEMPT +ENTRY(resume_kernel) + cli + cmpl $0, kernel_preemption + jz restore_nocheck + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_nocheck +need_resched: + movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl + jz restore_nocheck + testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_nocheck + cli + TRACE_IRQS_OFF + call preempt_schedule_irq + jmp need_resched +#endif + CFI_ENDPROC + +/* SYSENTER_RETURN points to after the "sysenter" instruction in + the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ + + # sysenter call handler stub +ENTRY(sysenter_entry) + CFI_STARTPROC simple + CFI_DEF_CFA esp, 0 + CFI_REGISTER esp, ebp + movl TSS_sysenter_esp0(%esp),%esp +sysenter_past_esp: + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ + sti + pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ss, 0*/ + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esp, 0 + pushfl + CFI_ADJUST_CFA_OFFSET 4 + pushl $(__USER_CS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET cs, 0*/ + /* + * Push current_thread_info()->sysenter_return to the stack. + * A tiny bit of offset fixup is necessary - 4*4 means the 4 words + * pushed above; +8 corresponds to copy_thread's esp0 setting. + */ + pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 + +/* + * Load the potential sixth argument from user stack. + * Careful about security. 
+ */ + cmpl $__PAGE_OFFSET-3,%ebp + jae syscall_fault +1: movl (%ebp),%ebp +.section __ex_table,"a" + .align 4 + .long 1b,syscall_fault +.previous + + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif + GET_THREAD_INFO(%ebp) + + /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + jnz syscall_trace_entry + cmpl $(nr_syscalls), %eax + jae syscall_badsys + call *sys_call_table(,%eax,4) + movl %eax,EAX(%esp) + cli + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx + jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif +/* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx + movl OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON + sti + sysexit + CFI_ENDPROC + + + # system call handler stub +ENTRY(system_call) + RING0_INT_FRAME # can't unwind into user space anyway + pushl %eax # save orig_eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif + GET_THREAD_INFO(%ebp) + testl $TF_MASK,EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: + # system call tracing in operation / emulation + /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + jnz syscall_trace_entry + cmpl $(nr_syscalls), %eax + jae syscall_badsys +syscall_call: + call *sys_call_table(,%eax,4) + movl %eax,EAX(%esp) # store the return value +syscall_exit: + cli # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + +restore_all: + movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: OLDSS(%esp) contains the wrong/random values if we + # are returning to the kernel. + # See comments in process.c:copy_thread() for details. + movb OLDSS(%esp), %ah + movb CS(%esp), %al + andl $(VM_MASK | (4 << 8) | 3), %eax + cmpl $((4 << 8) | 3), %eax + CFI_REMEMBER_STATE + je ldt_ss # returning to user-space with LDT SS +restore_nocheck: + TRACE_IRQS_IRET +restore_nocheck_notrace: + RESTORE_REGS + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 +1: iret +.section .fixup,"ax" +iret_exc: + TRACE_IRQS_ON + sti + pushl $0 # no error code + pushl $do_iret_error + jmp error_code +.previous +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + + CFI_RESTORE_STATE +ldt_ss: + larl OLDSS(%esp), %eax + jnz restore_nocheck + testl $0x00400000, %eax # returning to 32bit stack? + jnz restore_nocheck # allright, normal return + /* If returning to userspace with 16bit stack, + * try to fix the higher word of ESP, as the CPU + * won't restore it. + * This is an "official" bug of all the x86-compatible + * CPUs, which we can try to work around to make + * dosemu and wine happy. */ + subl $8, %esp # reserve space for switch16 pointer + CFI_ADJUST_CFA_OFFSET 8 + cli + TRACE_IRQS_OFF + movl %esp, %eax + /* Set up the 16bit stack frame with switch32 pointer on top, + * and a switch16 pointer on top of the current frame. 
*/ + call setup_x86_bogus_stack + CFI_ADJUST_CFA_OFFSET -8 # frame has moved + TRACE_IRQS_IRET + RESTORE_REGS + lss 20+4(%esp), %esp # switch to 16bit stack +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + CFI_ENDPROC + + # perform work that needs to be done immediately before resumption + ALIGN + RING0_PTREGS_FRAME # can't unwind into user space anyway +work_pending: + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx + jz work_notifysig +work_resched: + cli + TRACE_IRQS_OFF + call __schedule + # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret + movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done other + # than syscall tracing? + jz restore_all + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx + jnz work_resched + +work_notifysig: # deal with pending signals and + # notify-resume requests + testl $VM_MASK, EFLAGS(%esp) + movl %esp, %eax + jne work_notifysig_v86 # returning to kernel-space or + # vm86-space + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace_sig + + ALIGN +work_notifysig_v86: +#ifdef CONFIG_VM86 + pushl %ecx # save ti_flags for do_notify_resume + CFI_ADJUST_CFA_OFFSET 4 + call save_v86_state # %eax contains pt_regs pointer + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + movl %eax, %esp + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace_sig +#endif + + # perform syscall exit tracing + ALIGN +syscall_trace_entry: + movl $-ENOSYS,EAX(%esp) + movl %esp, %eax + xorl %edx,%edx + call do_syscall_trace + cmpl $0, %eax + jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, + # so must skip actual syscall + movl ORIG_EAX(%esp), %eax + cmpl $(nr_syscalls), %eax + jnae syscall_call + jmp syscall_exit + + # perform syscall exit tracing + ALIGN +syscall_exit_work: + testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl + jz work_pending + TRACE_IRQS_ON + sti # could let do_syscall_trace() call + # schedule() instead + movl %esp, %eax + movl $1, %edx + call do_syscall_trace + jmp resume_userspace + CFI_ENDPROC + + RING0_INT_FRAME # can't unwind into user space anyway +syscall_fault: + pushl %eax # save orig_eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + GET_THREAD_INFO(%ebp) + movl $-EFAULT,EAX(%esp) + jmp resume_userspace + +syscall_badsys: + movl $-ENOSYS,EAX(%esp) + jmp resume_userspace + CFI_ENDPROC + +#define FIXUP_ESPFIX_STACK \ + movl %esp, %eax; \ + /* switch to 32bit stack using the pointer on top of 16bit stack */ \ + lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ + /* copy data from 16bit stack to 32bit stack */ \ + call fixup_x86_bogus_stack; \ + /* put ESP to the proper location */ \ + movl %eax, %esp; +#define UNWIND_ESPFIX_STACK \ + pushl %eax; \ + CFI_ADJUST_CFA_OFFSET 4; \ + movl %ss, %eax; \ + /* see if on 16bit stack */ \ + cmpw $__ESPFIX_SS, %ax; \ + je 28f; \ +27: popl %eax; \ + CFI_ADJUST_CFA_OFFSET -4; \ +.section .fixup,"ax"; \ +28: movl $__KERNEL_DS, %eax; \ + movl %eax, %ds; \ + movl %eax, %es; \ + /* switch to 32bit stack */ \ + FIXUP_ESPFIX_STACK; \ + jmp 27b; \ +.previous + +/* + * Build the entry stubs and pointer table with + * some assembler magic. 
+ */ +.data +ENTRY(interrupt) +.text + +vector=0 +ENTRY(irq_entries_start) + RING0_INT_FRAME +.rept NR_IRQS + ALIGN + .if vector + CFI_ADJUST_CFA_OFFSET -4 + .endif +1: pushl $~(vector) + CFI_ADJUST_CFA_OFFSET 4 + jmp common_interrupt +.data + .long 1b +.text +vector=vector+1 +.endr + +/* + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: + */ + ALIGN +common_interrupt: + SAVE_ALL + TRACE_IRQS_OFF + movl %esp,%eax + call do_IRQ + jmp ret_from_intr + CFI_ENDPROC + +#define BUILD_INTERRUPT(name, nr) \ +ENTRY(name) \ + RING0_INT_FRAME; \ + pushl $~(nr); \ + CFI_ADJUST_CFA_OFFSET 4; \ + SAVE_ALL; \ + TRACE_IRQS_OFF \ + movl %esp,%eax; \ + call smp_/**/name; \ + jmp ret_from_intr; \ + CFI_ENDPROC + +/* The include is where all of the SMP etc. interrupts come from */ +#include "entry_arch.h" + +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error + CFI_ADJUST_CFA_OFFSET 4 + ALIGN +error_code: + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + xorl %eax, %eax + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + decl %eax # eax = -1 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ + movl ES(%esp), %edi # get the function address + movl ORIG_EAX(%esp), %edx # get the error code + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + /*CFI_REL_OFFSET es, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + movl %esp,%eax # pt_regs pointer + call *%edi + jmp ret_from_exception + CFI_ENDPROC + +ENTRY(coprocessor_error) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_coprocessor_error + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(simd_coprocessor_error) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_simd_coprocessor_error + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(device_not_available) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + movl %cr0, %eax + testl $0x4, %eax # EM (math emulation bit) + jne device_not_available_emulate + preempt_stop + call math_state_restore + jmp ret_from_exception +device_not_available_emulate: + pushl $0 # temporary storage for ORIG_EIP + CFI_ADJUST_CFA_OFFSET 4 + call math_emulate + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_exception + CFI_ENDPROC + +/* + * Debug traps and NMI can happen at the one SYSENTER instruction + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * + * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. + * + * We just load the right stack, and push the three (known) values + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. 
+ */ +#define FIX_STACK(offset, ok, label) \ + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ +label: \ + movl TSS_sysenter_esp0+offset(%esp),%esp; \ + pushfl; \ + pushl $__KERNEL_CS; \ + pushl $sysenter_past_esp + +KPROBE_ENTRY(debug) + RING0_INT_FRAME + cmpl $sysenter_entry,(%esp) + jne debug_stack_correct + FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) +debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug + jmp ret_from_exception + CFI_ENDPROC + .previous .text +/* + * NMI is doubly nasty. It can happen _while_ we're handling + * a debug fault, and the debug fault hasn't yet been able to + * clear up the stack. So we first check whether we got an + * NMI on the sysenter entry path, but after that we need to + * check whether we got an NMI on the debug path where the debug + * fault happened on the sysenter path. + */ +ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + je nmi_16bit_stack + cmpl $sysenter_entry,(%esp) + je nmi_stack_fixup + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %esp,%eax + /* Do not access memory above the end of our stack page, + * it might not exist. + */ + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + jae nmi_stack_correct + cmpl $sysenter_entry,12(%esp) + je nmi_debug_stack_check +nmi_stack_correct: + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi + jmp restore_nocheck_notrace + CFI_ENDPROC + +nmi_stack_fixup: + FIX_STACK(12,nmi_stack_correct, 1) + jmp nmi_stack_correct +nmi_debug_stack_check: + cmpw $__KERNEL_CS,16(%esp) + jne nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct + FIX_STACK(24,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_16bit_stack: + RING0_INT_FRAME + /* create the pointer to lss back */ + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + movzwl %sp, %esp + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) + CFI_ADJUST_CFA_OFFSET 4 + .endr + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + FIXUP_ESPFIX_STACK # %eax == %esp + CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to 16bit stack +1: iret + CFI_ENDPROC +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +KPROBE_ENTRY(int3) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 + jmp ret_from_exception + CFI_ENDPROC + .previous .text + +ENTRY(overflow) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_overflow + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(bounds) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_bounds + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(invalid_op) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_invalid_op + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(coprocessor_segment_overrun) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_coprocessor_segment_overrun + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + 
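+/*
+ * Note for readers of this listing (annotation, not part of the
+ * original file): the trap stubs above and below all share one
+ * dispatch pattern -- push a (possibly fake) error code, push the
+ * address of the C handler, then jump to the common error_code path,
+ * which builds the pt_regs frame and invokes the handler with the
+ * register frame and error code.  A stub for a trap that pushes no
+ * hardware error code reduces to roughly this sketch (hypothetical
+ * names, not an actual entry point in this file):
+ *
+ *	ENTRY(some_trap)
+ *		RING0_INT_FRAME
+ *		pushl $0			# fake error code
+ *		CFI_ADJUST_CFA_OFFSET 4
+ *		pushl $do_some_trap		# C handler
+ *		CFI_ADJUST_CFA_OFFSET 4
+ *		jmp error_code
+ *		CFI_ENDPROC
+ *
+ * Traps for which the CPU does push an error code (invalid_TSS below,
+ * for example) omit the "pushl $0" and use RING0_EC_FRAME so the
+ * unwind annotations account for the extra word.
+ */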
+ENTRY(invalid_TSS) + RING0_EC_FRAME + pushl $do_invalid_TSS + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(segment_not_present) + RING0_EC_FRAME + pushl $do_segment_not_present + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +ENTRY(stack_segment) + RING0_EC_FRAME + pushl $do_stack_segment + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +KPROBE_ENTRY(general_protection) + RING0_EC_FRAME + pushl $do_general_protection + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + .previous .text + +ENTRY(alignment_check) + RING0_EC_FRAME + pushl $do_alignment_check + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + .previous .text + +#ifdef CONFIG_X86_MCE +ENTRY(machine_check) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl machine_check_vector + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +#endif + +ENTRY(spurious_interrupt_bug) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_spurious_interrupt_bug + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC + +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movl 4(%esp), %edx + movl (%esp), %ecx + leal 4(%esp), %eax + movl %ebx, EBX(%edx) + xorl %ebx, %ebx + movl %ebx, ECX(%edx) + movl %ebx, EDX(%edx) + movl %esi, ESI(%edx) + movl %edi, EDI(%edx) + movl %ebp, EBP(%edx) + movl %ebx, EAX(%edx) + movl $__USER_DS, DS(%edx) + movl $__USER_DS, ES(%edx) + movl %ebx, ORIG_EAX(%edx) + movl %ecx, EIP(%edx) + movl 12(%esp), %ecx + movl $__KERNEL_CS, CS(%edx) + movl %ebx, EFLAGS(%edx) + movl %eax, OLDESP(%edx) + movl 8(%esp), %eax + movl %ecx, 8(%esp) + movl EBX(%edx), %ebx + movl $__KERNEL_DS, OLDSS(%edx) + jmpl *%eax + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) +#endif + +.section .rodata,"a" +#include "syscall_table.S" + +syscall_table_size=(.-sys_call_table) diff -urN ./linux-2.6.18.1/arch/i386/kernel/head.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/head.S --- ./linux-2.6.18.1/arch/i386/kernel/head.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/head.S 2007-05-19 23:58:35.000000000 +0900 @@ -397,6 +397,7 @@ call printk #endif addl $(5*4),%esp + call dump_stack popl %ds popl %es popl %edx diff -urN ./linux-2.6.18.1/arch/i386/kernel/i386_ksyms.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i386_ksyms.c --- ./linux-2.6.18.1/arch/i386/kernel/i386_ksyms.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i386_ksyms.c 2007-05-19 23:58:35.000000000 +0900 @@ -2,10 +2,12 @@ #include #include -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -20,7 +22,7 @@ EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES) extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); diff -urN ./linux-2.6.18.1/arch/i386/kernel/i8253.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i8253.c --- ./linux-2.6.18.1/arch/i386/kernel/i8253.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i8253.c 2007-05-19 23:58:35.000000000 +0900 @@ -2,7 +2,7 @@ * i8253.c 8253/PIT functions * */ -#include +#include #include #include #include @@ -16,22 +16,66 @@ #include "io_ports.h" -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -void setup_pit_timer(void) +static void init_pit_timer(int mode, struct clock_event *evt) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + + switch(mode) { + case CLOCK_EVT_PERIODIC: + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + break; + + case CLOCK_EVT_ONESHOT: + case CLOCK_EVT_SHUTDOWN: + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + break; + } + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static void pit_next_event(unsigned long delta, struct clock_event *evt) { unsigned long flags; spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8 , PIT_CH0); /* MSB */ + outb_p(delta & 0xff , PIT_CH0); /* LSB */ + outb(delta >> 8 , PIT_CH0); /* MSB */ spin_unlock_irqrestore(&i8253_lock, flags); } +struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_PROFILE | CLOCK_CAP_UPDATE +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .shift = 32, +}; + +void setup_pit_timer(void) +{ + pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); + pit_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFF, &pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + register_global_clockevent(&pit_clockevent); +} + /* * Since the PIT overflows every tick, its not very useful * to just read by itself. 
So use jiffies to emulate a free @@ -46,7 +90,7 @@ static u32 old_jifs; spin_lock_irqsave(&i8253_lock, flags); - /* + /* * Although our caller may have the read side of xtime_lock, * this is now a seqlock, and we are cheating in this routine * by having side effects on state that we cannot undo if diff -urN ./linux-2.6.18.1/arch/i386/kernel/i8259.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i8259.c --- ./linux-2.6.18.1/arch/i386/kernel/i8259.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/i8259.c 2007-05-19 23:58:35.000000000 +0900 @@ -34,39 +34,21 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); - -static void end_8259A_irq (unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) && - irq_desc[irq].action) - enable_8259A_irq(irq); -} - -#define shutdown_8259A_irq disable_8259A_irq - static void mask_and_ack_8259A(unsigned int); -unsigned int startup_8259A_irq(unsigned int irq) -{ - enable_8259A_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type i8259A_irq_type = { - .typename = "XT-PIC", - .startup = startup_8259A_irq, - .shutdown = shutdown_8259A_irq, - .enable = enable_8259A_irq, - .disable = disable_8259A_irq, - .ack = mask_and_ack_8259A, - .end = end_8259A_irq, +static struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, }; /* * 8259A PIC functions to handle ISA devices: */ +DEFINE_RAW_SPINLOCK(i8259A_lock); + /* * This contains the irq mask for both 8259A irq controllers, */ @@ -131,7 +113,7 @@ { disable_irq_nosync(irq); io_apic_irqs &= ~(1< #include #include +#include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include @@ -49,8 +51,8 @@ /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(vector_lock); int timer_over_8254 __initdata = 1; @@ -85,14 +87,6 @@ int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; -#ifdef CONFIG_PCI_MSI -#define vector_to_irq(vector) \ - (platform_legacy_irq(vector) ? vector : vector_irq[vector]) -#else -#define vector_to_irq(vector) (vector) -#endif - /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. 
We are super
@@ -136,6 +130,105 @@
 	}
 }
+//#define IOAPIC_CACHE
+
+#ifdef IOAPIC_CACHE
+# define MAX_IOAPIC_CACHE 512
+
+/*
+ * Cache register values:
+ */
+static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE]
+			____cacheline_aligned_in_smp;
+#endif
+
+inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+	*IO_APIC_BASE(apic) = reg;
+	return *(IO_APIC_BASE(apic)+4);
+}
+
+unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+	unsigned int val = __raw_io_apic_read(apic, reg);
+
+#ifdef IOAPIC_CACHE
+	io_apic_cache[apic][reg] = val;
+#endif
+	return val;
+}
+
+unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifdef IOAPIC_CACHE
+	if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+		static int once = 1;
+
+		if (once) {
+			once = 0;
+			printk("WARNING: ioapic register cache overflow: %d.\n",
+				reg);
+			dump_stack();
+		}
+		return __raw_io_apic_read(apic, reg);
+	}
+	if (io_apic_cache[apic][reg] && !sis_apic_bug)
+		return io_apic_cache[apic][reg];
+#endif
+	return raw_io_apic_read(apic, reg);
+}
+
+void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+	if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+		static int once = 1;
+
+		if (once) {
+			once = 0;
+			printk("WARNING: ioapic register cache overflow: %d.\n",
+				reg);
+			dump_stack();
+		}
+	} else
+		io_apic_cache[apic][reg] = val;
+#endif
+	*IO_APIC_BASE(apic) = reg;
+	*(IO_APIC_BASE(apic)+4) = val;
+}
+
+/*
+ * Some systems need a POST flush or else level-triggered interrupts
+ * generate lots of spurious interrupts due to the POST-ed write not
+ * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC.
+ */
+#ifdef CONFIG_SMP
+# define IOAPIC_POSTFLUSH
+#endif
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+	io_apic_cache[apic][reg] = val;
+#endif
+	if (unlikely(sis_apic_bug))
+		*IO_APIC_BASE(apic) = reg;
+	*(IO_APIC_BASE(apic)+4) = val;
+#ifndef IOAPIC_POSTFLUSH
+	if (unlikely(sis_apic_bug))
+#endif
+		/*
+		 * Force POST flush by reading:
+		 */
+		val = *(IO_APIC_BASE(apic)+4);
+}
+
 static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
 {
 	struct irq_pin_list *entry = irq_2_pin + irq;
@@ -167,18 +260,6 @@
 	__modify_IO_APIC_irq(irq, 0, 0x00010000);
 }
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
 static void mask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
@@ -258,7 +339,7 @@
 			break;
 		entry = irq_2_pin + entry->next;
 	}
-	set_irq_info(irq, cpumask);
+	set_native_irq_info(irq, cpumask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -1159,46 +1240,45 @@
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs.
*/ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; -int assign_irq_vector(int irq) +static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - unsigned long flags; int vector; - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - spin_lock_irqsave(&vector_lock, flags); - - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { - spin_unlock_irqrestore(&vector_lock, flags); + if (IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); - } -next: + current_vector += 8; if (current_vector == SYSCALL_VECTOR) - goto next; + current_vector += 8; if (current_vector >= FIRST_SYSTEM_VECTOR) { offset++; - if (!(offset%8)) { - spin_unlock_irqrestore(&vector_lock, flags); + if (!(offset % 8)) return -ENOSPC; - } current_vector = FIRST_DEVICE_VECTOR + offset; } vector = current_vector; - vector_irq[vector] = irq; - if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = vector; + IO_APIC_VECTOR(irq) = vector; + return vector; +} + +static int assign_irq_vector(int irq) +{ + unsigned long flags; + int vector; + + spin_lock_irqsave(&vector_lock, flags); + vector = __assign_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); return vector; } - -static struct hw_interrupt_type ioapic_level_type; -static struct hw_interrupt_type ioapic_edge_type; +static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1206,16 +1286,17 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { - unsigned idx; - - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - irq_desc[idx].chip = &ioapic_level_type; - else - irq_desc[idx].chip = &ioapic_edge_type; - set_intr_gate(vector, interrupt[idx]); + trigger == IOAPIC_LEVEL) { +#ifdef CONFIG_PREEMPT_HARDIRQS + set_irq_chip_and_handler(irq, &ioapic_chip, handle_level_irq); +#else + set_irq_chip_and_handler(irq, &ioapic_chip, handle_fasteoi_irq); +#endif + } else { + set_irq_chip_and_handler(irq, &ioapic_chip, handle_edge_irq); + } + set_intr_gate(vector, interrupt[irq]); } static void __init setup_IO_APIC_irqs(void) @@ -1326,7 +1407,8 @@ * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... 
*/ - irq_desc[0].chip = &ioapic_edge_type; + irq_desc[0].chip = &ioapic_chip; + set_irq_handler(0, handle_edge_irq); /* * Add it to the IO-APIC irq-routing table: @@ -1445,8 +1527,8 @@ struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1467,17 +1549,12 @@ ); } } - if (use_pci_vector()) - printk(KERN_INFO "Using vector-based indexing\n"); printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < NR_IRQS; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; - if (use_pci_vector() && !platform_legacy_irq(i)) - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); - else - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -1492,7 +1569,7 @@ return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1893,7 +1970,7 @@ * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. */ - if (jiffies - t1 > 4) + if (jiffies - t1 > 4 && jiffies - t1 < 16) return 1; return 0; @@ -1913,6 +1990,8 @@ */ /* + * Startup quirk: + * * Starting up a edge-triggered IO-APIC interrupt is * nasty - we need to make sure that we get the edge. * If it is already asserted for some reason, we need @@ -1920,8 +1999,10 @@ * * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... + * + * (We do this for level-triggered IRQs too - it cannot hurt.) */ -static unsigned int startup_edge_ioapic_irq(unsigned int irq) +static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; @@ -1938,47 +2019,18 @@ return was_pending; } -/* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ -static void ack_edge_ioapic_irq(unsigned int irq) -{ - move_irq(irq); - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) - == (IRQ_PENDING | IRQ_DISABLED)) - mask_IO_APIC_irq(irq); - ack_APIC_irq(); -} - -/* - * Level triggered interrupts can just be masked, - * and shutting down and starting up the interrupt - * is the same as enabling and disabling them -- except - * with a startup need to return a "was pending" value. - * - * Level triggered interrupts are special because we - * do not touch any IO-APIC register while handling - * them. We ack the APIC in the end-IRQ handler, not - * in the start-IRQ-handler. Protection against reentrance - * from the same interrupt is still provided, both by the - * generic IRQ layer and by the fact that an unacked local - * APIC does not accept IRQs. 
- */ -static unsigned int startup_level_ioapic_irq (unsigned int irq) +static void ack_ioapic_irq(unsigned int irq) { - unmask_IO_APIC_irq(irq); - - return 0; /* don't check for pending */ + move_native_irq(irq); + ack_APIC_irq(); } -static void end_level_ioapic_irq (unsigned int irq) +static void ack_ioapic_quirk_irq(unsigned int irq) { unsigned long v; int i; - move_irq(irq); + move_native_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -2007,111 +2059,34 @@ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } -#ifdef CONFIG_PCI_MSI -static unsigned int startup_edge_ioapic_vector(unsigned int vector) -{ - int irq = vector_to_irq(vector); - - return startup_edge_ioapic_irq(irq); -} - -static void ack_edge_ioapic_vector(unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - ack_edge_ioapic_irq(irq); -} - -static unsigned int startup_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - return startup_level_ioapic_irq (irq); -} - -static void end_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - end_level_ioapic_irq(irq); -} - -static void mask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - mask_IO_APIC_irq(irq); -} - -static void unmask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - unmask_IO_APIC_irq(irq); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_vector (unsigned int vector, - cpumask_t cpu_mask) -{ - int irq = vector_to_irq(vector); - - set_native_irq_info(vector, cpu_mask); - set_ioapic_affinity_irq(irq, cpu_mask); -} -#endif -#endif - -static int ioapic_retrigger(unsigned int irq) +static int ioapic_retrigger_irq(unsigned int irq) { send_IPI_self(IO_APIC_VECTOR(irq)); return 1; } -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. 
- */ -static struct hw_interrupt_type ioapic_edge_type __read_mostly = { - .typename = "IO-APIC-edge", - .startup = startup_edge_ioapic, - .shutdown = shutdown_edge_ioapic, - .enable = enable_edge_ioapic, - .disable = disable_edge_ioapic, - .ack = ack_edge_ioapic, - .end = end_edge_ioapic, +static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_ioapic_irq, + .eoi = ack_ioapic_quirk_irq, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity_irq, #endif - .retrigger = ioapic_retrigger, + .retrigger = ioapic_retrigger_irq, }; -static struct hw_interrupt_type ioapic_level_type __read_mostly = { - .typename = "IO-APIC-level", - .startup = startup_level_ioapic, - .shutdown = shutdown_level_ioapic, - .enable = enable_level_ioapic, - .disable = disable_level_ioapic, - .ack = mask_and_ack_level_ioapic, - .end = end_level_ioapic, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, -#endif - .retrigger = ioapic_retrigger, -}; static inline void init_IO_APIC_traps(void) { @@ -2130,11 +2105,6 @@ */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (use_pci_vector()) { - if (!platform_legacy_irq(tmp)) - if ((tmp = vector_to_irq(tmp)) == -1) - continue; - } if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { /* * Hmm.. We don't have an entry for this, @@ -2145,20 +2115,21 @@ make_8259A_irq(irq); else /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_type; + irq_desc[irq].chip = &no_irq_chip; } } } -static void enable_lapic_irq (unsigned int irq) -{ - unsigned long v; +/* + * The local APIC irq-chip implementation: + */ - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +static void ack_apic(unsigned int irq) +{ + ack_APIC_irq(); } -static void disable_lapic_irq (unsigned int irq) +static void mask_lapic_irq (unsigned int irq) { unsigned long v; @@ -2166,21 +2137,19 @@ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); } -static void ack_lapic_irq (unsigned int irq) +static void unmask_lapic_irq (unsigned int irq) { - ack_APIC_irq(); -} + unsigned long v; -static void end_lapic_irq (unsigned int i) { /* nothing */ } + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +} -static struct hw_interrupt_type lapic_irq_type __read_mostly = { - .typename = "local-APIC-edge", - .startup = NULL, /* startup_irq() not used for IRQ0 */ - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ - .enable = enable_lapic_irq, - .disable = disable_lapic_irq, - .ack = ack_lapic_irq, - .end = end_lapic_irq +static struct irq_chip lapic_chip __read_mostly = { + .name = "local-APIC-edge", + .mask = mask_lapic_irq, + .unmask = unmask_lapic_irq, + .eoi = ack_apic, }; static void setup_nmi (void) @@ -2361,7 +2330,7 @@ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); disable_8259A_irq(0); - irq_desc[0].chip = &lapic_irq_type; + set_irq_chip_and_handler(0, &lapic_chip, handle_fasteoi_irq); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2543,6 +2512,117 @@ device_initcall(ioapic_init_sysfs); +#ifdef CONFIG_PCI_MSI +/* + * Dynamic irq allocate and deallocation for MSI + */ +int create_irq(void) +{ + /* Allocate an unused irq */ + int irq, new, vector; + unsigned long flags; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for (new = (NR_IRQS - 1); new >= 0; new--) { + if (platform_legacy_irq(new)) + continue; 
+		if (irq_vector[new] != 0)
+			continue;
+		vector = __assign_irq_vector(new);
+		if (likely(vector > 0))
+			irq = new;
+		break;
+	}
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (irq >= 0) {
+		set_intr_gate(vector, interrupt[irq]);
+		dynamic_irq_init(irq);
+	}
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	dynamic_irq_cleanup(irq);
+
+	spin_lock_irqsave(&vector_lock, flags);
+	irq_vector[irq] = 0;
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_msg_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	/* For now this code always uses physical delivery
+	 * mode.
+	 */
+	int vector;
+	unsigned dest;
+
+	vector = assign_irq_vector(irq);
+	if (vector >= 0) {
+		dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->address_lo =
+			MSI_ADDR_BASE_LO |
+			((INT_DEST_MODE == 0) ?
+				MSI_ADDR_DEST_MODE_PHYSICAL:
+				MSI_ADDR_DEST_MODE_LOGICAL) |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_ADDR_REDIRECTION_CPU:
+				MSI_ADDR_REDIRECTION_LOWPRI) |
+			MSI_ADDR_DEST_ID(dest);
+
+		msg->data =
+			MSI_DATA_TRIGGER_EDGE |
+			MSI_DATA_LEVEL_ASSERT |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_DATA_DELIVERY_FIXED:
+				MSI_DATA_DELIVERY_LOWPRI) |
+			MSI_DATA_VECTOR(vector);
+	}
+	return vector;
+}
+
+static void msi_msg_teardown(unsigned int irq)
+{
+	return;
+}
+
+static void msi_msg_set_affinity(unsigned int irq, cpumask_t mask, struct msi_msg *msg)
+{
+	int vector;
+	unsigned dest;
+
+	vector = assign_irq_vector(irq);
+	if (vector > 0) {
+		dest = cpu_mask_to_apicid(mask);
+
+		msg->data &= ~MSI_DATA_VECTOR_MASK;
+		msg->data |= MSI_DATA_VECTOR(vector);
+		msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+		msg->address_lo |= MSI_ADDR_DEST_ID(dest);
+	}
+}
+
+struct msi_ops arch_msi_ops = {
+	.needs_64bit_address = 0,
+	.setup = msi_msg_setup,
+	.teardown = msi_msg_teardown,
+	.target = msi_msg_set_affinity,
+};
+
+#endif /* CONFIG_PCI_MSI */
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -2697,7 +2777,7 @@
 	spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
 	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
-	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+	set_native_irq_info(irq, TARGET_CPUS);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 	return 0;
diff -urN ./linux-2.6.18.1/arch/i386/kernel/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/irq.c
--- ./linux-2.6.18.1/arch/i386/kernel/irq.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/irq.c	2007-05-19 23:58:35.000000000 +0900
@@ -51,10 +51,11 @@
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+fastcall notrace unsigned int do_IRQ(struct pt_regs *regs)
 {
 	/* high bit used in ret_from_ code */
 	int irq = ~regs->orig_eax;
+	struct irq_desc *desc = irq_desc + irq;
 #ifdef CONFIG_4KSTACKS
 	union irq_ctx *curctx, *irqctx;
 	u32 *isp;
@@ -67,6 +68,11 @@
 	}
 	irq_enter();
+#ifdef CONFIG_LATENCY_TRACE
+	if (irq == trace_user_trigger_irq)
+		user_trace_start();
+#endif
+	trace_special(regs->eip, irq, 0);
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free?
*/ { @@ -75,12 +81,25 @@ __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } } #endif +#ifdef CONFIG_NO_HZ + if (idle_cpu(smp_processor_id())) { + update_jiffies(); + /* + * Force polling-idle loops to break out into + * the sched-timer setting code, to make sure + * that timer interval changes due to __mod_timer() + * in IRQ context get properly propagated: + */ + if (tsk_is_polling(current)) + set_need_resched(); + } +#endif #ifdef CONFIG_4KSTACKS @@ -94,7 +113,7 @@ * current stack (which is the irq stack already after all) */ if (curctx != irqctx) { - int arg1, arg2, ebx; + int arg1, arg2, arg3, ebx; /* build the stack frame on the IRQ stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); @@ -110,16 +129,17 @@ (curctx->tinfo.preempt_count & SOFTIRQ_MASK); asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_IRQ \n" + " xchgl %%ebx,%%esp \n" + " call *%%edi \n" " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) - : "0" (irq), "1" (regs), "2" (isp) - : "memory", "cc", "ecx" + : "=a" (arg1), "=d" (arg2), "=c" (arg3), "=b" (ebx) + : "0" (irq), "1" (desc), "2" (regs), "3" (isp), + "D" (desc->handle_irq) + : "memory", "cc" ); } else #endif - __do_IRQ(irq, regs); + desc->handle_irq(irq, desc, regs); irq_exit(); @@ -242,8 +262,10 @@ } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -253,7 +275,22 @@ for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %-14s", irq_desc[i].chip->name); +#define F(x,c) ((desc->status & x) ? 
c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); + + seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) diff -urN ./linux-2.6.18.1/arch/i386/kernel/kprobes.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/kprobes.c --- ./linux-2.6.18.1/arch/i386/kernel/kprobes.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/kprobes.c 2007-05-19 23:58:35.000000000 +0900 @@ -338,7 +338,7 @@ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); regs->eip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); + preempt_enable(); return 1; } #endif @@ -347,7 +347,7 @@ return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } @@ -566,7 +566,7 @@ } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, eflags @@ -600,7 +600,7 @@ restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: @@ -734,7 +734,7 @@ *regs = kcb->jprobe_saved_regs; memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); - preempt_enable_no_resched(); + preempt_enable(); return 1; } return 0; diff -urN ./linux-2.6.18.1/arch/i386/kernel/mcount-wrapper.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/mcount-wrapper.S --- ./linux-2.6.18.1/arch/i386/kernel/mcount-wrapper.S 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/mcount-wrapper.S 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + diff -urN ./linux-2.6.18.1/arch/i386/kernel/microcode.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/microcode.c --- ./linux-2.6.18.1/arch/i386/kernel/microcode.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/microcode.c 2007-05-19 23:58:35.000000000 +0900 @@ -115,7 +115,7 @@ #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); diff -urN ./linux-2.6.18.1/arch/i386/kernel/mpparse.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/mpparse.c --- ./linux-2.6.18.1/arch/i386/kernel/mpparse.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/mpparse.c 2007-05-19 23:58:35.000000000 +0900 @@ -228,12 +228,17 @@ mpc_oem_bus_info(m, str, translation_table[mpc_record]); + /* + * mpc_busid is char: + */ +#if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { printk(KERN_WARNING "MP table busid value (%d) for bustype %s " " is too large, 
max. supported is %d\n",
 			m->mpc_busid, str, MAX_MP_BUSSES - 1);
 		return;
 	}
+#endif
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
diff -urN ./linux-2.6.18.1/arch/i386/kernel/nmi.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/nmi.c
--- ./linux-2.6.18.1/arch/i386/kernel/nmi.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/nmi.c	2007-05-19 23:58:35.000000000 +0900
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -30,7 +31,7 @@
 unsigned int nmi_watchdog = NMI_NONE;
 extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
+static unsigned int nmi_hz = 1000;
 static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
 static unsigned int nmi_p4_cccr_val;
 extern void show_registers(struct pt_regs *regs);
@@ -99,7 +100,6 @@
 #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
 #define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-#ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
  * CPUs during the test make them busy.
@@ -107,7 +107,12 @@
 static __init void nmi_cpu_busy(void *data)
 {
 	volatile int *endflag = data;
+	/*
+	 * avoid a warning, on PREEMPT_RT this won't run in hardirq context:
+	 */
+#ifndef CONFIG_PREEMPT_RT
 	local_irq_enable_in_hardirq();
+#endif
 	/* Intentionally don't use cpu_relax here. This is
 	   to make sure that the performance counter really ticks,
 	   even if there is a simulator or similar that catches the
@@ -117,7 +122,6 @@
 	while (*endflag == 0)
 		barrier();
 }
-#endif
 static int __init check_nmi_watchdog(void)
 {
@@ -140,7 +144,7 @@
 	for_each_possible_cpu(cpu)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
-	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+	mdelay((100*1000)/nmi_hz); // wait 100 ticks
 	for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
@@ -167,7 +171,7 @@
 	/* now that we know it works we can reduce NMI frequency to
 	   something more reasonable; makes a difference in some configs */
 	if (nmi_watchdog == NMI_LOCAL_APIC)
-		nmi_hz = 1;
+		nmi_hz = 10000;
 	kfree(prev_nmi_count);
 	return 0;
@@ -579,9 +583,34 @@
 extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
 {
+	int i;
+
+	if (nmi_watchdog == NMI_NONE)
+		return;
+	if (system_state != SYSTEM_RUNNING) {
+		printk("nmi_show_all_regs(): system state %d, not doing.\n",
+			system_state);
+		return;
+	}
+	printk("nmi_show_all_regs(): start on CPU#%d.\n",
+		raw_smp_processor_id());
+	dump_stack();
+	for_each_online_cpu(i)
+		nmi_show_regs[i] = 1;
+	for_each_online_cpu(i)
+		while (nmi_show_regs[i] == 1)
+			barrier();
+}
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+
+void notrace nmi_watchdog_tick (struct pt_regs * regs)
+{
 	/*
 	 * Since current_thread_info()-> is always on the stack, and we
 	 * always switch the stack NMI-atomically, it's safe to use
@@ -590,7 +619,16 @@
 	unsigned int sum;
 	int cpu = smp_processor_id();
-	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+	sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+
+	profile_tick(CPU_PROFILING, regs);
+	if (nmi_show_regs[cpu]) {
+		nmi_show_regs[cpu] = 0;
+		spin_lock(&nmi_print_lock);
+		printk("NMI show regs on CPU#%d:\n", cpu);
+		show_regs(regs);
+		spin_unlock(&nmi_print_lock);
+	}
 	if (last_irq_sums[cpu] == sum) {
 		/*
 		 * wait a
few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", + cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; diff -urN ./linux-2.6.18.1/arch/i386/kernel/process.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/process.c --- ./linux-2.6.18.1/arch/i386/kernel/process.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/process.c 2007-05-19 23:58:35.000000000 +0900 @@ -103,16 +103,20 @@ if (!hlt_counter && boot_cpu_data.hlt_works_ok) { current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { local_irq_disable(); - if (!need_resched()) - safe_halt(); - else + if (!need_resched() && !need_resched_delayed()) { + if (!hrtimer_stop_sched_tick()) + safe_halt(); + else + local_irq_enable(); + hrtimer_restart_sched_tick(); + } else local_irq_enable(); } current_thread_info()->status |= TS_POLLING; } else { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_relax(); } } @@ -125,16 +129,18 @@ * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. 
*/ -static void poll_idle (void) +static void poll_idle(void) { local_irq_enable(); - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + while (!need_resched() && !need_resched_delayed()) { + hrtimer_stop_sched_tick(); + local_irq_enable(); + while (!need_resched() && !need_resched_delayed() && !rcu_pending(smp_processor_id()) && !local_softirq_pending()) + rep_nop(); + hrtimer_restart_sched_tick(); + local_irq_enable(); + } } #ifdef CONFIG_HOTPLUG_CPU @@ -177,7 +183,9 @@ /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -195,9 +203,11 @@ __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } @@ -240,13 +250,16 @@ { local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { + if (hrtimer_stop_sched_tick()) + break; __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); } + hrtimer_restart_sched_tick(); } void __devinit select_idle_routine(const struct cpuinfo_x86 *c) @@ -363,15 +376,23 @@ if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + void *io_bitmap_ptr = t->io_bitmap_ptr; + int cpu; + struct tss_struct *tss; - kfree(t->io_bitmap_ptr); + /* + * On PREEMPT_RT we must not call kfree() with + * preemption disabled, so we first zap the pointer: + */ t->io_bitmap_ptr = NULL; + kfree(io_bitmap_ptr); + clear_thread_flag(TIF_IO_BITMAP); /* * Careful, clear this in the TSS too: */ + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); t->io_bitmap_max = 0; tss->io_bitmap_owner = NULL; diff -urN ./linux-2.6.18.1/arch/i386/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/semaphore.c --- ./linux-2.6.18.1/arch/i386/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -12,6 +12,7 @@ * * rw semaphores implemented November 1999 by Benjamin LaHaise */ +#include #include /* @@ -27,15 +28,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -48,15 +49,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -69,15 +70,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl 
__down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -90,45 +91,13 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK_PREFIX "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK_PREFIX "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif diff -urN ./linux-2.6.18.1/arch/i386/kernel/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/setup.c --- ./linux-2.6.18.1/arch/i386/kernel/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -61,7 +61,7 @@ #include #include #include - +#include /* Forward Declaration. */ void __init find_max_pfn(void); @@ -1580,6 +1580,7 @@ #endif #endif tsc_init(); + vsyscall_init(); } static __init int add_pcspkr(void) diff -urN ./linux-2.6.18.1/arch/i386/kernel/signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/signal.c --- ./linux-2.6.18.1/arch/i386/kernel/signal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/signal.c 2007-05-19 23:58:35.000000000 +0900 @@ -532,6 +532,13 @@ } } +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so * that register information in the sigcontext is correct. @@ -572,6 +579,13 @@ struct k_sigaction ka; sigset_t *oldset; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from diff -urN ./linux-2.6.18.1/arch/i386/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/smp.c --- ./linux-2.6.18.1/arch/i386/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -255,7 +255,7 @@ static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -400,7 +400,7 @@ while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -491,10 +491,20 @@ } /* + * this function sends a 'reschedule' IPI to all other CPUs. 
+ * This is used when RT tasks are starving + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -599,13 +609,14 @@ } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) diff -urN ./linux-2.6.18.1/arch/i386/kernel/syscall_table.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/syscall_table.S --- ./linux-2.6.18.1/arch/i386/kernel/syscall_table.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/syscall_table.S 2007-05-20 14:14:28.000000000 +0900 @@ -317,3 +317,14 @@ .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages +#ifdef CONFIG_CABI + .long sys_cabi_account_create /* 318-325 */ + .long sys_cabi_account_destroy + .long sys_cabi_account_bind_pid + .long sys_cabi_account_bind_pgid + .long sys_cabi_account_unbind + .long sys_cabi_account_get + .long sys_cabi_account_set + .long sys_cabi_account_eval +#endif /* CONFIG_CABI */ + diff -urN ./linux-2.6.18.1/arch/i386/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/time.c --- ./linux-2.6.18.1/arch/i386/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -131,7 +131,7 @@ int timer_ack; #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -150,15 +150,6 @@ */ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); - #ifdef CONFIG_X86_IO_APIC if (timer_ack) { /* @@ -177,7 +168,6 @@ do_timer_interrupt_hook(regs); - if (MCA_bus) { /* The PS/2 uses level-triggered interrupts.
You can't turn them off, nor would you want to (any attempt to @@ -192,8 +182,6 @@ outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ } - write_sequnlock(&xtime_lock); - #ifdef CONFIG_X86_LOCAL_APIC if (using_apic_timer) smp_send_timer_broadcast_ipi(regs); @@ -203,7 +191,7 @@ } /* not static: needed by APM */ -unsigned long get_cmos_time(void) +unsigned long read_persistent_clock(void) { unsigned long retval; unsigned long flags; @@ -219,7 +207,7 @@ return retval; } -EXPORT_SYMBOL(get_cmos_time); +EXPORT_SYMBOL(read_persistent_clock); static void sync_cmos_clock(unsigned long dummy); @@ -270,75 +258,11 @@ mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; - -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - jiffies_64 += sleep_length; - wall_jiffies += sleep_length; - write_sequnlock_irqrestore(&xtime_lock, flags); - touch_softlockup_watchdog(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - #ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } @@ -359,10 +283,6 @@ return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); time_init_hook(); } diff -urN ./linux-2.6.18.1/arch/i386/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/traps.c --- ./linux-2.6.18.1/arch/i386/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -226,6 +226,7 @@ break; printk("%s =======================\n", log_lvl); } + print_traces(task); } void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) @@ -276,6 +277,12 @@ EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -302,8 +309,8 @@ regs->eax, regs->ebx, regs->ecx, regs->edx); printk(KERN_EMERG "esi: %08lx 
edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk(KERN_EMERG "ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, current->thread_info); @@ -375,11 +382,11 @@ void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -486,6 +493,11 @@ if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -505,6 +517,7 @@ if (ret) goto trap_signal; return; } + print_traces(tsk); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -703,10 +716,11 @@ crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -715,9 +729,6 @@ reason = get_nmi_reason(); if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; #ifdef CONFIG_X86_LOCAL_APIC /* * Ok, so this is none of the documented NMI sources, @@ -725,9 +736,13 @@ */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; unknown_nmi_error(reason, regs); return; } @@ -744,18 +759,19 @@ reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); diff -urN ./linux-2.6.18.1/arch/i386/kernel/tsc.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/tsc.c --- ./linux-2.6.18.1/arch/i386/kernel/tsc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/tsc.c 2007-05-19 23:58:35.000000000 +0900 @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -333,6 +334,16 @@ return ret; } + +static cycle_t __vsyscall_fn vread_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, @@ -342,6 +353,7 @@ .shift = 22, .update_callback = tsc_update_callback, .is_continuous = 1, + .vread = vread_tsc, }; static int tsc_update_callback(void) diff -urN ./linux-2.6.18.1/arch/i386/kernel/vm86.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vm86.c --- ./linux-2.6.18.1/arch/i386/kernel/vm86.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vm86.c 2007-05-19 23:58:35.000000000 +0900 @@ -109,6 +109,7 @@ local_irq_enable(); if (!current->thread.vm86_info) { + local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } diff -urN ./linux-2.6.18.1/arch/i386/kernel/vmlinux.lds.S 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vmlinux.lds.S --- ./linux-2.6.18.1/arch/i386/kernel/vmlinux.lds.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vmlinux.lds.S 2007-05-19 23:58:35.000000000 +0900 @@ -8,6 +8,8 @@ #include #include #include +#include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -71,6 +73,51 @@ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ +/* VSYSCALL_GTOD data */ +#ifdef CONFIG_GENERIC_TIME_VSYSCALL +#undef VSYSCALL_ADDR +#define VSYSCALL_ADDR VSYSCALL_GTOD_START +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) + +#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) + +#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } + .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) } + + . = ALIGN(32); + .vsyscall_gtod_data : AT (VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) } + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + + . = ALIGN(32); + .vsyscall_gtod_lock : AT (VLOAD(.vsyscall_gtod_lock)) { *(.vsyscall_gtod_lock) } + vsyscall_gtod_lock = VVIRT(.vsyscall_gtod_lock); + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } + + . = VSYSCALL_VIRT_ADDR + 4096; + +#undef VSYSCALL_ADDR +#undef VSYSCALL_PHYS_ADDR +#undef VSYSCALL_VIRT_ADDR +#undef VLOAD_OFFSET +#undef VLOAD +#undef VVIRT_OFFSET +#undef VVIRT + +#endif +/* END of VSYSCALL_GTOD data*/ + #ifdef CONFIG_STACK_UNWIND . 
= ALIGN(4); .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { diff -urN ./linux-2.6.18.1/arch/i386/kernel/vsyscall-gtod.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vsyscall-gtod.c --- ./linux-2.6.18.1/arch/i386/kernel/vsyscall-gtod.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/kernel/vsyscall-gtod.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,179 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct vsyscall_gtod_data_t { + struct timeval wall_time_tv; + struct timezone sys_tz; + struct clocksource clock; +}; + +struct vsyscall_gtod_data_t vsyscall_gtod_data; +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data; + +seqlock_t vsyscall_gtod_lock = SEQLOCK_UNLOCKED; +seqlock_t __vsyscall_gtod_lock __section_vsyscall_gtod_lock = SEQLOCK_UNLOCKED; + +int errno; +static inline _syscall2(int,gettimeofday,struct timeval *,tv,struct timezone *,tz); + +static int vsyscall_mapped = 0; /* flag variable for remap_vsyscall() */ +extern struct timezone sys_tz; + +static inline void do_vgettimeofday(struct timeval* tv) +{ + cycle_t now, cycle_delta; + s64 nsec_delta; + + if (!__vsyscall_gtod_data.clock.vread) { + gettimeofday(tv, NULL); + return; + } + + /* read the clocksource and calc cycle_delta */ + now = __vsyscall_gtod_data.clock.vread(); + cycle_delta = (now - __vsyscall_gtod_data.clock.cycle_last) + & __vsyscall_gtod_data.clock.mask; + + /* convert cycles to nsecs */ + nsec_delta = cycle_delta * __vsyscall_gtod_data.clock.mult; + nsec_delta = nsec_delta >> __vsyscall_gtod_data.clock.shift; + + /* add the offset, converted to usecs, to wall_time_tv */ + *tv = __vsyscall_gtod_data.wall_time_tv; + do_div(nsec_delta, NSEC_PER_USEC); + tv->tv_usec += (unsigned long)nsec_delta; + while (tv->tv_usec >= USEC_PER_SEC) { + tv->tv_sec += 1; + tv->tv_usec -= USEC_PER_SEC; + } +} + +static inline void do_get_tz(struct timezone *tz) +{ + *tz = __vsyscall_gtod_data.sys_tz; +} + +static int __vsyscall(0) asmlinkage vgettimeofday(struct timeval *tv, struct timezone *tz) +{ + unsigned long seq; + do { + seq = read_seqbegin(&__vsyscall_gtod_lock); + + if (tv) + do_vgettimeofday(tv); + if (tz) + do_get_tz(tz); + + } while (read_seqretry(&__vsyscall_gtod_lock, seq)); + + return 0; +} + +static time_t __vsyscall(1) asmlinkage vtime(time_t * t) +{ + struct timeval tv; + vgettimeofday(&tv,NULL); + if (t) + *t = tv.tv_sec; + return tv.tv_sec; +} + +struct clocksource* curr_clock; + +void update_vsyscall(struct timespec *wall_time, + struct clocksource* clock) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_lock, flags); + + /* XXX - hackitty hack hack. this is terrible!
*/ + if (curr_clock != clock) { + curr_clock = clock; + } + + /* save off wall time as timeval */ + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; + + /* copy current clocksource */ + vsyscall_gtod_data.clock = *clock; + + /* save off current timezone */ + vsyscall_gtod_data.sys_tz = sys_tz; + + write_sequnlock_irqrestore(&vsyscall_gtod_lock, flags); + +} +extern char __vsyscall_0; + +static void __init map_vsyscall(void) +{ + unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - PAGE_OFFSET; + + /* Initially we map the VSYSCALL page w/ PAGE_KERNEL permissions to + * keep the alternate_instruction code from bombing out when it + * changes the seq_lock memory barriers in vgettimeofday() + */ + __set_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE, physaddr_page0, PAGE_KERNEL); +} + +static int __init remap_vsyscall(void) +{ + unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - PAGE_OFFSET; + + if (!vsyscall_mapped) + return 0; + + /* Remap the VSYSCALL page w/ PAGE_KERNEL_VSYSCALL permissions + * after the alternate_instruction code has run + */ + clear_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE); + __set_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); + + return 0; +} + +int __init vsyscall_init(void) +{ + printk("VSYSCALL: consistency checks..."); + if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday)) { + printk("vgettimeofday link addr broken\n"); + printk("VSYSCALL: vsyscall_init failed!\n"); + return -EFAULT; + } + if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)) { + printk("vtime link addr broken\n"); + printk("VSYSCALL: vsyscall_init failed!\n"); + return -EFAULT; + } + if (VSYSCALL_ADDR(0) != __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE)) { + printk("fixmap first vsyscall 0x%lx should be 0x%x\n", + __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE), + VSYSCALL_ADDR(0)); + printk("VSYSCALL: vsyscall_init failed!\n"); + return -EFAULT; + } + + + printk("passed...mapping..."); + map_vsyscall(); + printk("done.\n"); + vsyscall_mapped = 1; + printk("VSYSCALL: fixmap virt addr: 0x%lx\n", + __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE)); + + return 0; +} +__initcall(remap_vsyscall); diff -urN ./linux-2.6.18.1/arch/i386/lib/bitops.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/lib/bitops.c --- ./linux-2.6.18.1/arch/i386/lib/bitops.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/lib/bitops.c 2007-05-19 23:58:35.000000000 +0900 @@ -1,5 +1,11 @@ -#include + +#include +#include +#include #include +#include +#include +#include /** * find_next_bit - find the first set bit in a memory region @@ -68,3 +74,39 @@ return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks + */ +#ifdef CONFIG_SMP +asm( +".section .sched.text, \"ax\"\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + "lock; addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + "lock; subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret\n" +".previous\n" +); + +asm( +".section .sched.text, \"ax\"\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + "lock; incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + "lock; decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret\n" +".previous\n" +); +#endif diff -urN ./linux-2.6.18.1/arch/i386/mach-default/setup.c 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-default/setup.c --- ./linux-2.6.18.1/arch/i386/mach-default/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-default/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -35,7 +35,7 @@ /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, IRQF_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -79,7 +79,7 @@ { } -static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; /** * time_init_hook - do any specific initialisations for the system timer. diff -urN ./linux-2.6.18.1/arch/i386/mach-visws/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-visws/setup.c --- ./linux-2.6.18.1/arch/i386/mach-visws/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-visws/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -115,7 +115,7 @@ static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NODELAY, .name = "timer", }; diff -urN ./linux-2.6.18.1/arch/i386/mach-visws/visws_apic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-visws/visws_apic.c --- ./linux-2.6.18.1/arch/i386/mach-visws/visws_apic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-visws/visws_apic.c 2007-05-19 23:58:35.000000000 +0900 @@ -259,11 +259,13 @@ static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = IRQF_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = IRQF_NODELAY, }; diff -urN ./linux-2.6.18.1/arch/i386/mach-voyager/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-voyager/setup.c --- ./linux-2.6.18.1/arch/i386/mach-voyager/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mach-voyager/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -18,7 +18,7 @@ /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, IRQF_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -40,7 +40,7 @@ { } -static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { diff -urN ./linux-2.6.18.1/arch/i386/mm/fault.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/fault.c --- ./linux-2.6.18.1/arch/i386/mm/fault.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/fault.c 2007-05-19 23:58:35.000000000 +0900 @@ -73,6 +73,9 @@ int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + user_trace_stop(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -325,8 +328,8 @@ * bit 3 == 1 means use of reserved bit detected * bit 4 == 1 means fault was an instruction fetch */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +fastcall notrace void 
__kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; @@ -337,6 +340,7 @@ /* get the address */ address = read_cr2(); + trace_special(regs->eip, error_code, address); tsk = current; diff -urN ./linux-2.6.18.1/arch/i386/mm/highmem.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/highmem.c --- ./linux-2.6.18.1/arch/i386/mm/highmem.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/highmem.c 2007-05-19 23:58:35.000000000 +0900 @@ -18,6 +18,26 @@ kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +46,7 @@ * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +68,7 @@ return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -78,7 +98,7 @@ /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it.
*/ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -93,7 +113,7 @@ return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +128,7 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); diff -urN ./linux-2.6.18.1/arch/i386/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/init.c --- ./linux-2.6.18.1/arch/i386/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/init.c 2007-05-19 23:58:35.000000000 +0900 @@ -45,7 +45,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); @@ -194,7 +194,7 @@ extern int is_available_memory(efi_memory_desc_t *); -int page_is_ram(unsigned long pagenr) +int notrace page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; diff -urN ./linux-2.6.18.1/arch/i386/mm/pgtable.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/pgtable.c --- ./linux-2.6.18.1/arch/i386/mm/pgtable.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/mm/pgtable.c 2007-05-19 23:58:35.000000000 +0900 @@ -182,7 +182,7 @@ * recommendations and having no core impact whatsoever. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) diff -urN ./linux-2.6.18.1/arch/i386/oprofile/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/oprofile/Kconfig --- ./linux-2.6.18.1/arch/i386/oprofile/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/oprofile/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -15,3 +15,6 @@ If unsure, say N. 
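The kmap_atomic()/kunmap_atomic() entry points in the highmem.c hunks above are renamed to their double-underscore forms so that PREEMPT_RT can interpose wrappers: on RT the public names may take the sleepable kmap() path, while callers that really need the per-CPU fixmap slots use the __-prefixed versions. A minimal sketch of such a wrapper layer, assuming this split (the RT header itself is not part of the hunks shown here):

#ifdef CONFIG_PREEMPT_RT
/* Sketch: on RT an "atomic" kmap may simply take the sleepable path. */
static inline void *kmap_atomic(struct page *page, enum km_type type)
{
	return kmap(page);
}
static inline void kunmap_atomic(void *kvaddr, enum km_type type)
{
	kunmap_virt(kvaddr);	/* kunmap_virt() is added by this patch */
}
#else
/* Without RT, keep the classic fixmap-based atomic mappings. */
# define kmap_atomic(page, type)	__kmap_atomic(page, type)
# define kunmap_atomic(kvaddr, type)	__kunmap_atomic(kvaddr, type)
#endif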
+config PROFILE_NMI + bool + default y diff -urN ./linux-2.6.18.1/arch/i386/pci/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/Makefile --- ./linux-2.6.18.1/arch/i386/pci/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -4,8 +4,9 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_ACPI) += acpi.o + pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o diff -urN ./linux-2.6.18.1/arch/i386/pci/direct.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/direct.c --- ./linux-2.6.18.1/arch/i386/pci/direct.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/direct.c 2007-05-19 23:58:35.000000000 +0900 @@ -220,16 +220,23 @@ unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -239,17 +246,19 @@ unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } diff -urN ./linux-2.6.18.1/arch/i386/pci/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/irq.c --- ./linux-2.6.18.1/arch/i386/pci/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/i386/pci/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -981,10 +981,6 @@ pci_name(bridge), 'A' + pin, irq); } if (irq >= 0) { - if (use_pci_vector() && - !platform_legacy_irq(irq)) - irq = IO_APIC_VECTOR(irq); - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; @@ -1169,33 +1165,3 @@ } return 0; } - -int pci_vector_resources(int last, int nr_released) -{ - int count = nr_released; - - int next = last; - int offset = (last % 8); - - while (next < FIRST_SYSTEM_VECTOR) { - next += 8; -#ifdef CONFIG_X86_64 - if (next == IA32_SYSCALL_VECTOR) - continue; -#else - if (next == SYSCALL_VECTOR) - continue; -#endif - count++; - if (next >= FIRST_SYSTEM_VECTOR) { - if (offset%8) { - next = FIRST_DEVICE_VECTOR + offset; - offset++; - continue; - } - count--; - } - } - - return count; -} diff -urN ./linux-2.6.18.1/arch/ia64/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/Kconfig --- ./linux-2.6.18.1/arch/ia64/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -32,6 +32,7 @@ config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT default y config GENERIC_FIND_NEXT_BIT @@ -42,7 +43,11 @@ bool default y -config TIME_INTERPOLATION +config GENERIC_TIME + bool + 
default y + +config GENERIC_TIME_VSYSCALL bool default y @@ -258,6 +263,69 @@ If you don't know what to do here, say N. + +config GENERIC_TIME + bool + default y + +config HIGH_RES_TIMERS + bool "High-Resolution Timers" + help + + POSIX timers are available by default. This option enables + high-resolution POSIX timers. With this option the resolution + is at least 1 microsecond. High resolution is not free. If + enabled this option will add a small overhead each time a + timer expires that is not on a 1/HZ tick boundary. If no such + timers are used the overhead is nil. + + This option enables two additional POSIX CLOCKS, + CLOCK_REALTIME_HR and CLOCK_MONOTONIC_HR. Note that this + option does not change the resolution of CLOCK_REALTIME or + CLOCK_MONOTONIC which remain at 1/HZ resolution. + +config HIGH_RES_RESOLUTION + int "High-Resolution-Timer resolution (nanoseconds)" + depends on HIGH_RES_TIMERS + default 1000 + help + + This sets the resolution of timers accessed with + CLOCK_REALTIME_HR and CLOCK_MONOTONIC_HR. Too + fine a resolution (too small a number) will usually not + be observable due to normal system latencies. For an + 800 MHz processor about 10,000 is the recommended maximum + (smallest number). If you don't need that sort of resolution, + higher numbers may generate less overhead. + +choice + prompt "Clock source" + depends on HIGH_RES_TIMERS + default HIGH_RES_TIMER_ITC + help + This option allows you to choose the hardware source in charge + of generating high-precision interrupts on your system. + On IA-64 these are: + + + ITC Interval Time Counter 1/CPU clock + HPET High Precision Event Timer ~ (XXX:have to check the spec) + + The ITC timer is available on all the ia64 computers because + it is integrated directly into the processor. However it may not + give correct results on MP machines with processors running + at different clock rates. In this case you may want to use + the HPET if available on your machine. + + +config HIGH_RES_TIMER_ITC + bool "Interval Time Counter/ITC" + +config HIGH_RES_TIMER_HPET + bool "High Precision Event Timer/HPET" + +endchoice + config NR_CPUS int "Maximum number of CPUs (2-1024)" range 2 1024 @@ -310,17 +378,15 @@ This option is useful to enable this feature on older BIOS's as well. You can also enable this by using boot command line option force_cpei=1. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. +source "kernel/Kconfig.preempt" - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure.
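To see what the 1/HZ versus high-resolution distinction in the help text above means in practice, a small user-space probe can report the advertised granularity of the standard clocks. The CLOCK_*_HR IDs are kernel extensions and are not assumed to exist in libc, so this sketch sticks to portable POSIX calls (build with e.g. gcc -o clockres clockres.c -lrt):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res;

	/* With plain 1/HZ timekeeping both clocks report 1000000 ns at
	 * HZ=1000; a high-resolution clock reports a finer granularity. */
	if (clock_getres(CLOCK_REALTIME, &res) == 0)
		printf("CLOCK_REALTIME resolution:  %ld ns\n", res.tv_nsec);
	if (clock_getres(CLOCK_MONOTONIC, &res) == 0)
		printf("CLOCK_MONOTONIC resolution: %ld ns\n", res.tv_nsec);
	return 0;
}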
+config RWSEM_GENERIC_SPINLOCK + bool + depends on PREEMPT_RT + default y + +config PREEMPT + def_bool y if (PREEMPT_RT || PREEMPT_SOFTIRQS || PREEMPT_HARDIRQS || PREEMPT_VOLUNTARY || PREEMPT_DESKTOP) source "mm/Kconfig" diff -urN ./linux-2.6.18.1/arch/ia64/configs/bigsur_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/bigsur_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/bigsur_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/bigsur_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -85,7 +85,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/configs/gensparse_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/gensparse_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/gensparse_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/gensparse_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -86,7 +86,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/configs/sim_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/sim_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/sim_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/sim_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -86,7 +86,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/configs/sn2_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/sn2_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/sn2_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/sn2_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -83,7 +83,7 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_DMI=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y diff -urN ./linux-2.6.18.1/arch/ia64/configs/tiger_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/tiger_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/tiger_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/tiger_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -86,7 +86,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/configs/zx1_defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/zx1_defconfig --- ./linux-2.6.18.1/arch/ia64/configs/zx1_defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/configs/zx1_defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -84,7 +84,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y 
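The defconfig flips from CONFIG_TIME_INTERPOLATION=y to CONFIG_GENERIC_TIME=y here track the replacement of the ia64 time interpolators with GENERIC_TIME clocksources, which the cyclone and ITC conversions below carry out. For reference, a minimal clocksource in this kernel's API looks roughly like the following sketch (the field set mirrors the tsc and cyclone hunks in this patch; the hardware read and the 1 MHz rate are placeholders):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/clocksource.h>

static cycle_t read_mycounter(void)
{
	return (cycle_t)0;	/* placeholder: read the hardware counter here */
}

static struct clocksource clocksource_mycounter = {
	.name		= "mycounter",
	.rating		= 200,
	.read		= read_mycounter,
	.mask		= (cycle_t)-1,
	.mult		= 0,	/* computed from the counter frequency below */
	.shift		= 16,
	.is_continuous	= 1,
};

static int __init mycounter_clocksource_init(void)
{
	/* assumed rate: 1 MHz; a real driver uses its measured frequency */
	clocksource_mycounter.mult =
		clocksource_hz2mult(1000000, clocksource_mycounter.shift);
	return clocksource_register(&clocksource_mycounter);
}
module_init(mycounter_clocksource_init);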
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/defconfig --- ./linux-2.6.18.1/arch/ia64/defconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/defconfig 2007-05-19 23:58:35.000000000 +0900 @@ -86,7 +86,7 @@ CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_TIME_INTERPOLATION=y +CONFIG_GENERIC_TIME=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -urN ./linux-2.6.18.1/arch/ia64/kernel/asm-offsets.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/asm-offsets.c --- ./linux-2.6.18.1/arch/ia64/kernel/asm-offsets.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/asm-offsets.c 2007-05-19 23:58:35.000000000 +0900 @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C 1 #include +#include #include #include @@ -254,18 +255,13 @@ offsetof (struct pal_min_state_area_s, pmsa_xip)); BLANK(); +#ifdef CONFIG_TIME_INTERPOLATION /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ - DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); - DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); - DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); - DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); - DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter)); - DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); - DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); - DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); - DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); - DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); + DEFINE(IA64_CLOCKSOURCE_MASK_OFFSET, offsetof (struct clocksource, mask)); + DEFINE(IA64_CLOCKSOURCE_MULT_OFFSET, offsetof (struct clocksource, mult)); + DEFINE(IA64_CLOCKSOURCE_SHIFT_OFFSET, offsetof (struct clocksource, shift)); + DEFINE(IA64_CLOCKSOURCE_MMIO_PTR_OFFSET, offsetof (struct clocksource, fsys_mmio_ptr)); + DEFINE(IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET, offsetof (struct clocksource, cycle_last)); +#endif } diff -urN ./linux-2.6.18.1/arch/ia64/kernel/cyclone.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/cyclone.c --- ./linux-2.6.18.1/arch/ia64/kernel/cyclone.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/cyclone.c 2007-05-19 23:58:35.000000000 +0900 @@ -3,6 +3,7 @@ #include #include #include +#include #include /* IBM Summit (EXA) Cyclone counter code*/ @@ -18,13 +19,21 @@ use_cyclone = 1; } +static void __iomem *cyclone_mc_ptr; -struct time_interpolator cyclone_interpolator = { - .source = TIME_SOURCE_MMIO64, - .shift = 16, - .frequency = CYCLONE_TIMER_FREQ, - .drift = -100, - .mask = (1LL << 40) - 1 +static cycle_t read_cyclone(void) +{ + return (cycle_t)readq((void __iomem *)cyclone_mc_ptr); +} + +static struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 300, + .read = read_cyclone, + .mask = (1LL << 40) - 1, + .mult = 0, /*to be 
calculated*/ + .shift = 16, + .is_continuous = 1, }; int __init init_cyclone_clock(void) { @@ -101,8 +110,10 @@ } } /* initialize last tick */ - cyclone_interpolator.addr = cyclone_timer; - register_time_interpolator(&cyclone_interpolator); + clocksource_cyclone.fsys_mmio_ptr = cyclone_mc_ptr = cyclone_timer; + clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ, + clocksource_cyclone.shift); + clocksource_register(&clocksource_cyclone); return 0; } diff -urN ./linux-2.6.18.1/arch/ia64/kernel/entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/entry.S --- ./linux-2.6.18.1/arch/ia64/kernel/entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/entry.S 2007-05-19 23:58:35.000000000 +0900 @@ -1101,23 +1101,24 @@ st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? + tbit.nz p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched != 0? +(p6) br.cond.sptk.few .needresched + tbit.z p6,p0=r31,TIF_NEED_RESCHED_DELAYED // current_thread_info()->need_resched_delayed==0? (p6) br.cond.sptk.few .notify -#ifdef CONFIG_PREEMPT -(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 + +.needresched: + +(pKStk) br.cond.sptk.many .fromkernel ;; -(pKStk) st4 [r20]=r21 ssm psr.i // enable interrupts -#endif br.call.spnt.many rp=schedule -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 - rsm psr.i // disable interrupts - ;; -#ifdef CONFIG_PREEMPT -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; -(pKStk) st4 [r20]=r0 // preempt_count() <- 0 -#endif +.ret9a: rsm psr.i // disable interrupts + ;; + br.cond.sptk.many .endpreemptdep +.fromkernel: + br.call.spnt.many rp=preempt_schedule_irq +.ret9b: rsm psr.i // disable interrupts +.endpreemptdep: (pLvSys)br.cond.sptk.few .work_pending_syscall_end br.cond.sptk.many .work_processed_kernel // re-check diff -urN ./linux-2.6.18.1/arch/ia64/kernel/fsys.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/fsys.S --- ./linux-2.6.18.1/arch/ia64/kernel/fsys.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/fsys.S 2007-05-19 23:58:35.000000000 +0900 @@ -24,6 +24,7 @@ #include "entry.h" +#ifdef CONFIG_TIME_INTERPOLATION /* * See Documentation/ia64/fsys.txt for details on fsyscalls.
* @@ -145,13 +146,6 @@ FSYS_RETURN END(fsys_set_tid_address) -/* - * Ensure that the time interpolator structure is compatible with the asm code -#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ - || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 -#error fsys_gettimeofday incompatible with changes to struct time_interpolator -#endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 #define CLOCK_DIVIDE_BY_1000 0x4000 @@ -177,19 +171,18 @@ // r11 = preserved: saved ar.pfs // r12 = preserved: memory stack // r13 = preserved: thread pointer - // r14 = address of mask / mask + // r14 = address of mask / mask value // r15 = preserved: system call number // r16 = preserved: current task pointer // r17 = wall to monotonic use - // r18 = time_interpolator->offset - // r19 = address of wall_to_monotonic - // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address - // r21 = shift factor - // r22 = address of time interpolator->last_counter - // r23 = address of time_interpolator->last_cycle - // r24 = adress of time_interpolator->offset - // r25 = last_cycle value - // r26 = last_counter value + // r19 = address of itc_lastcycle + // r20 = struct clocksource / address of first element + // r21 = shift value + // r22 = address of itc_jitter / wall_to_monotonic + // r23 = address of shift + // r24 = address of mult factor / cycle_last value + // r25 = itc_lastcycle value + // r26 = address of clocksource cycle_last // r27 = pointer to xtime // r28 = sequence number at the beginning of critical section // r29 = address of seqlock @@ -199,9 +192,9 @@ // p6,p7 short term use // p8 = timesource ar.itc // p9 = timesource mmio64 - // p10 = timesource mmio32 + // p10 = timesource mmio32 - not used // p11 = timesource not to be handled by asm code - // p12 = memory time source ( = p9 | p10) + // p12 = memory time source ( = p9 | p10) - not used // p13 = do cmpxchg with time_interpolator_last_cycle // p14 = Divide by 1000 // p15 = Add monotonic @@ -212,61 +205,55 @@ tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure mov pr = r30,0xc000 // Set predicates according to function add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - movl r20 = time_interpolator + movl r20 = fsyscall_clock // load fsyscall clocksource address ;; - ld8 r20 = [r20] // get pointer to time_interpolator structure + add r10 = IA64_CLOCKSOURCE_MMIO_PTR_OFFSET,r20 movl r29 = xtime_lock ld4 r2 = [r2] // process work pending flags movl r27 = xtime ;; // only one bundle here - ld8 r21 = [r20] // first quad with control information + add r14 = IA64_CLOCKSOURCE_MASK_OFFSET,r20 + movl r22 = itc_jitter + add r24 = IA64_CLOCKSOURCE_MULT_OFFSET,r20 and r2 = TIF_ALLWORK_MASK,r2 (p6) br.cond.spnt.few .fail_einval // deferred branch ;; - add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 - extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc - extr r8 = r21,0,16 // time_interpolator->source + ld8 r30 = [r10] // clocksource->mmio_ptr + movl r19 = itc_lastcycle + add r23 = IA64_CLOCKSOURCE_SHIFT_OFFSET,r20 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled (p6) br.cond.spnt.many fsys_fallback_syscall ;; - cmp.eq p8,p12 = 0,r8 // Check for cpu timer - cmp.eq p9,p0 = 1,r8 // MMIO64 ? - extr r2 = r21,24,8 // time_interpolator->jitter - cmp.eq p10,p0 = 2,r8 // MMIO32 ?
- cmp.ltu p11,p0 = 2,r8 // function or other clock -(p11) br.cond.spnt.many fsys_fallback_syscall - ;; - setf.sig f7 = r3 // Setup for scaling of counter -(p15) movl r19 = wall_to_monotonic -(p12) ld8 r30 = [r10] - cmp.ne p13,p0 = r2,r0 // need jitter compensation? - extr r21 = r21,16,8 // shift factor + ld8 r14 = [r14] // clocksource mask value + ld4 r2 = [r22] // itc_jitter value + add r26 = IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET,r20 // clock fsyscall_cycle_last + ld4 r3 = [r24] // clocksource->mult value + cmp.eq p8,p9 = 0,r30 // Check for cpu timer, no mmio_ptr, set p8, clear p9 + ;; + setf.sig f7 = r3 // Setup for mult scaling of counter +(p15) movl r22 = wall_to_monotonic + ld4 r21 = [r23] // shift value +(p8) cmp.ne p13,p0 = r2,r0 // need jitter compensation, set p13 +(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control ;; .time_redo: .pred.rel.mutex p8,p9,p10 ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! - add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 (p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. -(p10) ld4 r2 = [r30] // readw(ti->address) -(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 +(p13) ld8 r25 = [r19] // get itc_lastcycle value ;; // could be removed by moving the last add upward - ld8 r26 = [r22] // time_interpolator->last_counter -(p13) ld8 r25 = [r23] // time interpolator->last_cycle - add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 -(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET - add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 + ld8 r24 = [r26] // get fsyscall_cycle_last value +(p15) ld8 r17 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET ;; - ld8 r18 = [r24] // time_interpolator->offset ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) ;; - ld8 r14 = [r14] // time_interpolator->mask -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r26 // current_counter - last_counter +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r24 // current_counter - last_counter ;; -(p6) sub r10 = r25,r26 // time we got was less than last_cycle +(p6) sub r10 = r25,r24 // time we got was less than last_cycle (p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg ;; and r10 = r10,r14 // Apply mask @@ -274,22 +261,21 @@ setf.sig f8 = r10 nop.i 123 ;; -(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv +(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) (p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs ;; -(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET +(p15) ld8 r17 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET (p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo // simulate tbit.nz.or p7,p0 = r28,0 and r28 = ~1,r28 // Make sequence even to force retry if odd getf.sig r2 = f8 mf - add r8 = r8,r18 // Add time interpolator offset ;; ld4 r10 = [r29] // xtime_lock.sequence (p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs - shr.u r2 = r2,r21 + shr.u r2 = r2,r21 // shift by factor ;; // overloaded 3 bundles! // End critical section. 
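The predicate juggling above implements the usual seqlock-protected clocksource read; stripped of the ITC-jitter cmpxchg handling, the fast path is roughly this C sketch (names simplified, not the kernel's literal helpers):

/* Sketch of the fsys_gettimeofday fast path shown above in assembly. */
static struct clocksource *clock;	/* the registered fsyscall clocksource */

static void vgettime_sketch(struct timespec *ts)
{
	unsigned long seq;
	cycle_t delta;
	u64 nsec;

	do {
		seq   = read_seqbegin(&xtime_lock);
		/* ar.itc or the MMIO counter, minus the last update point */
		delta = (clock->read() - clock->cycle_last) & clock->mask;
		*ts   = xtime;
		/* scale cycles to nanoseconds: (delta * mult) >> shift */
		nsec  = ((u64)delta * clock->mult) >> clock->shift;
	} while (read_seqretry(&xtime_lock, seq));

	ts->tv_nsec += nsec;
	while (ts->tv_nsec >= NSEC_PER_SEC) {
		ts->tv_nsec -= NSEC_PER_SEC;
		ts->tv_sec++;
	}
}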
add r8 = r8,r2 // Add xtime.nsecs @@ -348,6 +334,26 @@ br.many .gettime END(fsys_clock_gettime) + +#else // !CONFIG_TIME_INTERPOLATION + +# define fsys_gettimeofday 0 +# define fsys_clock_gettime 0 + +.fail_einval: + mov r8 = EINVAL + mov r10 = -1 + FSYS_RETURN + +.fail_efault: + mov r8 = EFAULT + mov r10 = -1 + FSYS_RETURN + +#endif + + + /* * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). */ diff -urN ./linux-2.6.18.1/arch/ia64/kernel/iosapic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/iosapic.c --- ./linux-2.6.18.1/arch/ia64/kernel/iosapic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/iosapic.c 2007-05-19 23:58:35.000000000 +0900 @@ -112,7 +112,7 @@ (PAGE_SIZE / sizeof(struct iosapic_rte_info)) #define RTE_PREALLOCATED (1) -static DEFINE_SPINLOCK(iosapic_lock); +static DEFINE_RAW_SPINLOCK(iosapic_lock); /* * These tables map IA-64 vectors to the IOSAPIC pin that generates this @@ -409,6 +409,34 @@ return 0; } +/* + * In the preemptible case mask the IRQ first then handle it and ack it. + */ +#ifdef CONFIG_PREEMPT_HARDIRQS + +static void +iosapic_ack_level_irq (unsigned int irq) +{ + ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; + + move_irq(irq); + mask_irq(irq); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) + iosapic_eoi(rte->addr, vec); +} + +static void +iosapic_end_level_irq (unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_INPROGRESS)) + unmask_irq(irq); +} + +#else /* !CONFIG_PREEMPT_HARDIRQS */ + +#define iosapic_ack_level_irq nop + static void iosapic_end_level_irq (unsigned int irq) { @@ -420,10 +448,12 @@ iosapic_eoi(rte->addr, vec); } + +#endif + #define iosapic_shutdown_level_irq mask_irq #define iosapic_enable_level_irq unmask_irq #define iosapic_disable_level_irq mask_irq -#define iosapic_ack_level_irq nop struct hw_interrupt_type irq_type_iosapic_level = { .typename = "IO-SAPIC-level", diff -urN ./linux-2.6.18.1/arch/ia64/kernel/irq_ia64.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/irq_ia64.c --- ./linux-2.6.18.1/arch/ia64/kernel/irq_ia64.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/irq_ia64.c 2007-05-19 23:58:35.000000000 +0900 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -105,6 +106,25 @@ return test_and_set_bit(pos, ia64_vector_mask); } +/* + * Dynamic irq allocate and deallocation for MSI + */ +int create_irq(void) +{ + int vector = assign_irq_vector(AUTO_ASSIGN); + + if (vector >= 0) + dynamic_irq_init(vector); + + return vector; +} + +void destroy_irq(unsigned int irq) +{ + dynamic_irq_cleanup(irq); + free_irq_vector(irq); +} + #ifdef CONFIG_SMP # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) #else diff -urN ./linux-2.6.18.1/arch/ia64/kernel/mca.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/mca.c --- ./linux-2.6.18.1/arch/ia64/kernel/mca.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/mca.c 2007-05-19 23:58:35.000000000 +0900 @@ -152,7 +152,7 @@ typedef struct ia64_state_log_s { - spinlock_t isl_lock; + raw_spinlock_t isl_lock; int isl_index; unsigned long isl_count; ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ diff -urN ./linux-2.6.18.1/arch/ia64/kernel/perfmon.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/perfmon.c --- ./linux-2.6.18.1/arch/ia64/kernel/perfmon.c 2006-10-14 12:34:03.000000000 +0900 +++ 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/perfmon.c 2007-05-19 23:58:35.000000000 +0900 @@ -277,7 +277,7 @@ */ typedef struct pfm_context { - spinlock_t ctx_lock; /* context protection */ + raw_spinlock_t ctx_lock; /* context protection */ pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ unsigned int ctx_state; /* state: active/inactive (no bitfield) */ @@ -363,7 +363,7 @@ * mostly used to synchronize between system wide and per-process */ typedef struct { - spinlock_t pfs_lock; /* lock the structure */ + raw_spinlock_t pfs_lock; /* lock the structure */ unsigned int pfs_task_sessions; /* number of per task sessions */ unsigned int pfs_sys_sessions; /* number of per system wide sessions */ @@ -504,7 +504,7 @@ static struct proc_dir_entry *perfmon_dir; static pfm_uuid_t pfm_null_uuid = {0,}; -static spinlock_t pfm_buffer_fmt_lock; +static raw_spinlock_t pfm_buffer_fmt_lock; static LIST_HEAD(pfm_buffer_fmt_list); static pmu_config_t *pmu_conf; diff -urN ./linux-2.6.18.1/arch/ia64/kernel/process.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/process.c --- ./linux-2.6.18.1/arch/ia64/kernel/process.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/process.c 2007-05-19 23:58:35.000000000 +0900 @@ -96,6 +96,9 @@ void dump_stack (void) { + if (irqs_disabled()) { + printk("Uh oh.. entering dump_stack() with irqs disabled.\n"); + } show_stack(NULL, NULL); } @@ -199,7 +202,7 @@ default_idle (void) { local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { if (can_do_pal_halt) safe_halt(); else @@ -275,7 +278,7 @@ else current_thread_info()->status |= TS_POLLING; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { void (*idle)(void); #ifdef CONFIG_SMP min_xtp(); @@ -297,10 +300,11 @@ normal_xtp(); #endif } - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); + preempt_disable(); - check_pgt_cache(); + if (cpu_is_offline(cpu)) play_dead(); } diff -urN ./linux-2.6.18.1/arch/ia64/kernel/sal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/sal.c --- ./linux-2.6.18.1/arch/ia64/kernel/sal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/sal.c 2007-05-19 23:58:35.000000000 +0900 @@ -18,7 +18,7 @@ #include #include - __cacheline_aligned DEFINE_SPINLOCK(sal_lock); + __cacheline_aligned DEFINE_RAW_SPINLOCK(sal_lock); unsigned long sal_platform_features; unsigned short sal_revision; diff -urN ./linux-2.6.18.1/arch/ia64/kernel/salinfo.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/salinfo.c --- ./linux-2.6.18.1/arch/ia64/kernel/salinfo.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/salinfo.c 2007-05-19 23:58:35.000000000 +0900 @@ -141,7 +141,7 @@ struct salinfo_data { cpumask_t cpu_event; /* which cpus have outstanding events */ - struct semaphore mutex; + struct compat_semaphore mutex; u8 *log_buffer; u64 log_size; u8 *oemdata; /* decoded oem data */ @@ -157,8 +157,8 @@ static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; -static DEFINE_SPINLOCK(data_lock); -static DEFINE_SPINLOCK(data_saved_lock); +static DEFINE_RAW_SPINLOCK(data_lock); +static DEFINE_RAW_SPINLOCK(data_saved_lock); /** salinfo_platform_oemdata - optional callback to decode oemdata from an error * record. 
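A note on the lock conversions running through the ia64 hunks above (iosapic_lock, isl_lock, the perfmon ctx/pfs/buffer-fmt locks, sal_lock, data_lock, data_saved_lock): under PREEMPT_RT a plain spinlock_t becomes a sleeping, priority-inheriting lock, so any lock that is taken from hard interrupt context, MCA/SAL error handling, or other code that cannot schedule must be declared raw to keep the traditional spin-with-interrupts-off behaviour. The following is a minimal sketch of the distinction, not part of the patch; all names are invented, and it assumes the 2.6.18-rt convention that the ordinary spin_lock_irqsave() family dispatches on the declared lock type:

/* Minimal sketch (not part of the patch): lock-type selection on -rt.
 * All names here are invented for illustration. */
#include <linux/spinlock.h>
#include <linux/interrupt.h>

static DEFINE_RAW_SPINLOCK(hw_lock);	/* hard-IRQ path: must truly spin */
static DEFINE_SPINLOCK(stats_lock);	/* may sleep under PREEMPT_RT */

static unsigned long hw_word, stats_word;

static irqreturn_t sample_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	unsigned long flags;

	/* Raw locks keep IRQs off and never schedule, so this is safe
	 * even when the handler runs in true hard-IRQ context. */
	spin_lock_irqsave(&hw_lock, flags);
	hw_word++;
	spin_unlock_irqrestore(&hw_lock, flags);
	return IRQ_HANDLED;
}

static void sample_account(void)
{
	/* Under PREEMPT_RT this acquires a sleeping lock; the critical
	 * section stays preemptible, which is fine in process context. */
	spin_lock(&stats_lock);
	stats_word++;
	spin_unlock(&stats_lock);
}

Declaring a lock raw is not free: every raw critical section is a hard latency source, which is why the patch converts only the handful of locks that genuinely cannot sleep.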
diff -urN ./linux-2.6.18.1/arch/ia64/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/semaphore.c --- ./linux-2.6.18.1/arch/ia64/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -40,12 +40,12 @@ */ void -__up (struct semaphore *sem) +__up (struct compat_semaphore *sem) { wake_up(&sem->wait); } -void __sched __down (struct semaphore *sem) +void __sched __down (struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -82,7 +82,7 @@ tsk->state = TASK_RUNNING; } -int __sched __down_interruptible (struct semaphore * sem) +int __sched __down_interruptible (struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -142,7 +142,7 @@ * count. */ int -__down_trylock (struct semaphore *sem) +__down_trylock (struct compat_semaphore *sem) { unsigned long flags; int sleepers; diff -urN ./linux-2.6.18.1/arch/ia64/kernel/signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/signal.c --- ./linux-2.6.18.1/arch/ia64/kernel/signal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/signal.c 2007-05-19 23:58:35.000000000 +0900 @@ -487,6 +487,14 @@ long errno = scr->pt.r8; # define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c)) +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif + /* * In the ia64_leave_kernel code path, we want the common case to go fast, which * is why we may in certain cases get here from kernel mode. Just return without diff -urN ./linux-2.6.18.1/arch/ia64/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/smp.c --- ./linux-2.6.18.1/arch/ia64/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -222,6 +222,22 @@ platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); + } +} + + void smp_flush_tlb_all (void) { diff -urN ./linux-2.6.18.1/arch/ia64/kernel/smpboot.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/smpboot.c --- ./linux-2.6.18.1/arch/ia64/kernel/smpboot.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/smpboot.c 2007-05-19 23:58:35.000000000 +0900 @@ -371,6 +371,8 @@ { } +extern void register_itc_clockevent(void); + static void __devinit smp_callin (void) { @@ -430,6 +432,7 @@ #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); #endif + register_itc_clockevent(); /* * Allow the master to continue. 
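The semaphore rework just above is the companion piece on the sleeping-lock side: 2.6.18-rt reimplements struct semaphore on top of the rtmutex, and users that depend on the classic counting/wake-up semantics (the ia64 __up/__down/__down_interruptible/__down_trylock primitives here, salinfo's log mutex earlier) are moved to struct compat_semaphore, which keeps the old wait-queue implementation. A rough usage sketch follows; it is not from the patch, the producer/consumer names are invented, and it assumes the -rt series' type-dispatching sema_init()/up()/down_interruptible() macros:

/* Minimal sketch (not part of the patch): keeping classic semaphore
 * semantics on -rt by declaring the object compat. */
#include <asm/semaphore.h>
#include <linux/errno.h>

static struct compat_semaphore log_ready;

static void sample_init(void)
{
	sema_init(&log_ready, 0);	/* start empty; consumers block */
}

/* May be called from interrupt context: up() never sleeps. */
static void sample_record_event(void)
{
	up(&log_ready);
}

/* Process context: sleeps until sample_record_event() posts an event. */
static int sample_consume_one(void)
{
	if (down_interruptible(&log_ready))
		return -EINTR;
	/* ... drain one record ... */
	return 0;
}

The practical rule the patch applies: anything signalled from interrupt context or used as a cross-task completion stays compat, while plain mutual exclusion migrates to the rtmutex-backed semaphore and gains priority inheritance.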
diff -urN ./linux-2.6.18.1/arch/ia64/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/time.c --- ./linux-2.6.18.1/arch/ia64/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,10 @@ extern unsigned long wall_jiffies; +static cycle_t itc_get_cycles(void); +cycle_t itc_lastcycle __attribute__((aligned(L1_CACHE_BYTES))); +int itc_jitter __attribute__((aligned(L1_CACHE_BYTES))); + volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -40,11 +45,16 @@ #endif -static struct time_interpolator itc_interpolator = { - .shift = 16, - .mask = 0xffffffffffffffffLL, - .source = TIME_SOURCE_CPU +static struct clocksource clocksource_itc = { + .name = "itc", + .rating = 350, + .read = itc_get_cycles, + .mask = 0xffffffffffffffffLL, + .mult = 0, /*to be calculated*/ + .shift = 16, + .is_continuous = 1, }; +static struct clocksource *clocksource_itc_p; static irqreturn_t timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) @@ -57,38 +67,57 @@ platform_timer_interrupt(irq, dev_id, regs); +#if 0 new_itm = local_cpu_data->itm_next; if (!time_after(ia64_get_itc(), new_itm)) printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", ia64_get_itc(), new_itm); - profile_tick(CPU_PROFILING, regs); +#endif - while (1) { - update_process_times(user_mode(regs)); + if (time_after(ia64_get_itc(), local_cpu_data->itm_tick_next)) { - new_itm += local_cpu_data->itm_delta; + unsigned long new_tick_itm; + new_tick_itm = local_cpu_data->itm_tick_next; - if (smp_processor_id() == time_keeper_id) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. - */ - write_seqlock(&xtime_lock); - do_timer(regs); - local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); - } else - local_cpu_data->itm_next = new_itm; + profile_tick(CPU_PROFILING, regs); + + while (1) { + update_process_times(user_mode(regs)); + + new_tick_itm += local_cpu_data->itm_tick_delta; + + if (smp_processor_id() == time_keeper_id) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid an SMP race by acquiring the + * xtime_lock. + */ + write_seqlock(&xtime_lock); + do_timer(regs); + local_cpu_data->itm_tick_next = new_tick_itm; + write_sequnlock(&xtime_lock); + } else + local_cpu_data->itm_tick_next = new_tick_itm; + + if (time_after(new_tick_itm, ia64_get_itc())) + break; + } + } - if (time_after(new_itm, ia64_get_itc())) - break; + if (time_after(ia64_get_itc(), local_cpu_data->itm_timer_next)) { + if (itc_clockevent.event_handler) + itc_clockevent.event_handler(regs); } do { + // FIXME, really, please + new_itm = local_cpu_data->itm_tick_next; + + if (time_after(new_itm, local_cpu_data->itm_timer_next)) + new_itm = local_cpu_data->itm_timer_next; /* * If we're too close to the next clock tick for * comfort, we increase the safety margin by @@ -98,8 +127,8 @@ * too fast (with the potentially devastating effect * of losing monotony of time). 
*/ - while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) - new_itm += local_cpu_data->itm_delta; + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_tick_delta/2)) + new_itm += local_cpu_data->itm_tick_delta; ia64_set_itm(new_itm); /* double check, in case we got hit by a (slow) PMI: */ } while (time_after_eq(ia64_get_itc(), new_itm)); @@ -118,7 +147,7 @@ /* arrange for the cycle counter to generate a timer interrupt: */ ia64_set_itv(IA64_TIMER_VECTOR); - delta = local_cpu_data->itm_delta; + delta = local_cpu_data->itm_tick_delta; /* * Stagger the timer tick for each CPU so they don't occur all at (almost) the * same time: @@ -127,8 +156,8 @@ unsigned long hi = 1UL << ia64_fls(cpu); shift = (2*(cpu - hi) + 1) * delta/hi/2; } - local_cpu_data->itm_next = ia64_get_itc() + delta + shift; - ia64_set_itm(local_cpu_data->itm_next); + local_cpu_data->itm_tick_next = ia64_get_itc() + delta + shift; + ia64_set_itm(local_cpu_data->itm_tick_next); } static int nojitter; @@ -186,7 +215,7 @@ itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; - local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; + local_cpu_data->itm_tick_delta = (itc_freq + HZ/2) / HZ; printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " "ITC freq=%lu.%03luMHz", smp_processor_id(), platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, @@ -206,9 +235,8 @@ local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<itc_freq; - itc_interpolator.drift = itc_drift; #ifdef CONFIG_SMP /* On IA64 in an SMP configuration ITCs are never accurately synchronized. * Jitter compensation requires a cmpxchg which may limit * the amount of jitter. Still, this approach may cause time to flow * even going backward) if the ITC offsets between the individual CPUs * are too large. */ - if (!nojitter) itc_interpolator.jitter = 1; + if (!nojitter) itc_jitter = 1; #endif - register_time_interpolator(&itc_interpolator); } +#endif /* Setup the CPU local timer tick */ ia64_cpu_local_tick(); + + if (!clocksource_itc_p) { + /* Sort out mult/shift values: */ + clocksource_itc.mult = clocksource_hz2mult(local_cpu_data->itc_freq, + clocksource_itc.shift); + clocksource_register(&clocksource_itc); + clocksource_itc_p = &clocksource_itc; + } } + +static cycle_t itc_get_cycles() +{ + if (itc_jitter) { + u64 lcycle; + u64 now; + + do { + lcycle = itc_lastcycle; + now = get_cycles(); + if (lcycle && time_after(lcycle, now)) + return lcycle; + + /* When holding the xtime write lock, there's no need + * to add the overhead of the cmpxchg. Readers are + * forced to retry until the write lock is released. + */ + if (spin_is_locked(&xtime_lock.lock)) { + itc_lastcycle = now; + return now; + } + /* Keep track of the last timer value returned. The use of cmpxchg here + * will cause contention in an SMP environment. + */ + } while (unlikely(cmpxchg(&itc_lastcycle, lcycle, now) != lcycle)); + return now; + } else + return get_cycles(); +} + + static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_NODELAY, .name = "timer" }; @@ -252,6 +319,8 @@ * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC). 
*/ set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); + register_itc_clocksource(); + register_itc_clockevent(); } /* @@ -304,3 +373,10 @@ if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) ia64_printk_clock = ia64_itc_printk_clock; } + +struct clocksource fsyscall_clock __attribute__((aligned(L1_CACHE_BYTES))); + +void update_vsyscall(struct timespec *wall, struct clocksource *c) +{ + fsyscall_clock = *c; +} diff -urN ./linux-2.6.18.1/arch/ia64/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/traps.c --- ./linux-2.6.18.1/arch/ia64/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -24,7 +24,7 @@ #include #include -extern spinlock_t timerlist_lock; +extern raw_spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); @@ -85,11 +85,11 @@ die (const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -226,7 +226,7 @@ * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes * care of clearing psr.dfh. */ -static inline void +void disabled_fph_fault (struct pt_regs *regs) { struct ia64_psr *psr = ia64_psr(regs); @@ -245,7 +245,7 @@ = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); if (ia64_is_local_fpu_owner(current)) { - preempt_enable_no_resched(); + __preempt_enable_no_resched(); return; } @@ -265,7 +265,7 @@ */ psr->mfh = 1; } - preempt_enable_no_resched(); + __preempt_enable_no_resched(); } static inline int diff -urN ./linux-2.6.18.1/arch/ia64/kernel/unwind.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/unwind.c --- ./linux-2.6.18.1/arch/ia64/kernel/unwind.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/unwind.c 2007-05-19 23:58:35.000000000 +0900 @@ -81,7 +81,7 @@ typedef unsigned char unw_hash_index_t; static struct { - spinlock_t lock; /* spinlock for unwind data */ + raw_spinlock_t lock; /* spinlock for unwind data */ /* list of unwind tables (one per load-module) */ struct unw_table *tables; @@ -145,7 +145,7 @@ # endif } unw = { .tables = &unw.kernel_table, - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .save_order = { UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR diff -urN ./linux-2.6.18.1/arch/ia64/kernel/unwind_i.h linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/unwind_i.h --- ./linux-2.6.18.1/arch/ia64/kernel/unwind_i.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/kernel/unwind_i.h 2007-05-19 23:58:35.000000000 +0900 @@ -154,7 +154,7 @@ unsigned long ip; /* ip this script is for */ unsigned long pr_mask; /* mask of predicates script depends on */ unsigned long pr_val; /* predicate values this script is for */ - rwlock_t lock; + raw_rwlock_t lock; unsigned int flags; /* see UNW_FLAG_* in unwind.h */ unsigned short lru_chain; /* used for least-recently-used chain */ unsigned short coll_chain; /* used for hash collisions */ diff -urN ./linux-2.6.18.1/arch/ia64/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/mm/init.c --- ./linux-2.6.18.1/arch/ia64/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/mm/init.c 
2007-05-19 23:58:35.000000000 +0900 @@ -36,7 +36,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist); DEFINE_PER_CPU(long, __pgtable_quicklist_size); @@ -92,15 +92,11 @@ if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES)) return; - preempt_disable(); while (unlikely((pages_to_free = min_pages_to_free()) > 0)) { while (pages_to_free--) { free_page((unsigned long)pgtable_quicklist_alloc()); } - preempt_enable(); - preempt_disable(); } - preempt_enable(); } void diff -urN ./linux-2.6.18.1/arch/ia64/mm/tlb.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/mm/tlb.c --- ./linux-2.6.18.1/arch/ia64/mm/tlb.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/mm/tlb.c 2007-05-19 23:58:35.000000000 +0900 @@ -32,7 +32,7 @@ } purge; struct ia64_ctx ia64_ctx = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .next = 1, .max_ctx = ~0U }; diff -urN ./linux-2.6.18.1/arch/ia64/pci/pci.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/pci/pci.c --- ./linux-2.6.18.1/arch/ia64/pci/pci.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/pci/pci.c 2007-05-19 23:58:35.000000000 +0900 @@ -809,12 +809,3 @@ } return rc; } - -int pci_vector_resources(int last, int nr_released) -{ - int count = nr_released; - - count += (IA64_LAST_DEVICE_VECTOR - last); - - return count; -} diff -urN ./linux-2.6.18.1/arch/ia64/sn/kernel/sn2/timer.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/sn/kernel/sn2/timer.c --- ./linux-2.6.18.1/arch/ia64/sn/kernel/sn2/timer.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ia64/sn/kernel/sn2/timer.c 2007-05-19 23:58:35.000000000 +0900 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -22,11 +23,21 @@ extern unsigned long sn_rtc_cycles_per_second; -static struct time_interpolator sn2_interpolator = { - .drift = -1, - .shift = 10, - .mask = (1LL << 55) - 1, - .source = TIME_SOURCE_MMIO64 +static void __iomem *sn2_mc_ptr; + +static cycle_t read_sn2(void) +{ + return (cycle_t)readq(sn2_mc_ptr); +} + +static struct clocksource clocksource_sn2 = { + .name = "sn2_rtc", + .rating = 300, + .read = read_sn2, + .mask = (1LL << 55) - 1, + .mult = 0, + .shift = 10, + .is_continuous = 1, }; /* @@ -47,9 +58,10 @@ void __init sn_timer_init(void) { - sn2_interpolator.frequency = sn_rtc_cycles_per_second; - sn2_interpolator.addr = RTC_COUNTER_ADDR; - register_time_interpolator(&sn2_interpolator); + clocksource_sn2.fsys_mmio_ptr = sn2_mc_ptr = RTC_COUNTER_ADDR; + clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, + clocksource_sn2.shift); + clocksource_register(&clocksource_sn2); ia64_udelay = &ia64_sn_udelay; } diff -urN ./linux-2.6.18.1/arch/mips/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/Kconfig --- ./linux-2.6.18.1/arch/mips/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -417,6 +417,7 @@ config MOMENCO_OCELOT bool "Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -837,6 +838,7 @@ endmenu + config RWSEM_GENERIC_SPINLOCK bool default y @@ -844,6 +846,10 @@ config RWSEM_XCHGADD_ALGORITHM bool +config ASM_SEMAPHORES + bool + default y + config GENERIC_FIND_NEXT_BIT bool default y @@ -889,6 +895,9 @@ config OWN_DMA bool +config NO_SPINLOCK + bool + 
config EARLY_PRINTK bool @@ -1843,12 +1852,17 @@ This will result in additional memory usage, so it is not recommended for normal users. -endmenu - -config RWSEM_GENERIC_SPINLOCK +config GENERIC_TIME bool default y +source "kernel/time/Kconfig" + +config CPU_SPEED + int "CPU speed used for clocksource/clockevent calculations" + default 600 +endmenu + source "init/Kconfig" menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" diff -urN ./linux-2.6.18.1/arch/mips/kernel/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/Makefile --- ./linux-2.6.18.1/arch/mips/kernel/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -15,6 +15,8 @@ obj-$(CONFIG_APM) += apm.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o diff -urN ./linux-2.6.18.1/arch/mips/kernel/asm-offsets.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/asm-offsets.c --- ./linux-2.6.18.1/arch/mips/kernel/asm-offsets.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/asm-offsets.c 2007-05-19 23:58:35.000000000 +0900 @@ -10,9 +10,11 @@ */ #include #include +#include #include #include #include +#include #include #include diff -urN ./linux-2.6.18.1/arch/mips/kernel/entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/entry.S --- ./linux-2.6.18.1/arch/mips/kernel/entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/entry.S 2007-05-19 23:58:35.000000000 +0900 @@ -25,7 +25,7 @@ .endm #else .macro preempt_stop - local_irq_disable + raw_local_irq_disable .endm #define resume_kernel restore_all #endif @@ -40,7 +40,7 @@ beqz t0, resume_kernel resume_userspace: - local_irq_disable # make sure we dont miss an + raw_local_irq_disable # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -50,7 +50,9 @@ #ifdef CONFIG_PREEMPT resume_kernel: - local_irq_disable + raw_local_irq_disable + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -60,7 +62,9 @@ LONG_L t0, PT_STATUS(sp) # Interrupts off? 
andi t0, 1 beqz t0, restore_all + raw_local_irq_disable jal preempt_schedule_irq + sw zero, TI_PRE_COUNT($28) b need_resched #endif @@ -68,7 +72,7 @@ jal schedule_tail # a0 = struct task_struct *prev FEXPORT(syscall_exit) - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -133,19 +137,21 @@ .set at work_pending: - andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS + # a2 is preloaded with TI_FLAGS + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beqz t0, work_notifysig work_resched: + raw_local_irq_enable t0 jal schedule - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) andi t0, a2, _TIF_WORK_MASK # is there any work to be done # other than syscall tracing? beqz t0, restore_all - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bnez t0, work_resched work_notifysig: # deal with pending signals and @@ -161,7 +167,7 @@ li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? - local_irq_enable # could let do_syscall_trace() + raw_local_irq_enable # could let do_syscall_trace() # call schedule() instead move a0, sp li a1, 1 diff -urN ./linux-2.6.18.1/arch/mips/kernel/i8259.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/i8259.c --- ./linux-2.6.18.1/arch/mips/kernel/i8259.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/i8259.c 2007-05-19 23:58:35.000000000 +0900 @@ -31,7 +31,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { diff -urN ./linux-2.6.18.1/arch/mips/kernel/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/irq.c --- ./linux-2.6.18.1/arch/mips/kernel/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -137,7 +137,10 @@ irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].chip = &no_irq_chip; - spin_lock_init(&irq_desc[i].lock); + _raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif #ifdef CONFIG_MIPS_MT_SMTC irq_hwmask[i] = 0; #endif /* CONFIG_MIPS_MT_SMTC */ diff -urN ./linux-2.6.18.1/arch/mips/kernel/module.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/module.c --- ./linux-2.6.18.1/arch/mips/kernel/module.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/module.c 2007-05-19 23:58:35.000000000 +0900 @@ -39,7 +39,7 @@ static struct mips_hi16 *mips_hi16_list; static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { diff -urN ./linux-2.6.18.1/arch/mips/kernel/process.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/process.c --- ./linux-2.6.18.1/arch/mips/kernel/process.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/process.c 2007-05-19 23:58:35.000000000 +0900 @@ -54,16 +54,18 @@ { /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { #ifdef CONFIG_MIPS_MT_SMTC smtc_idle_loop_hook(); 
#endif /* CONFIG_MIPS_MT_SMTC */ if (cpu_wait) (*cpu_wait)(); } - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff -urN ./linux-2.6.18.1/arch/mips/kernel/scall32-o32.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall32-o32.S --- ./linux-2.6.18.1/arch/mips/kernel/scall32-o32.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall32-o32.S 2007-05-19 23:58:35.000000000 +0900 @@ -84,7 +84,7 @@ 1: sw v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return lw a2, TI_FLAGS($28) # current->work diff -urN ./linux-2.6.18.1/arch/mips/kernel/scall64-64.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-64.S --- ./linux-2.6.18.1/arch/mips/kernel/scall64-64.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-64.S 2007-05-19 23:58:35.000000000 +0900 @@ -72,7 +72,7 @@ 1: sd v0, PT_R2(sp) # result n64_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work diff -urN ./linux-2.6.18.1/arch/mips/kernel/scall64-n32.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-n32.S --- ./linux-2.6.18.1/arch/mips/kernel/scall64-n32.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-n32.S 2007-05-19 23:58:35.000000000 +0900 @@ -69,7 +69,7 @@ sd v0, PT_R0(sp) # set flag for syscall restarting 1: sd v0, PT_R2(sp) # result - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work diff -urN ./linux-2.6.18.1/arch/mips/kernel/scall64-o32.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-o32.S --- ./linux-2.6.18.1/arch/mips/kernel/scall64-o32.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/scall64-o32.S 2007-05-19 23:58:35.000000000 +0900 @@ -98,7 +98,7 @@ 1: sd v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make need_resched and + raw_local_irq_disable # make need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) diff -urN ./linux-2.6.18.1/arch/mips/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/semaphore.c --- ./linux-2.6.18.1/arch/mips/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -36,7 +36,7 @@ * sem->count and sem->waking atomic. Scalability isn't an issue because * this lock is used on UP only so it's just an empty variable. 
*/ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -67,7 +67,7 @@ : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); @@ -80,7 +80,7 @@ return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -94,7 +94,7 @@ wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -104,7 +104,7 @@ * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -133,9 +133,9 @@ wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -165,4 +165,10 @@ return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); + +int fastcall compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} +EXPORT_SYMBOL(compat_sem_is_locked); diff -urN ./linux-2.6.18.1/arch/mips/kernel/signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/signal.c --- ./linux-2.6.18.1/arch/mips/kernel/signal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/signal.c 2007-05-19 23:58:35.000000000 +0900 @@ -416,6 +416,10 @@ siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything diff -urN ./linux-2.6.18.1/arch/mips/kernel/signal32.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/signal32.c --- ./linux-2.6.18.1/arch/mips/kernel/signal32.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/signal32.c 2007-05-19 23:58:35.000000000 +0900 @@ -807,6 +807,10 @@ siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything diff -urN ./linux-2.6.18.1/arch/mips/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/smp.c --- ./linux-2.6.18.1/arch/mips/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -115,7 +115,22 @@ cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. 
+ */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -303,6 +318,8 @@ return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -360,6 +377,7 @@ void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_on_other_tlbs(flush_tlb_mm_ipi, (void *)mm); @@ -369,6 +387,7 @@ if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -392,6 +411,8 @@ struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -405,6 +426,7 @@ if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -435,6 +457,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -447,6 +471,7 @@ if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } diff -urN ./linux-2.6.18.1/arch/mips/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/time.c --- ./linux-2.6.18.1/arch/mips/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -10,7 +10,13 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * + * This implementation of High Res Timers uses two timers. One is the system + * timer. The second is used for the high res timers. The high res timers + * require the CPU to have count/compare registers. The mips_set_next_event() + * function schedules the next high res timer interrupt. */ +#include #include #include #include @@ -23,6 +29,7 @@ #include #include #include +#include #include #include @@ -49,7 +56,27 @@ */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +/* any missed timer interrupts */ +int missed_timer_count; + +DEFINE_RAW_SPINLOCK(rtc_lock); + +#ifdef CONFIG_HIGH_RES_TIMERS +static void mips_set_next_event(unsigned long evt); +static void mips_set_mode(int mode, void *priv); + +static struct clock_event lapic_clockevent = { + .name = "mips clockevent interface", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_next_event = mips_set_next_event, +}; +#endif /* * By default we provide the null RTC ops @@ -68,25 +95,36 @@ int (*rtc_mips_set_time)(unsigned long) = null_rtc_set_time; int (*rtc_mips_set_mmss)(unsigned long); - /* usecs per counter cycle, shifted to left by 32 bits */ static unsigned int sll32_usecs_per_cycle; /* how many counter cycles in a jiffy */ static unsigned long cycles_per_jiffy __read_mostly; +static unsigned long hrt_cycles_per_jiffy __read_mostly; + + /* Cycle counter value at the previous timer interrupt.. 
*/ static unsigned int timerhi, timerlo; /* expirelo is the count value for next CPU timer interrupt */ static unsigned int expirelo; - /* * Null timer ack for systems not needing one (e.g. i8254). */ static void null_timer_ack(void) { /* nothing */ } +#ifdef CONFIG_HIGH_RES_TIMERS +/* + * Set the next event + */ +static void mips_set_next_event(unsigned long evt) +{ + write_c0_compare(read_c0_count() + evt); +} +#endif + /* * Null high precision timer functions for systems lacking one. */ @@ -100,7 +138,6 @@ /* nothing */ } - /* * Timer ack for an R4k-compatible timer of a known frequency. */ @@ -110,14 +147,15 @@ #ifndef CONFIG_SOC_PNX8550 /* pnx8550 resets to zero */ /* Ack this timer interrupt and set the next one. */ - expirelo += cycles_per_jiffy; + expirelo += hrt_cycles_per_jiffy; #endif write_c0_compare(expirelo); /* Check to see if we have missed any timer interrupts. */ - while (((count = read_c0_count()) - expirelo) < 0x7fffffff) { - /* missed_timer_count++; */ - expirelo = count + cycles_per_jiffy; + count = read_c0_count(); + if ((count - expirelo) < 0x7fffffff) { + /* missed_timer_count++; */ + expirelo = count + hrt_cycles_per_jiffy; write_c0_compare(expirelo); } } @@ -146,92 +184,39 @@ write_c0_count(count); } -int (*mips_timer_state)(void); -void (*mips_timer_ack)(void); -unsigned int (*mips_hpt_read)(void); -void (*mips_hpt_init)(unsigned int); - - -/* - * This version of gettimeofday has microsecond resolution and better than - * microsecond precision on fast machines with cycle counter. - */ -void do_gettimeofday(struct timeval *tv) +static cycle_t read_mips_hpt(void) { - unsigned long seq; - unsigned long lost; - unsigned long usec, sec; - unsigned long max_ntp_tick; - - do { - seq = read_seqbegin(&xtime_lock); - - usec = do_gettimeoffset(); - - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; + cycle_t ret; + ret = (cycle_t)mips_hpt_read(); + return ret; } -EXPORT_SYMBOL(do_gettimeofday); +static struct clocksource clocksource_tsc = { + .name = "MIPS", + .rating = 250, + .read = read_mips_hpt, + .mask = 0xffffffff, + .mult = 0, + .shift = 24, + .is_continuous = 1, +}; -int do_settimeofday(struct timespec *tv) +static int __init init_tsc_clocksource(void) { - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); + u64 temp; - /* - * This is revolting. We need to set "xtime" correctly. However, - * the value in this location is the value at the most recent update - * of wall time. Discover what correction gettimeofday() would have - * made, and then undo it! 
- */ - nsec -= do_gettimeoffset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * tick_nsec; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); + temp = (u64) 1000000000 << clocksource_tsc.shift; + do_div(temp, mips_hpt_frequency); + clocksource_tsc.mult = (unsigned)temp; - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; + return clocksource_register(&clocksource_tsc); } +module_init(init_tsc_clocksource); -EXPORT_SYMBOL(do_settimeofday); +int (*mips_timer_state)(void); +void (*mips_timer_ack)(void); +unsigned int (*mips_hpt_read)(void); +void (*mips_hpt_init)(unsigned int); /* * Gettimeoffset routines. These routines returns the time duration @@ -250,11 +235,9 @@ return 0; } - /* The function pointer to one of the gettimeoffset funcs. */ unsigned long (*do_gettimeoffset)(void) = null_gettimeoffset; - static unsigned long fixed_rate_gettimeoffset(void) { u32 count; @@ -396,6 +379,29 @@ /* last time when xtime and rtc are sync'ed up */ static long last_rtc_update; +unsigned long read_persistent_clock(void) +{ + unsigned long sec; + sec = rtc_mips_get_time(); + return sec; +} + +void sync_persistent_clock(struct timespec ts) +{ + if (ntp_synced() && + xtime.tv_sec > last_rtc_update + 660 && + (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && + (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { + if (rtc_mips_set_mmss(xtime.tv_sec) == 0) { + last_rtc_update = xtime.tv_sec; + } + else { + /* do it again in 60 s */ + last_rtc_update = xtime.tv_sec - 600; + } + } +} + /* * local_timer_interrupt() does profiling and process accounting * on a per-CPU basis. @@ -410,6 +416,7 @@ { if (current->pid) profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); } @@ -438,7 +445,7 @@ /* * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. rtc_mips_set_time() has to be + * CMOS clock accordingly every ~11 minutes. rtc_set_time() has to be * called as close as possible to 500 ms before the new second starts. */ if (ntp_synced() && @@ -518,6 +525,15 @@ EXPORT_SYMBOL(null_perf_irq); EXPORT_SYMBOL(perf_irq); +#ifdef CONFIG_HIGH_RES_TIMERS +void event_timer_handler(struct pt_regs *regs) +{ + c0_timer_ack(); + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs,NULL); +} +#endif + asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs) { int r2 = cpu_has_mips_r2; @@ -531,6 +547,15 @@ * performance counter interrupt was pending, so we have to run the * performance counter interrupt handler anyway. */ +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * Run the event handler + */ + if (!r2 || (read_c0_cause() & (1 << 26))) + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs,NULL); +#endif + if (!r2 || (read_c0_cause() & (1 << 26))) if (perf_irq(regs)) goto out; @@ -563,7 +588,7 @@ * b) (optional) calibrate and set the mips_hpt_frequency * (only needed if you intended to use fixed_rate_gettimeoffset * or use cpu counter as timer interrupt source) - * 2) setup xtime based on rtc_mips_get_time(). + * 2) setup xtime based on rtc_get_time(). * 3) choose a appropriate gettimeoffset routine. 
* 4) calculate a couple of cached variables for later usage * 5) plat_timer_setup() - @@ -578,7 +603,7 @@ static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_NODELAY | IRQF_DISABLED, .name = "timer", }; @@ -627,6 +652,9 @@ void __init time_init(void) { +#ifdef CONFIG_HIGH_RES_TIMERS + u64 temp; +#endif if (board_time_init) board_time_init(); @@ -688,6 +716,12 @@ /* Calculate cache parameters. */ cycles_per_jiffy = (mips_hpt_frequency + HZ / 2) / HZ; +#ifdef CONFIG_HIGH_RES_TIMERS + hrt_cycles_per_jiffy = ( (CONFIG_CPU_SPEED * 1000000) + HZ / 2) / HZ; +#else + hrt_cycles_per_jiffy = cycles_per_jiffy; +#endif + /* sll32_usecs_per_cycle = 10^6 * 2^32 / mips_counter_freq */ do_div64_32(sll32_usecs_per_cycle, 1000000, mips_hpt_frequency / 2, @@ -776,3 +810,128 @@ { return (unsigned long long)jiffies*(1000000000/HZ); } + + +#ifdef CONFIG_SMP +/* + * We have to synchronize the master CPU with all the slave CPUs + */ +static atomic_t cpus_started; +static atomic_t cpus_ready; +static atomic_t cpus_count; +/* + * Master processor inits + */ +static void sync_cpus_init(int v) +{ + atomic_set(&cpus_count, 0); + mb(); + atomic_set(&cpus_started, v); + mb(); + atomic_set(&cpus_ready, v); + mb(); +} + +/* + * Called by the master processor + */ +static void sync_cpus_master(int v) +{ + atomic_set(&cpus_count, 0); + mb(); + atomic_set(&cpus_started, v); + mb(); + /* Wait here till all other CPUs are now ready */ + while (atomic_read(&cpus_count) != (num_online_cpus() -1) ) + mb(); + atomic_set(&cpus_ready, v); + mb(); +} +/* + * Called by the slave processors + */ +static void sync_cpus_slave(int v) +{ + /* Check if the master has been through this */ + while (atomic_read(&cpus_started) != v) + mb(); + atomic_inc(&cpus_count); + mb(); + while (atomic_read(&cpus_ready) != v) + mb(); +} +/* + * Called by the slave CPUs when done syncing the count register + * with the master processor + */ +static void sync_cpus_slave_exit(int v) +{ + while (atomic_read(&cpus_started) != v) + mb(); + atomic_inc(&cpus_count); + mb(); +} + +#define LOOPS 100 +static u32 c0_count[NR_CPUS]; /* Count register per CPU */ +static u32 c[NR_CPUS][LOOPS + 1]; /* Count register per CPU per loop for syncing */ + +/* + * Slave processors execute this via IPI + */ +static void sync_c0_count_slave(void *info) +{ + int cpus = 1, loop, prev_count = 0, cpu = smp_processor_id(); + unsigned long flags; + u32 diff_count; /* CPU count registers are 32-bit */ + local_irq_save(flags); + + for(loop = 0; loop <= LOOPS; loop++) { + /* Sync with the Master processor */ + sync_cpus_slave(cpus++); + c[cpu][loop] = c0_count[cpu] = read_c0_count(); + mb(); + sync_cpus_slave(cpus++); + diff_count = c0_count[0] - c0_count[cpu]; + diff_count += prev_count; + diff_count += read_c0_count(); + write_c0_count(diff_count); + prev_count = (prev_count >> 1) + + ((int)(c0_count[0] - c0_count[cpu]) >> 1); + } + + /* Slave processor is done syncing count register with Master */ + sync_cpus_slave_exit(cpus++); + printk("SMP: Slave processor %d done syncing count \n", cpu); + local_irq_restore(flags); +} + +/* + * Master kicks off the syncing process + */ +void sync_c0_count_master(void) +{ + int cpus = 0, loop, cpu = smp_processor_id(); + unsigned long flags; + + printk("SMP: Starting to sync the c0 count register ... 
\n"); + sync_cpus_init(cpus++); + + /* Kick off the slave processors to also start the syncing process */ + smp_call_function(sync_c0_count_slave, NULL, 0, 0); + local_irq_save(flags); + + for (loop = 0; loop <= LOOPS; loop++) { + /* Wait for all the CPUs here */ + sync_cpus_master(cpus++); + c[cpu][loop] = c0_count[cpu] = read_c0_count(); + mb(); + /* Do syncing once more */ + sync_cpus_master(cpus++); + } + sync_cpus_master(cpus++); + local_irq_restore(flags); + + printk("SMP: Syncing process completed accross CPUs ... \n"); +} +#endif /* CONFIG_SMP */ diff -urN ./linux-2.6.18.1/arch/mips/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/traps.c --- ./linux-2.6.18.1/arch/mips/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -274,7 +274,7 @@ printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void ATTRIB_NORET die(const char * str, struct pt_regs * regs) { diff -urN ./linux-2.6.18.1/arch/mips/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/mm/init.c --- ./linux-2.6.18.1/arch/mips/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/mm/init.c 2007-05-19 23:58:35.000000000 +0900 @@ -36,7 +36,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; diff -urN ./linux-2.6.18.1/arch/mips/sibyte/cfe/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/cfe/smp.c --- ./linux-2.6.18.1/arch/mips/sibyte/cfe/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/cfe/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -107,4 +107,8 @@ */ void prom_cpus_done(void) { +#ifdef CONFIG_HIGH_RES_TIMERS + extern void sync_c0_count_master(void); + sync_c0_count_master(); +#endif } diff -urN ./linux-2.6.18.1/arch/mips/sibyte/sb1250/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/sb1250/irq.c --- ./linux-2.6.18.1/arch/mips/sibyte/sb1250/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/sb1250/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -85,7 +85,7 @@ /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -262,7 +262,7 @@ static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = IRQF_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, @@ -372,6 +372,10 @@ #ifdef CONFIG_KGDB imask |= STATUSF_IP6; #endif + +#ifdef CONFIG_HIGH_RES_TIMERS + imask |= STATUSF_IP7; +#endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); @@ -465,6 +469,10 @@ else #endif +#ifdef CONFIG_HIGH_RES_TIMERS + if (pending & CAUSEF_IP7) + event_timer_handler(regs); +#endif if (pending & CAUSEF_IP4) sb1250_timer_interrupt(regs); diff -urN ./linux-2.6.18.1/arch/mips/sibyte/sb1250/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/sb1250/smp.c --- ./linux-2.6.18.1/arch/mips/sibyte/sb1250/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/sb1250/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -59,7 +59,7 @@ { extern void sb1250_time_init(void); sb1250_time_init(); - local_irq_enable(); + raw_local_irq_enable(); } /* diff 
-urN ./linux-2.6.18.1/arch/mips/sibyte/swarm/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/swarm/setup.c --- ./linux-2.6.18.1/arch/mips/sibyte/swarm/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/mips/sibyte/swarm/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -131,6 +131,12 @@ rtc_mips_set_time = m41t81_set_time; } +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * set the mips_hpt_frequency here + */ + mips_hpt_frequency = CONFIG_CPU_SPEED * 1000000; +#endif printk("This kernel optimized for " #ifdef CONFIG_SIMULATION "simulation" diff -urN ./linux-2.6.18.1/arch/powerpc/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/Kconfig --- ./linux-2.6.18.1/arch/powerpc/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -26,18 +26,15 @@ bool default y -config GENERIC_HARDIRQS +config GENERIC_TIME bool default y -config IRQ_PER_CPU +config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM +config IRQ_PER_CPU bool default y @@ -596,6 +593,18 @@ source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "fs/Kconfig.binfmt" # We optimistically allocate largepages from the VM, so make the limit diff -urN ./linux-2.6.18.1/arch/powerpc/boot/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/boot/Makefile --- ./linux-2.6.18.1/arch/powerpc/boot/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/boot/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -29,6 +29,14 @@ OBJCOPY_COFF_ARGS := -O aixcoff-rs6000 --set-start 0x500000 OBJCOPY_MIB_ARGS := -O aixcoff-rs6000 -R .stab -R .stabstr -R .comment +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + zlib := inffast.c inflate.c inftrees.c zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h zliblinuxheader := zlib.h zconf.h zutil.h @@ -44,7 +52,7 @@ BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = sed "s@__attribute_used__@@;s@]\+\).*@\"\1\"@" $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@]\+\).*@\"\1\"@" $< > $@ quiet_cmd_copy_zlibheader = COPY $@ cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/Makefile --- ./linux-2.6.18.1/arch/powerpc/kernel/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -10,10 +10,11 @@ CFLAGS_btext.o += -fPIC endif -obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ +obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o obj-y += vdso32/ +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o cpu_setup_power4.o \ diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/entry_32.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/entry_32.S --- ./linux-2.6.18.1/arch/powerpc/kernel/entry_32.S 2006-10-14 
12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/entry_32.S 2007-05-19 23:58:35.000000000 +0900 @@ -638,7 +638,7 @@ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -856,7 +856,7 @@ #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -870,7 +870,7 @@ MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK beq restore_user @@ -978,3 +978,85 @@ /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_MCOUNT +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function + * preamble, before the stack frame is created. An example of this preamble + * code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. 
*/ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/irq.c --- ./linux-2.6.18.1/arch/powerpc/kernel/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -91,8 +91,6 @@ #endif #ifdef CONFIG_PPC64 -EXPORT_SYMBOL(irq_desc); - int distribute_irqs = 1; #endif /* CONFIG_PPC64 */ diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/ppc_ksyms.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/ppc_ksyms.c --- ./linux-2.6.18.1/arch/powerpc/kernel/ppc_ksyms.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/ppc_ksyms.c 2007-05-19 23:58:35.000000000 +0900 @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -189,7 +188,6 @@ #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(cacheable_memcpy); diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/semaphore.c --- ./linux-2.6.18.1/arch/powerpc/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -31,7 +31,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -50,7 +50,7 @@ return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -63,7 +63,7 @@ __sem_update_count(sem, 1); wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -73,7 +73,7 @@ * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -101,9 +101,9 @@ */ wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore *sem) { int retval = 0; struct task_struct *tsk = current; @@ -132,4 +132,10 @@ wake_up(&sem->wait); return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} +EXPORT_SYMBOL(compat_sem_is_locked); diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/smp.c --- ./linux-2.6.18.1/arch/powerpc/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -148,6 +148,16 @@ smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE); +} + #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { @@ -184,7 +194,7 @@ * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/time.c --- ./linux-2.6.18.1/arch/powerpc/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -73,6 +73,9 @@ #endif #include +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); + /* keep track of when we need to update the rtc */ time_t last_rtc_update; #ifdef CONFIG_PPC_ISERIES @@ -115,8 +118,6 @@ u64 tb_to_ns_scale; unsigned tb_to_ns_shift; -struct gettimeofday_struct do_gtod; - extern unsigned long wall_jiffies; extern struct timezone sys_tz; @@ -407,162 +408,8 @@ } } -/* - * This version of gettimeofday has microsecond resolution. - */ -static inline void __do_gettimeofday(struct timeval *tv) -{ - unsigned long sec, usec; - u64 tb_ticks, xsec; - struct gettimeofday_vars *temp_varp; - u64 temp_tb_to_xs, temp_stamp_xsec; - - /* - * These calculations are faster (gets rid of divides) - * if done in units of 1/2^20 rather than microseconds. - * The conversion to microseconds at the end is done - * without a divide (and in fact, without a multiply) - */ - temp_varp = do_gtod.varp; - - /* Sampling the time base must be done after loading - * do_gtod.varp in order to avoid racing with update_gtod. 
- */ - data_barrier(temp_varp); - tb_ticks = get_tb() - temp_varp->tb_orig_stamp; - temp_tb_to_xs = temp_varp->tb_to_xs; - temp_stamp_xsec = temp_varp->stamp_xsec; - xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs); - sec = xsec / XSEC_PER_SEC; - usec = (unsigned long)xsec & (XSEC_PER_SEC - 1); - usec = SCALE_XSEC(usec, 1000000); - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -void do_gettimeofday(struct timeval *tv) -{ - if (__USE_RTC()) { - /* do this the old way */ - unsigned long flags, seq; - unsigned int sec, nsec, usec; - - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - sec = xtime.tv_sec; - nsec = xtime.tv_nsec + tb_ticks_since(tb_last_jiffy); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - usec = nsec / 1000; - while (usec >= 1000000) { - usec -= 1000000; - ++sec; - } - tv->tv_sec = sec; - tv->tv_usec = usec; - return; - } - __do_gettimeofday(tv); -} - -EXPORT_SYMBOL(do_gettimeofday); - -/* - * There are two copies of tb_to_xs and stamp_xsec so that no - * lock is needed to access and use these values in - * do_gettimeofday. We alternate the copies and as long as a - * reasonable time elapses between changes, there will never - * be inconsistent values. ntpd has a minimum of one minute - * between updates. - */ -static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, - u64 new_tb_to_xs) -{ - unsigned temp_idx; - struct gettimeofday_vars *temp_varp; - - temp_idx = (do_gtod.var_idx == 0); - temp_varp = &do_gtod.vars[temp_idx]; - - temp_varp->tb_to_xs = new_tb_to_xs; - temp_varp->tb_orig_stamp = new_tb_stamp; - temp_varp->stamp_xsec = new_stamp_xsec; - smp_mb(); - do_gtod.varp = temp_varp; - do_gtod.var_idx = temp_idx; - - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. - * We expect the caller to have done the first increment of - * vdso_data->tb_update_count already. - */ - vdso_data->tb_orig_stamp = new_tb_stamp; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - -/* - * When the timebase - tb_orig_stamp gets too big, we do a manipulation - * between tb_orig_stamp and stamp_xsec. The goal here is to keep the - * difference tb - tb_orig_stamp small enough to always fit inside a - * 32 bits number. This is a requirement of our fast 32 bits userland - * implementation in the vdso. If we "miss" a call to this function - * (interrupt latency, CPU locked in a spinlock, ...) 
and we end up - * with a too big difference, then the vdso will fallback to calling - * the syscall - */ -static __inline__ void timer_recalc_offset(u64 cur_tb) -{ - unsigned long offset; - u64 new_stamp_xsec; - u64 tlen, t2x; - u64 tb, xsec_old, xsec_new; - struct gettimeofday_vars *varp; - - if (__USE_RTC()) - return; - tlen = current_tick_length(); - offset = cur_tb - do_gtod.varp->tb_orig_stamp; - if (tlen == last_tick_len && offset < 0x80000000u) - return; - if (tlen != last_tick_len) { - t2x = mulhdu(tlen << TICKLEN_SHIFT, ticklen_to_xs); - last_tick_len = tlen; - } else - t2x = do_gtod.varp->tb_to_xs; - new_stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; - do_div(new_stamp_xsec, 1000000000); - new_stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; - - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * Make sure time doesn't go backwards for userspace gettimeofday. - */ - tb = get_tb(); - varp = do_gtod.varp; - xsec_old = mulhdu(tb - varp->tb_orig_stamp, varp->tb_to_xs) - + varp->stamp_xsec; - xsec_new = mulhdu(tb - cur_tb, t2x) + new_stamp_xsec; - if (xsec_new < xsec_old) - new_stamp_xsec += xsec_old - xsec_new; - - update_gtod(cur_tb, new_stamp_xsec, t2x); -} - #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -610,11 +457,7 @@ tb_ticks_per_sec = new_tb_ticks_per_sec; calc_cputime_factors(); div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); - do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; tb_to_xs = divres.result_low; - do_gtod.varp->tb_to_xs = tb_to_xs; - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; - vdso_data->tb_to_xs = tb_to_xs; } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -781,81 +624,6 @@ return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift; } -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, new_sec = tv->tv_sec; - long wtm_nsec, new_nsec = tv->tv_nsec; - unsigned long flags; - u64 new_xsec; - unsigned long tb_delta; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irqsave(&xtime_lock, flags); - - /* - * Updating the RTC is not the job of this code. If the time is - * stepped under NTP, the RTC will be updated after STA_UNSYNC - * is cleared. Tools like clock/hwclock either copy the RTC - * to the system time, in which case there is no point in writing - * to the RTC again, or write to the RTC but then they don't call - * settimeofday to perform this operation. - */ -#ifdef CONFIG_PPC_ISERIES - if (first_settimeofday) { - iSeries_tb_recal(); - first_settimeofday = 0; - } -#endif - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * Subtract off the number of nanoseconds since the - * beginning of the last tick. - * Note that since we don't increment jiffies_64 anywhere other - * than in do_timer (since we don't have a lost tick problem), - * wall_jiffies will always be the same as jiffies, - * and therefore the (jiffies - wall_jiffies) computation - * has been removed. 
- */ - tb_delta = tb_ticks_since(tb_last_jiffy); - tb_delta = mulhdu(tb_delta, do_gtod.varp->tb_to_xs); /* in xsec */ - new_nsec -= SCALE_XSEC(tb_delta, 1000000000); - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec); - - set_normalized_timespec(&xtime, new_sec, new_nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - /* In case of a large backwards jump in time with NTP, we want the - * clock to be updated as soon as the PLL is again in lock. - */ - last_rtc_update = new_sec - 658; - - ntp_clear(); - - new_xsec = xtime.tv_nsec; - if (new_xsec != 0) { - new_xsec *= XSEC_PER_SEC; - do_div(new_xsec, NSEC_PER_SEC); - } - new_xsec += (u64)xtime.tv_sec * XSEC_PER_SEC; - update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs); - - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; - - write_sequnlock_irqrestore(&xtime_lock, flags); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); static int __init get_freq(char *name, int cells, unsigned long *val) { @@ -1024,20 +792,6 @@ xtime.tv_sec = tm; xtime.tv_nsec = 0; - do_gtod.varp = &do_gtod.vars[0]; - do_gtod.var_idx = 0; - do_gtod.varp->tb_orig_stamp = tb_last_jiffy; - __get_cpu_var(last_jiffy) = tb_last_jiffy; - do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; - do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; - do_gtod.varp->tb_to_xs = tb_to_xs; - do_gtod.tb_to_us = tb_to_us; - - vdso_data->tb_orig_stamp = tb_last_jiffy; - vdso_data->tb_update_count = 0; - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; - vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; - vdso_data->tb_to_xs = tb_to_xs; time_freq = 0; @@ -1050,7 +804,6 @@ set_dec(tb_ticks_per_jiffy); } - #define FEBRUARY 2 #define STARTOFTIME 1970 #define SECDAY 86400L @@ -1195,3 +948,36 @@ dr->result_low = ((u64)y << 32) + z; } + + +/* powerpc clocksource code */ + +#include +static cycle_t timebase_read(void) +{ + return (cycle_t)get_tb(); +} + +struct clocksource clocksource_timebase = { + .name = "timebase", + .rating = 200, + .read = timebase_read, + .mask = (cycle_t)-1, + .mult = 0, + .shift = 22, +}; + + +/* XXX - this should be calculated or properly externed! 
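+ * As a rough sketch of what "calculated" would mean here:
+ * clocksource_hz2mult(hz, shift) evaluates approximately
+ *
+ *	mult = (NSEC_PER_SEC << shift) / hz
+ *
+ * so the timekeeping core converts timebase deltas with
+ * ns = (cycles * mult) >> shift; the shift of 22 above is presumably
+ * the hard-coded guess being complained about.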
*/ +static int __init init_timebase_clocksource(void) +{ + if (__USE_RTC()) + return -ENODEV; + + clocksource_timebase.mult = clocksource_hz2mult(tb_ticks_per_sec, + clocksource_timebase.shift); + return clocksource_register(&clocksource_timebase); +} + +module_init(init_timebase_clocksource); + diff -urN ./linux-2.6.18.1/arch/powerpc/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/traps.c --- ./linux-2.6.18.1/arch/powerpc/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -93,7 +93,7 @@ * Trap & Exception support */ -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { @@ -164,6 +164,11 @@ return; } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff -urN ./linux-2.6.18.1/arch/powerpc/lib/locks.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/lib/locks.c --- ./linux-2.6.18.1/arch/powerpc/lib/locks.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/lib/locks.c 2007-05-19 23:58:35.000000000 +0900 @@ -24,7 +24,7 @@ #include #include -void __spin_yield(raw_spinlock_t *lock) +void __spin_yield(__raw_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; @@ -79,7 +79,7 @@ } #endif -void __raw_spin_unlock_wait(raw_spinlock_t *lock) +void __raw_spin_unlock_wait(__raw_spinlock_t *lock) { while (lock->slock) { HMT_low(); diff -urN ./linux-2.6.18.1/arch/powerpc/mm/fault.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/fault.c --- ./linux-2.6.18.1/arch/powerpc/mm/fault.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/fault.c 2007-05-19 23:58:35.000000000 +0900 @@ -149,8 +149,8 @@ * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. 
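 * (The notrace annotation added below is presumably needed because,
 * with CONFIG_MCOUNT, an instrumented fault handler could recurse into
 * the tracer while the tracer itself touches pageable data.)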
*/ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +int __kprobes notrace do_page_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; diff -urN ./linux-2.6.18.1/arch/powerpc/mm/init_32.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/init_32.c --- ./linux-2.6.18.1/arch/powerpc/mm/init_32.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/init_32.c 2007-05-19 23:58:35.000000000 +0900 @@ -56,7 +56,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; diff -urN ./linux-2.6.18.1/arch/powerpc/mm/tlb_64.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/tlb_64.c --- ./linux-2.6.18.1/arch/powerpc/mm/tlb_64.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/mm/tlb_64.c 2007-05-19 23:58:35.000000000 +0900 @@ -37,7 +37,7 @@ /* This is declared as we are using the more or less generic * include/asm-powerpc/tlb.h file -- tgall */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/cell/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/cell/smp.c --- ./linux-2.6.18.1/arch/powerpc/platforms/cell/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/cell/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -133,7 +133,7 @@ iic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit cell_give_timebase(void) diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/chrp/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/chrp/smp.c --- ./linux-2.6.18.1/arch/powerpc/platforms/chrp/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/chrp/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -45,7 +45,7 @@ mpic_setup_this_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit smp_chrp_give_timebase(void) diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/chrp/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/chrp/time.c --- ./linux-2.6.18.1/arch/powerpc/platforms/chrp/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/chrp/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -27,7 +27,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = NVRAM_AS0; diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/iseries/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/iseries/setup.c --- ./linux-2.6.18.1/arch/powerpc/platforms/iseries/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/iseries/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -594,12 +594,14 @@ static void iseries_shared_idle(void) { while (1) { - while (!need_resched() && !hvlpevent_is_pending()) { + while (!need_resched() && 
!need_resched_delayed() + && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); /* Recheck with irqs off */ - if (!need_resched() && !hvlpevent_is_pending()) + if (!need_resched() && !need_resched_delayed() + && !hvlpevent_is_pending()) yield_shared_processor(); HMT_medium(); diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/powermac/feature.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/feature.c --- ./linux-2.6.18.1/arch/powerpc/platforms/powermac/feature.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/feature.c 2007-05-19 23:58:35.000000000 +0900 @@ -59,7 +59,7 @@ * We use a single global lock to protect accesses. Each driver has * to take care of its own locking */ -DEFINE_SPINLOCK(feature_lock); +DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/powermac/nvram.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/nvram.c --- ./linux-2.6.18.1/arch/powerpc/platforms/powermac/nvram.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/nvram.c 2007-05-19 23:58:35.000000000 +0900 @@ -80,7 +80,7 @@ static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); static int (*core99_write_bank)(int bank, u8* datas); static int (*core99_erase_bank)(int bank); diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/powermac/pic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/pic.c --- ./linux-2.6.18.1/arch/powerpc/platforms/powermac/pic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/powermac/pic.c 2007-05-19 23:58:35.000000000 +0900 @@ -63,7 +63,7 @@ static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/pseries/setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/pseries/setup.c --- ./linux-2.6.18.1/arch/powerpc/platforms/pseries/setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/pseries/setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -483,7 +483,8 @@ set_thread_flag(TIF_POLLING_NRFLAG); while (get_tb() < start_snooze) { - if (need_resched() || cpu_is_offline(cpu)) + if (need_resched() || need_resched_delayed() || + cpu_is_offline(cpu)) goto out; ppc64_runlatch_off(); HMT_low(); @@ -494,7 +495,8 @@ clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); local_irq_disable(); - if (need_resched() || cpu_is_offline(cpu)) + if (need_resched() || need_resched_delayed() || + cpu_is_offline(cpu)) goto out; } diff -urN ./linux-2.6.18.1/arch/powerpc/platforms/pseries/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/pseries/smp.c --- ./linux-2.6.18.1/arch/powerpc/platforms/pseries/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/powerpc/platforms/pseries/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -344,7 +344,7 @@ } #endif /* CONFIG_XICS */ -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static 
void __devinit pSeries_give_timebase(void) diff -urN ./linux-2.6.18.1/arch/ppc/8260_io/enet.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8260_io/enet.c --- ./linux-2.6.18.1/arch/ppc/8260_io/enet.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8260_io/enet.c 2007-05-19 23:58:35.000000000 +0900 @@ -116,7 +116,7 @@ scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); diff -urN ./linux-2.6.18.1/arch/ppc/8260_io/fcc_enet.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8260_io/fcc_enet.c --- ./linux-2.6.18.1/arch/ppc/8260_io/fcc_enet.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8260_io/fcc_enet.c 2007-05-19 23:58:35.000000000 +0900 @@ -376,7 +376,7 @@ volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; diff -urN ./linux-2.6.18.1/arch/ppc/8xx_io/commproc.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/commproc.c --- ./linux-2.6.18.1/arch/ppc/8xx_io/commproc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/commproc.c 2007-05-19 23:58:35.000000000 +0900 @@ -356,7 +356,7 @@ /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... diff -urN ./linux-2.6.18.1/arch/ppc/8xx_io/enet.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/enet.c --- ./linux-2.6.18.1/arch/ppc/8xx_io/enet.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/enet.c 2007-05-19 23:58:35.000000000 +0900 @@ -143,7 +143,7 @@ unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); diff -urN ./linux-2.6.18.1/arch/ppc/8xx_io/fec.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/fec.c --- ./linux-2.6.18.1/arch/ppc/8xx_io/fec.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/8xx_io/fec.c 2007-05-19 23:58:35.000000000 +0900 @@ -164,7 +164,7 @@ struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; diff -urN ./linux-2.6.18.1/arch/ppc/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/Kconfig --- ./linux-2.6.18.1/arch/ppc/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -12,13 +12,6 @@ bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_HWEIGHT bool default y @@ -955,6 +948,18 @@ source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "mm/Kconfig" source "fs/Kconfig.binfmt" diff -urN ./linux-2.6.18.1/arch/ppc/boot/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/boot/Makefile --- ./linux-2.6.18.1/arch/ppc/boot/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/boot/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -14,6 +14,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring 
:= +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd diff -urN ./linux-2.6.18.1/arch/ppc/kernel/dma-mapping.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/dma-mapping.c --- ./linux-2.6.18.1/arch/ppc/kernel/dma-mapping.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/dma-mapping.c 2007-05-19 23:58:35.000000000 +0900 @@ -70,7 +70,7 @@ * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. diff -urN ./linux-2.6.18.1/arch/ppc/kernel/entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/entry.S --- ./linux-2.6.18.1/arch/ppc/kernel/entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/entry.S 2007-05-19 23:58:35.000000000 +0900 @@ -856,7 +856,7 @@ #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -870,7 +870,7 @@ MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user diff -urN ./linux-2.6.18.1/arch/ppc/kernel/semaphore.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/semaphore.c --- ./linux-2.6.18.1/arch/ppc/kernel/semaphore.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/semaphore.c 2007-05-19 23:58:35.000000000 +0900 @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} diff -urN ./linux-2.6.18.1/arch/ppc/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/smp.c --- ./linux-2.6.18.1/arch/ppc/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -137,6 +137,16 @@ smp_message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. 
+ * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -161,7 +171,7 @@ * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); diff -urN ./linux-2.6.18.1/arch/ppc/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/time.c --- ./linux-2.6.18.1/arch/ppc/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -65,6 +65,9 @@ #include +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); + unsigned long disarm_decr[NR_CPUS]; extern struct timezone sys_tz; @@ -103,7 +106,7 @@ } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff -urN ./linux-2.6.18.1/arch/ppc/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/traps.c --- ./linux-2.6.18.1/arch/ppc/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -71,7 +71,7 @@ * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); int die(const char * str, struct pt_regs * fp, long err) { @@ -106,6 +106,10 @@ debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; diff -urN ./linux-2.6.18.1/arch/ppc/lib/locks.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/lib/locks.c --- ./linux-2.6.18.1/arch/ppc/lib/locks.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/lib/locks.c 2007-05-19 23:58:35.000000000 +0900 @@ -42,7 +42,7 @@ return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) { int cpu = smp_processor_id(); unsigned int stuck = INIT_STUCK; @@ -62,9 +62,9 @@ lock->owner_pc = (unsigned long)__builtin_return_address(0); lock->owner_cpu = cpu; } -EXPORT_SYMBOL(_raw_spin_lock); +EXPORT_SYMBOL(__raw_spin_lock); -int _raw_spin_trylock(spinlock_t *lock) +int __raw_spin_trylock(raw_spinlock_t *lock) { if (__spin_trylock(&lock->lock)) return 0; @@ -72,9 +72,9 @@ lock->owner_pc = (unsigned long)__builtin_return_address(0); return 1; } -EXPORT_SYMBOL(_raw_spin_trylock); +EXPORT_SYMBOL(__raw_spin_trylock); -void _raw_spin_unlock(spinlock_t *lp) +void __raw_spin_unlock(raw_spinlock_t *lp) { if ( !lp->lock ) printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", @@ -88,13 +88,13 @@ wmb(); lp->lock = 0; } -EXPORT_SYMBOL(_raw_spin_unlock); +EXPORT_SYMBOL(__raw_spin_unlock); /* * For rwlocks, zero is unlocked, -1 is write-locked, * positive is read-locked. 
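 * For example, two concurrent readers leave lock == 2; a writer claims
 * the lock by cmpxchg()ing 0 -> -1 (see __raw_write_trylock below), and
 * __read_trylock() keeps returning a non-positive value until the
 * writer stores 0 again.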
 */
-static __inline__ int __read_trylock(rwlock_t *rw)
+static __inline__ int __read_trylock(raw_rwlock_t *rw)
 {
 	signed int tmp;

@@ -114,13 +114,13 @@
 	return tmp;
 }

-int _raw_read_trylock(rwlock_t *rw)
+int __raw_read_trylock(raw_rwlock_t *rw)
 {
 	return __read_trylock(rw) > 0;
 }
-EXPORT_SYMBOL(_raw_read_trylock);
+EXPORT_SYMBOL(__raw_read_trylock);

-void _raw_read_lock(rwlock_t *rw)
+void __raw_read_lock(raw_rwlock_t *rw)
 {
 	unsigned int stuck;

@@ -135,9 +135,9 @@
 		}
 	}
 }
-EXPORT_SYMBOL(_raw_read_lock);
+EXPORT_SYMBOL(__raw_read_lock);

-void _raw_read_unlock(rwlock_t *rw)
+void __raw_read_unlock(raw_rwlock_t *rw)
 {
 	if ( rw->lock == 0 )
 		printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n",
@@ -146,9 +146,9 @@
 	wmb();
 	atomic_dec((atomic_t *) &(rw)->lock);
 }
-EXPORT_SYMBOL(_raw_read_unlock);
+EXPORT_SYMBOL(__raw_read_unlock);

-void _raw_write_lock(rwlock_t *rw)
+void __raw_write_lock(raw_rwlock_t *rw)
 {
 	unsigned int stuck;

@@ -164,18 +164,18 @@
 	}
 	wmb();
 }
-EXPORT_SYMBOL(_raw_write_lock);
+EXPORT_SYMBOL(__raw_write_lock);

-int _raw_write_trylock(rwlock_t *rw)
+int __raw_write_trylock(raw_rwlock_t *rw)
 {
 	if (cmpxchg(&rw->lock, 0, -1) != 0)
 		return 0;
 	wmb();
 	return 1;
 }
-EXPORT_SYMBOL(_raw_write_trylock);
+EXPORT_SYMBOL(__raw_write_trylock);

-void _raw_write_unlock(rwlock_t *rw)
+void __raw_write_unlock(raw_rwlock_t *rw)
 {
 	if (rw->lock >= 0)
 		printk("_write_lock(): %s/%d (nip %08lX) lock %d\n",
@@ -184,6 +184,6 @@
 	wmb();
 	rw->lock = 0;
 }
-EXPORT_SYMBOL(_raw_write_unlock);
+EXPORT_SYMBOL(__raw_write_unlock);

 #endif
diff -urN ./linux-2.6.18.1/arch/ppc/mm/fault.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/mm/fault.c
--- ./linux-2.6.18.1/arch/ppc/mm/fault.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/mm/fault.c	2007-05-19 23:58:35.000000000 +0900
@@ -89,7 +89,7 @@
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
*/ -int do_page_fault(struct pt_regs *regs, unsigned long address, +int notrace do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; diff -urN ./linux-2.6.18.1/arch/ppc/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/mm/init.c --- ./linux-2.6.18.1/arch/ppc/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/mm/init.c 2007-05-19 23:58:35.000000000 +0900 @@ -55,7 +55,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; diff -urN ./linux-2.6.18.1/arch/ppc/platforms/apus_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/apus_setup.c --- ./linux-2.6.18.1/arch/ppc/platforms/apus_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/apus_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -275,6 +275,7 @@ freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; __bus_speed = bus_speed; __speed_test_failed = speed_test_failed; diff -urN ./linux-2.6.18.1/arch/ppc/platforms/ev64260.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/ev64260.c --- ./linux-2.6.18.1/arch/ppc/platforms/ev64260.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/ev64260.c 2007-05-19 23:58:35.000000000 +0900 @@ -550,6 +550,7 @@ tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } diff -urN ./linux-2.6.18.1/arch/ppc/platforms/gemini_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/gemini_setup.c --- ./linux-2.6.18.1/arch/ppc/platforms/gemini_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/gemini_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -459,6 +459,7 @@ divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } unsigned long __init gemini_find_end_of_memory(void) diff -urN ./linux-2.6.18.1/arch/ppc/platforms/hdpu.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/hdpu.c --- ./linux-2.6.18.1/arch/ppc/platforms/hdpu.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/hdpu.c 2007-05-19 23:58:35.000000000 +0900 @@ -55,7 +55,7 @@ static void hdpu_set_l1pe(void); static void hdpu_cpustate_set(unsigned char new_state); #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; extern int smp_tb_synchronized; diff -urN ./linux-2.6.18.1/arch/ppc/platforms/powerpmc250.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/powerpmc250.c --- ./linux-2.6.18.1/arch/ppc/platforms/powerpmc250.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/powerpmc250.c 2007-05-19 23:58:35.000000000 +0900 @@ -163,6 +163,7 @@ tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void diff -urN ./linux-2.6.18.1/arch/ppc/platforms/prep_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prep_setup.c --- ./linux-2.6.18.1/arch/ppc/platforms/prep_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ 
linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prep_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -940,6 +940,7 @@ (freq/divisor)/1000000, (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; tb_ticks_per_jiffy = freq / HZ / divisor; } } diff -urN ./linux-2.6.18.1/arch/ppc/platforms/prpmc750.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prpmc750.c --- ./linux-2.6.18.1/arch/ppc/platforms/prpmc750.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prpmc750.c 2007-05-19 23:58:35.000000000 +0900 @@ -268,6 +268,7 @@ tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void prpmc750_restart(char *cmd) diff -urN ./linux-2.6.18.1/arch/ppc/platforms/prpmc800.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prpmc800.c --- ./linux-2.6.18.1/arch/ppc/platforms/prpmc800.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/prpmc800.c 2007-05-19 23:58:35.000000000 +0900 @@ -327,6 +327,7 @@ tb_ticks_per_second = 100000000 / 4; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; return; } @@ -367,6 +368,7 @@ tb_ticks_per_second = (tbl_end - tbl_start) * 2; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; } static void prpmc800_restart(char *cmd) diff -urN ./linux-2.6.18.1/arch/ppc/platforms/sbc82xx.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/sbc82xx.c --- ./linux-2.6.18.1/arch/ppc/platforms/sbc82xx.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/sbc82xx.c 2007-05-19 23:58:35.000000000 +0900 @@ -65,7 +65,7 @@ static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { diff -urN ./linux-2.6.18.1/arch/ppc/platforms/spruce.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/spruce.c --- ./linux-2.6.18.1/arch/ppc/platforms/spruce.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/platforms/spruce.c 2007-05-19 23:58:35.000000000 +0900 @@ -147,6 +147,7 @@ freq = SPRUCE_BUS_SPEED; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int diff -urN ./linux-2.6.18.1/arch/ppc/syslib/cpm2_common.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/cpm2_common.c --- ./linux-2.6.18.1/arch/ppc/syslib/cpm2_common.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/cpm2_common.c 2007-05-19 23:58:35.000000000 +0900 @@ -114,7 +114,7 @@ /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... 
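 * (The switch to raw_spinlock_t above is presumably because dpalloc /
 * dpfree can run in contexts that must not sleep, and with PREEMPT_RT
 * a plain spinlock_t becomes a sleeping lock.)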
*/ static rh_block_t cpm_boot_dpmem_rh_block[16]; diff -urN ./linux-2.6.18.1/arch/ppc/syslib/ibm44x_common.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ibm44x_common.c --- ./linux-2.6.18.1/arch/ppc/syslib/ibm44x_common.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ibm44x_common.c 2007-05-19 23:58:35.000000000 +0900 @@ -63,6 +63,7 @@ { tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); diff -urN ./linux-2.6.18.1/arch/ppc/syslib/m8260_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/m8260_setup.c --- ./linux-2.6.18.1/arch/ppc/syslib/m8260_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/m8260_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -79,6 +79,7 @@ divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } /* The 8260 has an internal 1-second timer update register that diff -urN ./linux-2.6.18.1/arch/ppc/syslib/m8xx_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/m8xx_setup.c --- ./linux-2.6.18.1/arch/ppc/syslib/m8xx_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/m8xx_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -218,6 +218,7 @@ printk("Decrementer Frequency = %d/%d\n", freq, divisor); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Perform some more timer/timebase initialization. This used * to be done elsewhere, but other changes caused it to get diff -urN ./linux-2.6.18.1/arch/ppc/syslib/mpc52xx_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/mpc52xx_setup.c --- ./linux-2.6.18.1/arch/ppc/syslib/mpc52xx_setup.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/mpc52xx_setup.c 2007-05-19 23:58:35.000000000 +0900 @@ -215,6 +215,7 @@ tb_ticks_per_jiffy = xlbfreq / HZ / divisor; tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000); + cpu_khz = (xlbfreq / divisor) / 1000; } diff -urN ./linux-2.6.18.1/arch/ppc/syslib/ocp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ocp.c --- ./linux-2.6.18.1/arch/ppc/syslib/ocp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ocp.c 2007-05-19 23:58:35.000000000 +0900 @@ -44,11 +44,11 @@ #include #include #include +#include #include #include #include -#include #include //#define DBG(x) printk x diff -urN ./linux-2.6.18.1/arch/ppc/syslib/open_pic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/open_pic.c --- ./linux-2.6.18.1/arch/ppc/syslib/open_pic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/open_pic.c 2007-05-19 23:58:35.000000000 +0900 @@ -526,7 +526,7 @@ } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP diff -urN ./linux-2.6.18.1/arch/ppc/syslib/open_pic2.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/open_pic2.c --- ./linux-2.6.18.1/arch/ppc/syslib/open_pic2.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/open_pic2.c 2007-05-19 23:58:35.000000000 +0900 @@ -380,7 +380,7 @@ vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static 
DEFINE_RAW_SPINLOCK(openpic2_setup_lock);

 /*
  * Initialize a timer interrupt (and disable it)
diff -urN ./linux-2.6.18.1/arch/ppc/syslib/ppc4xx_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ppc4xx_setup.c
--- ./linux-2.6.18.1/arch/ppc/syslib/ppc4xx_setup.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ppc4xx_setup.c	2007-05-19 23:58:35.000000000 +0900
@@ -172,6 +172,7 @@
 	freq = bip->bi_tbfreq;
 	tb_ticks_per_jiffy = freq / HZ;
 	tb_to_us = mulhwu_scale_factor(freq, 1000000);
+	cpu_khz = freq / 1000;

 	/* Set the time base to zero.
 	** At 200 Mhz, time base will rollover in ~2925 years.
diff -urN ./linux-2.6.18.1/arch/ppc/syslib/ppc85xx_setup.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ppc85xx_setup.c
--- ./linux-2.6.18.1/arch/ppc/syslib/ppc85xx_setup.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/ppc85xx_setup.c	2007-05-19 23:58:35.000000000 +0900
@@ -57,6 +57,7 @@
 	divisor = 8;
 	tb_ticks_per_jiffy = freq / divisor / HZ;
 	tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+	cpu_khz = (freq / divisor) / 1000;

 	/* Set the time base to zero */
 	mtspr(SPRN_TBWL, 0);
diff -urN ./linux-2.6.18.1/arch/ppc/syslib/todc_time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/todc_time.c
--- ./linux-2.6.18.1/arch/ppc/syslib/todc_time.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/ppc/syslib/todc_time.c	2007-05-19 23:58:35.000000000 +0900
@@ -506,6 +506,7 @@
 	tb_ticks_per_jiffy = freq / HZ;
 	tb_to_us = mulhwu_scale_factor(freq, 1000000);
+	cpu_khz = freq / 1000;

 	return;
 }
diff -urN ./linux-2.6.18.1/arch/sparc64/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/Kconfig
--- ./linux-2.6.18.1/arch/sparc64/Kconfig	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/Kconfig	2007-05-19 23:58:35.000000000 +0900
@@ -26,7 +26,7 @@
 	bool
 	default y

-config TIME_INTERPOLATION
+config GENERIC_TIME
 	bool
 	default y

diff -urN ./linux-2.6.18.1/arch/sparc64/defconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/defconfig
--- ./linux-2.6.18.1/arch/sparc64/defconfig	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/defconfig	2007-05-19 23:58:35.000000000 +0900
@@ -7,7 +7,7 @@
 CONFIG_SPARC64=y
 CONFIG_64BIT=y
 CONFIG_MMU=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_SPARC64_PAGE_SIZE_8KB=y
 # CONFIG_SPARC64_PAGE_SIZE_64KB is not set
diff -urN ./linux-2.6.18.1/arch/sparc64/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/kernel/time.c
--- ./linux-2.6.18.1/arch/sparc64/kernel/time.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/kernel/time.c	2007-05-20 00:11:12.000000000 +0900
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -621,7 +622,7 @@
 	if (!mregs && !dregs) {
 		prom_printf("Something wrong, clock regs not mapped yet.\n");
 		prom_halt();
-	}
+	}

 	if (mregs) {
 		spin_lock_irq(&mostek_lock);
@@ -821,7 +822,7 @@
 	}

 	set_system_time();
-	
+
 	local_irq_restore(flags);

 	return 0;
@@ -976,22 +977,33 @@

 #endif /* CONFIG_CPU_FREQ */

-static struct time_interpolator sparc64_cpu_interpolator = {
-	.source		= TIME_SOURCE_CPU,
-	.shift		= 16,
-	.mask		= 0xffffffffffffffffLL
+static cycle_t read_itc(void)
+{
+	return (cycle_t)get_cycles();
+}
+
+static struct clocksource clocksource_sparc64_itc = {
+	.name		= "sparc64_itc",
+	.rating		= 300,
+	.read		= read_itc,
+	.mask		= 0xffffffffffffffffLL,
+	.mult		= 0, /* to be calculated */
+	.shift		= 16,
+	.is_continuous	= 1,
 };
+

 /* The quotient formula is taken from the IA64 port. */
 #define SPARC64_NSEC_PER_CYC_SHIFT	10UL

 void __init time_init(void)
 {
 	unsigned long clock = sparc64_init_timers();

-	sparc64_cpu_interpolator.frequency = clock;
-	register_time_interpolator(&sparc64_cpu_interpolator);
+	clocksource_sparc64_itc.mult = clocksource_hz2mult(clock,
+		clocksource_sparc64_itc.shift);
+	clocksource_register(&clocksource_sparc64_itc);

-	/* Now that the interpolator is registered, it is
+	/* Now that the clocksource is registered, it is
 	 * safe to start the timer ticking.
 	 */
 	sparc64_start_timers();
@@ -1026,11 +1038,11 @@
 	unsigned long flags;
 	u8 tmp;

-	/* 
+	/*
 	 * Not having a register set can lead to trouble.
 	 * Also starfire doesn't have a tod clock.
 	 */
-	if (!mregs && !dregs) 
+	if (!mregs && !dregs)
 		return -1;

 	if (mregs) {
diff -urN ./linux-2.6.18.1/arch/sparc64/kernel/time.c.orig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/kernel/time.c.orig
--- ./linux-2.6.18.1/arch/sparc64/kernel/time.c.orig	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/sparc64/kernel/time.c.orig	2007-05-19 23:58:35.000000000 +0900
@@ -0,0 +1,1380 @@
+/* $Id: time.c,v 1.42 2002/01/23 14:33:55 davem Exp $
+ * time.c: UltraSparc timer and TOD clock support.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * Based largely on code which is:
+ *
+ * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+DEFINE_SPINLOCK(mostek_lock);
+DEFINE_SPINLOCK(rtc_lock);
+void __iomem *mstk48t02_regs = NULL;
+#ifdef CONFIG_PCI
+unsigned long ds1287_regs = 0UL;
+#endif
+
+extern unsigned long wall_jiffies;
+
+static void __iomem *mstk48t08_regs;
+static void __iomem *mstk48t59_regs;
+
+static int set_rtc_mmss(unsigned long);
+
+#define TICK_PRIV_BIT	(1UL << 63)
+
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	if (in_lock_functions(pc))
+		return regs->u_regs[UREG_RETPC];
+	return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+static void tick_disable_protection(void)
+{
+	/* Set things up so user can access tick register for profiling
+	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
+	 * read back of %tick after writing it.
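+	 * TICK_PRIV_BIT is bit 63 of %tick: clearing it lets user mode
+	 * read the register without trapping, and the trailing read of
+	 * %tick is the dummy read the errata calls for.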
+ */ + __asm__ __volatile__( + " ba,pt %%xcc, 1f\n" + " nop\n" + " .align 64\n" + "1: rd %%tick, %%g2\n" + " add %%g2, 6, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wrpr %%g2, 0, %%tick\n" + " rdpr %%tick, %%g0" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g2"); +} + +static void tick_init_tick(unsigned long offset) +{ + tick_disable_protection(); + + __asm__ __volatile__( + " rd %%tick, %%g1\n" + " andn %%g1, %1, %%g1\n" + " ba,pt %%xcc, 1f\n" + " add %%g1, %0, %%g1\n" + " .align 64\n" + "1: wr %%g1, 0x0, %%tick_cmpr\n" + " rd %%tick_cmpr, %%g0" + : /* no outputs */ + : "r" (offset), "r" (TICK_PRIV_BIT) + : "g1"); +} + +static unsigned long tick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%tick, %0\n\t" + "mov %0, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static unsigned long tick_get_compare(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%tick_cmpr, %0\n\t" + "mov %0, %0" + : "=r" (ret)); + + return ret; +} + +static unsigned long tick_add_compare(unsigned long adj) +{ + unsigned long new_compare; + + /* Workaround for Spitfire Errata (#54 I think??), I discovered + * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch + * number 103640. + * + * On Blackbird writes to %tick_cmpr can fail, the + * workaround seems to be to execute the wr instruction + * at the start of an I-cache line, and perform a dummy + * read back from %tick_cmpr right after writing to it. -DaveM + */ + __asm__ __volatile__("rd %%tick_cmpr, %0\n\t" + "ba,pt %%xcc, 1f\n\t" + " add %0, %1, %0\n\t" + ".align 64\n" + "1:\n\t" + "wr %0, 0, %%tick_cmpr\n\t" + "rd %%tick_cmpr, %%g0" + : "=&r" (new_compare) + : "r" (adj)); + + return new_compare; +} + +static unsigned long tick_add_tick(unsigned long adj, unsigned long offset) +{ + unsigned long new_tick, tmp; + + /* Also need to handle Blackbird bug here too. */ + __asm__ __volatile__("rd %%tick, %0\n\t" + "add %0, %2, %0\n\t" + "wrpr %0, 0, %%tick\n\t" + "andn %0, %4, %1\n\t" + "ba,pt %%xcc, 1f\n\t" + " add %1, %3, %1\n\t" + ".align 64\n" + "1:\n\t" + "wr %1, 0, %%tick_cmpr\n\t" + "rd %%tick_cmpr, %%g0" + : "=&r" (new_tick), "=&r" (tmp) + : "r" (adj), "r" (offset), "r" (TICK_PRIV_BIT)); + + return new_tick; +} + +static struct sparc64_tick_ops tick_operations __read_mostly = { + .init_tick = tick_init_tick, + .get_tick = tick_get_tick, + .get_compare = tick_get_compare, + .add_tick = tick_add_tick, + .add_compare = tick_add_compare, + .softint_mask = 1UL << 0, +}; + +struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations; + +static void stick_init_tick(unsigned long offset) +{ + /* Writes to the %tick and %stick register are not + * allowed on sun4v. The Hypervisor controls that + * bit, per-strand. + */ + if (tlb_type != hypervisor) { + tick_disable_protection(); + + /* Let the user get at STICK too. 
*/ + __asm__ __volatile__( + " rd %%asr24, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wr %%g2, 0, %%asr24" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g1", "g2"); + } + + __asm__ __volatile__( + " rd %%asr24, %%g1\n" + " andn %%g1, %1, %%g1\n" + " add %%g1, %0, %%g1\n" + " wr %%g1, 0x0, %%asr25" + : /* no outputs */ + : "r" (offset), "r" (TICK_PRIV_BIT) + : "g1"); +} + +static unsigned long stick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static unsigned long stick_get_compare(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%asr25, %0" + : "=r" (ret)); + + return ret; +} + +static unsigned long stick_add_tick(unsigned long adj, unsigned long offset) +{ + unsigned long new_tick, tmp; + + __asm__ __volatile__("rd %%asr24, %0\n\t" + "add %0, %2, %0\n\t" + "wr %0, 0, %%asr24\n\t" + "andn %0, %4, %1\n\t" + "add %1, %3, %1\n\t" + "wr %1, 0, %%asr25" + : "=&r" (new_tick), "=&r" (tmp) + : "r" (adj), "r" (offset), "r" (TICK_PRIV_BIT)); + + return new_tick; +} + +static unsigned long stick_add_compare(unsigned long adj) +{ + unsigned long new_compare; + + __asm__ __volatile__("rd %%asr25, %0\n\t" + "add %0, %1, %0\n\t" + "wr %0, 0, %%asr25" + : "=&r" (new_compare) + : "r" (adj)); + + return new_compare; +} + +static struct sparc64_tick_ops stick_operations __read_mostly = { + .init_tick = stick_init_tick, + .get_tick = stick_get_tick, + .get_compare = stick_get_compare, + .add_tick = stick_add_tick, + .add_compare = stick_add_compare, + .softint_mask = 1UL << 16, +}; + +/* On Hummingbird the STICK/STICK_CMPR register is implemented + * in I/O space. There are two 64-bit registers each, the + * first holds the low 32-bits of the value and the second holds + * the high 32-bits. + * + * Since STICK is constantly updating, we have to access it carefully. + * + * The sequence we use to read is: + * 1) read high + * 2) read low + * 3) read high again, if it rolled re-read both low and high again. 
+ * + * Writing STICK safely is also tricky: + * 1) write low to zero + * 2) write high + * 3) write low + */ +#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL +#define HBIRD_STICK_ADDR 0x1fe0000f070UL + +static unsigned long __hbird_read_stick(void) +{ + unsigned long ret, tmp1, tmp2, tmp3; + unsigned long addr = HBIRD_STICK_ADDR+8; + + __asm__ __volatile__("ldxa [%1] %5, %2\n" + "1:\n\t" + "sub %1, 0x8, %1\n\t" + "ldxa [%1] %5, %3\n\t" + "add %1, 0x8, %1\n\t" + "ldxa [%1] %5, %4\n\t" + "cmp %4, %2\n\t" + "bne,a,pn %%xcc, 1b\n\t" + " mov %4, %2\n\t" + "sllx %4, 32, %4\n\t" + "or %3, %4, %0\n\t" + : "=&r" (ret), "=&r" (addr), + "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3) + : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr)); + + return ret; +} + +static unsigned long __hbird_read_compare(void) +{ + unsigned long low, high; + unsigned long addr = HBIRD_STICKCMP_ADDR; + + __asm__ __volatile__("ldxa [%2] %3, %0\n\t" + "add %2, 0x8, %2\n\t" + "ldxa [%2] %3, %1" + : "=&r" (low), "=&r" (high), "=&r" (addr) + : "i" (ASI_PHYS_BYPASS_EC_E), "2" (addr)); + + return (high << 32UL) | low; +} + +static void __hbird_write_stick(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICK_ADDR; + + __asm__ __volatile__("stxa %%g0, [%0] %4\n\t" + "add %0, 0x8, %0\n\t" + "stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void __hbird_write_compare(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL; + + __asm__ __volatile__("stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void hbtick_init_tick(unsigned long offset) +{ + unsigned long val; + + tick_disable_protection(); + + /* XXX This seems to be necessary to 'jumpstart' Hummingbird + * XXX into actually sending STICK interrupts. I think because + * XXX of how we store %tick_cmpr in head.S this somehow resets the + * XXX {TICK + STICK} interrupt mux. -DaveM + */ + __hbird_write_stick(__hbird_read_stick()); + + val = __hbird_read_stick() & ~TICK_PRIV_BIT; + __hbird_write_compare(val + offset); +} + +static unsigned long hbtick_get_tick(void) +{ + return __hbird_read_stick() & ~TICK_PRIV_BIT; +} + +static unsigned long hbtick_get_compare(void) +{ + return __hbird_read_compare(); +} + +static unsigned long hbtick_add_tick(unsigned long adj, unsigned long offset) +{ + unsigned long val; + + val = __hbird_read_stick() + adj; + __hbird_write_stick(val); + + val &= ~TICK_PRIV_BIT; + __hbird_write_compare(val + offset); + + return val; +} + +static unsigned long hbtick_add_compare(unsigned long adj) +{ + unsigned long val = __hbird_read_compare() + adj; + + val &= ~TICK_PRIV_BIT; + __hbird_write_compare(val); + + return val; +} + +static struct sparc64_tick_ops hbtick_operations __read_mostly = { + .init_tick = hbtick_init_tick, + .get_tick = hbtick_get_tick, + .get_compare = hbtick_get_compare, + .add_tick = hbtick_add_tick, + .add_compare = hbtick_add_compare, + .softint_mask = 1UL << 0, +}; + +/* timer_interrupt() needs to keep up the real-time clock, + * as well as call the "do_timer()" routine every clocktick + * + * NOTE: On SUN5 systems the ticker interrupt comes in using 2 + * interrupts, one at level14 and one with softint bit 0. 
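+ *
+ * The do/while loop below re-arms the compare register and repeats
+ * while time_after_eq(ticks, compare), i.e. while the new compare
+ * value is already in the past, so overrun ticks are replayed rather
+ * than dropped.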
+ */ +unsigned long timer_tick_offset __read_mostly; + +static unsigned long timer_ticks_per_nsec_quotient __read_mostly; + +#define TICK_SIZE (tick_nsec / 1000) + +static inline void timer_check_rtc(void) +{ + /* last time the cmos clock got updated */ + static long last_rtc_update; + + /* Determine when to update the Mostek clock. */ + if (ntp_synced() && + xtime.tv_sec > last_rtc_update + 660 && + (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && + (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { + if (set_rtc_mmss(xtime.tv_sec) == 0) + last_rtc_update = xtime.tv_sec; + else + last_rtc_update = xtime.tv_sec - 600; + /* do it again in 60 s */ + } +} + +irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + unsigned long ticks, compare, pstate; + + write_seqlock(&xtime_lock); + + do { +#ifndef CONFIG_SMP + profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); +#endif + do_timer(regs); + + /* Guarantee that the following sequences execute + * uninterrupted. + */ + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + compare = tick_ops->add_compare(timer_tick_offset); + ticks = tick_ops->get_tick(); + + /* Restore PSTATE_IE. */ + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : /* no outputs */ + : "r" (pstate)); + } while (time_after_eq(ticks, compare)); + + timer_check_rtc(); + + write_sequnlock(&xtime_lock); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_SMP +void timer_tick_interrupt(struct pt_regs *regs) +{ + write_seqlock(&xtime_lock); + + do_timer(regs); + + timer_check_rtc(); + + write_sequnlock(&xtime_lock); +} +#endif + +/* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */ +static void __init kick_start_clock(void) +{ + void __iomem *regs = mstk48t02_regs; + u8 sec, tmp; + int i, count; + + prom_printf("CLOCK: Clock was stopped. Kick start "); + + spin_lock_irq(&mostek_lock); + + /* Turn on the kick start bit to start the oscillator. */ + tmp = mostek_read(regs + MOSTEK_CREG); + tmp |= MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + tmp = mostek_read(regs + MOSTEK_SEC); + tmp &= ~MSTK_STOP; + mostek_write(regs + MOSTEK_SEC, tmp); + tmp = mostek_read(regs + MOSTEK_HOUR); + tmp |= MSTK_KICK_START; + mostek_write(regs + MOSTEK_HOUR, tmp); + tmp = mostek_read(regs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + + spin_unlock_irq(&mostek_lock); + + /* Delay to allow the clock oscillator to start. */ + sec = MSTK_REG_SEC(regs); + for (i = 0; i < 3; i++) { + while (sec == MSTK_REG_SEC(regs)) + for (count = 0; count < 100000; count++) + /* nothing */ ; + prom_printf("."); + sec = MSTK_REG_SEC(regs); + } + prom_printf("\n"); + + spin_lock_irq(&mostek_lock); + + /* Turn off kick start and set a "valid" time and date. */ + tmp = mostek_read(regs + MOSTEK_CREG); + tmp |= MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + tmp = mostek_read(regs + MOSTEK_HOUR); + tmp &= ~MSTK_KICK_START; + mostek_write(regs + MOSTEK_HOUR, tmp); + MSTK_SET_REG_SEC(regs,0); + MSTK_SET_REG_MIN(regs,0); + MSTK_SET_REG_HOUR(regs,0); + MSTK_SET_REG_DOW(regs,5); + MSTK_SET_REG_DOM(regs,1); + MSTK_SET_REG_MONTH(regs,8); + MSTK_SET_REG_YEAR(regs,1996 - MSTK_YEAR_ZERO); + tmp = mostek_read(regs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + + spin_unlock_irq(&mostek_lock); + + /* Ensure the kick start bit is off. If it isn't, turn it off. 
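+	 * Each retry below uses the Mostek write-enable protocol seen
+	 * throughout this file: set MSTK_CREG_WRITE in the control
+	 * register, modify the target register, then clear
+	 * MSTK_CREG_WRITE again, i.e.:
+	 *
+	 *	tmp = mostek_read(regs + MOSTEK_CREG);
+	 *	tmp |= MSTK_CREG_WRITE;
+	 *	mostek_write(regs + MOSTEK_CREG, tmp);
+	 *	... modify MOSTEK_HOUR etc. ...
+	 *	tmp &= ~MSTK_CREG_WRITE;
+	 *	mostek_write(regs + MOSTEK_CREG, tmp);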
*/ + while (mostek_read(regs + MOSTEK_HOUR) & MSTK_KICK_START) { + prom_printf("CLOCK: Kick start still on!\n"); + + spin_lock_irq(&mostek_lock); + + tmp = mostek_read(regs + MOSTEK_CREG); + tmp |= MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + + tmp = mostek_read(regs + MOSTEK_HOUR); + tmp &= ~MSTK_KICK_START; + mostek_write(regs + MOSTEK_HOUR, tmp); + + tmp = mostek_read(regs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_WRITE; + mostek_write(regs + MOSTEK_CREG, tmp); + + spin_unlock_irq(&mostek_lock); + } + + prom_printf("CLOCK: Kick start procedure successful.\n"); +} + +/* Return nonzero if the clock chip battery is low. */ +static int __init has_low_battery(void) +{ + void __iomem *regs = mstk48t02_regs; + u8 data1, data2; + + spin_lock_irq(&mostek_lock); + + data1 = mostek_read(regs + MOSTEK_EEPROM); /* Read some data. */ + mostek_write(regs + MOSTEK_EEPROM, ~data1); /* Write back the complement. */ + data2 = mostek_read(regs + MOSTEK_EEPROM); /* Read back the complement. */ + mostek_write(regs + MOSTEK_EEPROM, data1); /* Restore original value. */ + + spin_unlock_irq(&mostek_lock); + + return (data1 == data2); /* Was the write blocked? */ +} + +/* Probe for the real time clock chip. */ +static void __init set_system_time(void) +{ + unsigned int year, mon, day, hour, min, sec; + void __iomem *mregs = mstk48t02_regs; +#ifdef CONFIG_PCI + unsigned long dregs = ds1287_regs; +#else + unsigned long dregs = 0UL; +#endif + u8 tmp; + + if (!mregs && !dregs) { + prom_printf("Something wrong, clock regs not mapped yet.\n"); + prom_halt(); + } + + if (mregs) { + spin_lock_irq(&mostek_lock); + + /* Traditional Mostek chip. */ + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp |= MSTK_CREG_READ; + mostek_write(mregs + MOSTEK_CREG, tmp); + + sec = MSTK_REG_SEC(mregs); + min = MSTK_REG_MIN(mregs); + hour = MSTK_REG_HOUR(mregs); + day = MSTK_REG_DOM(mregs); + mon = MSTK_REG_MONTH(mregs); + year = MSTK_CVT_YEAR( MSTK_REG_YEAR(mregs) ); + } else { + /* Dallas 12887 RTC chip. */ + + do { + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + } while (sec != CMOS_READ(RTC_SECONDS)); + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + if ((year += 1900) < 1970) + year += 100; + } + + xtime.tv_sec = mktime(year, mon, day, hour, min, sec); + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + if (mregs) { + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_READ; + mostek_write(mregs + MOSTEK_CREG, tmp); + + spin_unlock_irq(&mostek_lock); + } +} + +/* davem suggests we keep this within the 4M locked kernel image */ +static u32 starfire_get_time(void) +{ + static char obp_gettod[32]; + static u32 unix_tod; + + sprintf(obp_gettod, "h# %08x unix-gettod", + (unsigned int) (long) &unix_tod); + prom_feval(obp_gettod); + + return unix_tod; +} + +static int starfire_set_time(u32 val) +{ + /* Do nothing, time is set using the service processor + * console on this platform. 
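+	 *
+	 * (For contrast, the sun4v routines below use the fast-trap
+	 * hypercall convention: function number in %o5, arguments in
+	 * %o0/%o1, trap via "ta HV_FAST_TRAP", with the error status
+	 * returned in %o0 and the result in %o1.)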
+ */ + return 0; +} + +static u32 hypervisor_get_time(void) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_GET; + arg0 = 0; + arg1 = 0; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return arg1; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_get() timed out.\n"); + return 0; + } + printk(KERN_WARNING "SUN4V: tod_get() not supported.\n"); + return 0; +} + +static int hypervisor_set_time(u32 secs) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_SET; + arg0 = secs; + __asm__ __volatile__("ta %4" + : "=&r" (func), "=&r" (arg0) + : "0" (func), "1" (arg0), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return 0; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_set() timed out.\n"); + return -EAGAIN; + } + printk(KERN_WARNING "SUN4V: tod_set() not supported.\n"); + return -EOPNOTSUPP; +} + +static int __init clock_model_matches(char *model) +{ + if (strcmp(model, "mk48t02") && + strcmp(model, "mk48t08") && + strcmp(model, "mk48t59") && + strcmp(model, "m5819") && + strcmp(model, "m5819p") && + strcmp(model, "m5823") && + strcmp(model, "ds1287")) + return 0; + + return 1; +} + +static int __devinit clock_probe(struct of_device *op, const struct of_device_id *match) +{ + struct device_node *dp = op->node; + char *model = of_get_property(dp, "model", NULL); + unsigned long size, flags; + void __iomem *regs; + + if (!model || !clock_model_matches(model)) + return -ENODEV; + + /* On an Enterprise system there can be multiple mostek clocks. + * We should only match the one that is on the central FHC bus. + */ + if (!strcmp(dp->parent->name, "fhc") && + strcmp(dp->parent->parent->name, "central") != 0) + return -ENODEV; + + size = (op->resource[0].end - op->resource[0].start) + 1; + regs = of_ioremap(&op->resource[0], 0, size, "clock"); + if (!regs) + return -ENOMEM; + +#ifdef CONFIG_PCI + if (!strcmp(model, "ds1287") || + !strcmp(model, "m5819") || + !strcmp(model, "m5819p") || + !strcmp(model, "m5823")) { + ds1287_regs = (unsigned long) regs; + } else +#endif + if (model[5] == '0' && model[6] == '2') { + mstk48t02_regs = regs; + } else if(model[5] == '0' && model[6] == '8') { + mstk48t08_regs = regs; + mstk48t02_regs = mstk48t08_regs + MOSTEK_48T08_48T02; + } else { + mstk48t59_regs = regs; + mstk48t02_regs = mstk48t59_regs + MOSTEK_48T59_48T02; + } + + printk(KERN_INFO "%s: Clock regs at %p\n", dp->full_name, regs); + + local_irq_save(flags); + + if (mstk48t02_regs != NULL) { + /* Report a low battery voltage condition. */ + if (has_low_battery()) + prom_printf("NVRAM: Low battery voltage!\n"); + + /* Kick start the clock if it is completely stopped. 
 */
+	if (mostek_read(mstk48t02_regs + MOSTEK_SEC) & MSTK_STOP)
+		kick_start_clock();
+	}
+
+	set_system_time();
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+static struct of_device_id clock_match[] = {
+	{
+		.name = "eeprom",
+	},
+	{
+		.name = "rtc",
+	},
+	{},
+};
+
+static struct of_platform_driver clock_driver = {
+	.name		= "clock",
+	.match_table	= clock_match,
+	.probe		= clock_probe,
+};
+
+static int __init clock_init(void)
+{
+	if (this_is_starfire) {
+		xtime.tv_sec = starfire_get_time();
+		xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+		set_normalized_timespec(&wall_to_monotonic,
+					-xtime.tv_sec, -xtime.tv_nsec);
+		return 0;
+	}
+	if (tlb_type == hypervisor) {
+		xtime.tv_sec = hypervisor_get_time();
+		xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+		set_normalized_timespec(&wall_to_monotonic,
+					-xtime.tv_sec, -xtime.tv_nsec);
+		return 0;
+	}
+
+	return of_register_driver(&clock_driver, &of_bus_type);
+}
+
+/* Must be after subsys_initcall() so that buses are probed.  Must
+ * be before device_initcall() because things like the RTC driver
+ * need to see the clock registers.
+ */
+fs_initcall(clock_init);
+
+/* This gets the master TICK_INT timer going. */
+static unsigned long sparc64_init_timers(void)
+{
+	struct device_node *dp;
+	struct property *prop;
+	unsigned long clock;
+#ifdef CONFIG_SMP
+	extern void smp_tick_init(void);
+#endif
+
+	dp = of_find_node_by_path("/");
+	if (tlb_type == spitfire) {
+		unsigned long ver, manuf, impl;
+
+		__asm__ __volatile__ ("rdpr %%ver, %0"
+				      : "=&r" (ver));
+		manuf = ((ver >> 48) & 0xffff);
+		impl = ((ver >> 32) & 0xffff);
+		if (manuf == 0x17 && impl == 0x13) {
+			/* Hummingbird, aka Ultra-IIe */
+			tick_ops = &hbtick_operations;
+			prop = of_find_property(dp, "stick-frequency", NULL);
+		} else {
+			tick_ops = &tick_operations;
+			cpu_find_by_instance(0, &dp, NULL);
+			prop = of_find_property(dp, "clock-frequency", NULL);
+		}
+	} else {
+		tick_ops = &stick_operations;
+		prop = of_find_property(dp, "stick-frequency", NULL);
+	}
+	clock = *(unsigned int *) prop->value;
+	timer_tick_offset = clock / HZ;
+
+#ifdef CONFIG_SMP
+	smp_tick_init();
+#endif
+
+	return clock;
+}
+
+static void sparc64_start_timers(void)
+{
+	unsigned long pstate;
+
+	/* Guarantee that the following sequences execute
+	 * uninterrupted.
+	 */
+	__asm__ __volatile__("rdpr %%pstate, %0\n\t"
+			     "wrpr %0, %1, %%pstate"
+			     : "=r" (pstate)
+			     : "i" (PSTATE_IE));
+
+	tick_ops->init_tick(timer_tick_offset);
+
+	/* Restore PSTATE_IE.
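+	 * The rdpr/wrpr pair used here acts like local_irq_save()/
+	 * local_irq_restore(): wrpr writes the XOR of its two operands
+	 * into %pstate, so XOR-ing the saved value with PSTATE_IE
+	 * clears the interrupt-enable bit, and writing the saved
+	 * value back (XOR with 0) restores it.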
+	 */
+	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+			     : /* no outputs */
+			     : "r" (pstate));
+}
+
+struct freq_table {
+	unsigned long clock_tick_ref;
+	unsigned int ref_freq;
+};
+static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
+
+unsigned long sparc64_get_clock_tick(unsigned int cpu)
+{
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+	if (ft->clock_tick_ref)
+		return ft->clock_tick_ref;
+	return cpu_data(cpu).clock_tick;
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				    void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned int cpu = freq->cpu;
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+	if (!ft->ref_freq) {
+		ft->ref_freq = freq->old;
+		ft->clock_tick_ref = cpu_data(cpu).clock_tick;
+	}
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+	    (val == CPUFREQ_RESUMECHANGE)) {
+		cpu_data(cpu).clock_tick =
+			cpufreq_scale(ft->clock_tick_ref,
+				      ft->ref_freq,
+				      freq->new);
+	}
+
+	return 0;
+}
+
+static struct notifier_block sparc64_cpufreq_notifier_block = {
+	.notifier_call	= sparc64_cpufreq_notifier
+};
+
+#endif /* CONFIG_CPU_FREQ */
+
+static cycle_t read_itc(void)
+{
+	return (cycle_t) get_cycles();
+}
+
+static struct clocksource clocksource_sparc64_itc = {
+	.name		= "sparc64_itc",
+	.rating		= 300,
+	.read		= read_itc,
+	.mask		= 0xffffffffffffffffULL,
+	.mult		= 0, /* to be calculated */
+	.shift		= 16,
+	.is_continuous	= 1,
+};
+
+
+/* The quotient formula is taken from the IA64 port. */
+#define SPARC64_NSEC_PER_CYC_SHIFT	30UL
+void __init time_init(void)
+{
+	unsigned long clock = sparc64_init_timers();
+
+	clocksource_sparc64_itc.mult = clocksource_hz2mult(clock,
+					clocksource_sparc64_itc.shift);
+	clocksource_register(&clocksource_sparc64_itc);
+
+	/* Now that the clocksource is registered, it is
+	 * safe to start the timer ticking.
+	 */
+	sparc64_start_timers();
+
+	timer_ticks_per_nsec_quotient =
+		(((NSEC_PER_SEC << SPARC64_NSEC_PER_CYC_SHIFT) +
+		  (clock / 2)) / clock);
+
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long ticks = tick_ops->get_tick();
+
+	return (ticks * timer_ticks_per_nsec_quotient)
+		>> SPARC64_NSEC_PER_CYC_SHIFT;
+}
+
+static int set_rtc_mmss(unsigned long nowtime)
+{
+	int real_seconds, real_minutes, chip_minutes;
+	void __iomem *mregs = mstk48t02_regs;
+#ifdef CONFIG_PCI
+	unsigned long dregs = ds1287_regs;
+#else
+	unsigned long dregs = 0UL;
+#endif
+	unsigned long flags;
+	u8 tmp;
+
+	/*
+	 * Not having a register set can lead to trouble.
+	 * Also starfire doesn't have a tod clock.
+	 */
+	if (!mregs && !dregs)
+		return -1;
+
+	if (mregs) {
+		spin_lock_irqsave(&mostek_lock, flags);
+
+		/* Read the current RTC minutes. */
+		tmp = mostek_read(mregs + MOSTEK_CREG);
+		tmp |= MSTK_CREG_READ;
+		mostek_write(mregs + MOSTEK_CREG, tmp);
+
+		chip_minutes = MSTK_REG_MIN(mregs);
+
+		tmp = mostek_read(mregs + MOSTEK_CREG);
+		tmp &= ~MSTK_CREG_READ;
+		mostek_write(mregs + MOSTEK_CREG, tmp);
+
+		/*
+		 * since we're only adjusting minutes and seconds,
+		 * don't interfere with hour overflow.
This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - chip_minutes) + 15)/30) & 1) + real_minutes += 30; /* correct for half hour time zone */ + real_minutes %= 60; + + if (abs(real_minutes - chip_minutes) < 30) { + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp |= MSTK_CREG_WRITE; + mostek_write(mregs + MOSTEK_CREG, tmp); + + MSTK_SET_REG_SEC(mregs,real_seconds); + MSTK_SET_REG_MIN(mregs,real_minutes); + + tmp = mostek_read(mregs + MOSTEK_CREG); + tmp &= ~MSTK_CREG_WRITE; + mostek_write(mregs + MOSTEK_CREG, tmp); + + spin_unlock_irqrestore(&mostek_lock, flags); + + return 0; + } else { + spin_unlock_irqrestore(&mostek_lock, flags); + + return -1; + } + } else { + int retval = 0; + unsigned char save_control, save_freq_select; + + /* Stolen from arch/i386/kernel/time.c, see there for + * credits and descriptive comments. + */ + spin_lock_irqsave(&rtc_lock, flags); + save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */ + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + chip_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(chip_minutes); + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - chip_minutes) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + + if (abs(real_minutes - chip_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + chip_minutes, real_minutes); + retval = -1; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; + } +} + +#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ +static unsigned char mini_rtc_status; /* bitmapped status byte. */ + +/* months start at 0 now */ +static unsigned char days_in_mo[] = +{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0 && \ + ((year) % 100 != 0 || (year) % 400 == 0)) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) + */ +static void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear = tm->tm_year - 1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 
1904 was a leap year, 1900 was not, 1996 is, and 2000 was + */ + day = tm->tm_mon > 2 && leapyear(tm->tm_year); + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday = day % 7; +} + +static void to_tm(int tim, struct rtc_time *tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* Both Starfire and SUN4V give us seconds since Jan 1st, 1970, + * aka Unix time. So we have to convert to/from rtc_time. + */ +static inline void mini_get_rtc_time(struct rtc_time *time) +{ + unsigned long flags; + u32 seconds; + + spin_lock_irqsave(&rtc_lock, flags); + seconds = 0; + if (this_is_starfire) + seconds = starfire_get_time(); + else if (tlb_type == hypervisor) + seconds = hypervisor_get_time(); + spin_unlock_irqrestore(&rtc_lock, flags); + + to_tm(seconds, time); + time->tm_year -= 1900; + time->tm_mon -= 1; +} + +static inline int mini_set_rtc_time(struct rtc_time *time) +{ + u32 seconds = mktime(time->tm_year + 1900, time->tm_mon + 1, + time->tm_mday, time->tm_hour, + time->tm_min, time->tm_sec); + unsigned long flags; + int err; + + spin_lock_irqsave(&rtc_lock, flags); + err = -ENODEV; + if (this_is_starfire) + err = starfire_set_time(seconds); + else if (tlb_type == hypervisor) + err = hypervisor_set_time(seconds); + spin_unlock_irqrestore(&rtc_lock, flags); + + return err; +} + +static int mini_rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct rtc_time wtime; + void __user *argp = (void __user *)arg; + + switch (cmd) { + + case RTC_PLL_GET: + return -EINVAL; + + case RTC_PLL_SET: + return -EINVAL; + + case RTC_UIE_OFF: /* disable ints from RTC updates. */ + return 0; + + case RTC_UIE_ON: /* enable ints for RTC updates. */ + return -EINVAL; + + case RTC_RD_TIME: /* Read the time/date from RTC */ + /* this doesn't get week-day, who cares */ + memset(&wtime, 0, sizeof(wtime)); + mini_get_rtc_time(&wtime); + + return copy_to_user(argp, &wtime, sizeof(wtime)) ? 
-EFAULT : 0; + + case RTC_SET_TIME: /* Set the RTC */ + { + int year; + unsigned char leap_yr; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&wtime, argp, sizeof(wtime))) + return -EFAULT; + + year = wtime.tm_year + 1900; + leap_yr = ((!(year % 4) && (year % 100)) || + !(year % 400)); + + if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1)) + return -EINVAL; + + if (wtime.tm_mday < 0 || wtime.tm_mday > + (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr))) + return -EINVAL; + + if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 || + wtime.tm_min < 0 || wtime.tm_min >= 60 || + wtime.tm_sec < 0 || wtime.tm_sec >= 60) + return -EINVAL; + + return mini_set_rtc_time(&wtime); + } + } + + return -EINVAL; +} + +static int mini_rtc_open(struct inode *inode, struct file *file) +{ + if (mini_rtc_status & RTC_IS_OPEN) + return -EBUSY; + + mini_rtc_status |= RTC_IS_OPEN; + + return 0; +} + +static int mini_rtc_release(struct inode *inode, struct file *file) +{ + mini_rtc_status &= ~RTC_IS_OPEN; + return 0; +} + + +static struct file_operations mini_rtc_fops = { + .owner = THIS_MODULE, + .ioctl = mini_rtc_ioctl, + .open = mini_rtc_open, + .release = mini_rtc_release, +}; + +static struct miscdevice rtc_mini_dev = +{ + .minor = RTC_MINOR, + .name = "rtc", + .fops = &mini_rtc_fops, +}; + +static int __init rtc_mini_init(void) +{ + int retval; + + if (tlb_type != hypervisor && !this_is_starfire) + return -ENODEV; + + printk(KERN_INFO "Mini RTC Driver\n"); + + retval = misc_register(&rtc_mini_dev); + if (retval < 0) + return retval; + + return 0; +} + +static void __exit rtc_mini_exit(void) +{ + misc_deregister(&rtc_mini_dev); +} + + +module_init(rtc_mini_init); +module_exit(rtc_mini_exit); diff -urN ./linux-2.6.18.1/arch/v850/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/v850/Kconfig --- ./linux-2.6.18.1/arch/v850/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/v850/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -34,6 +34,10 @@ bool default y +config GENERIC_TIME + bool + default y + config TIME_LOW_RES bool default y diff -urN ./linux-2.6.18.1/arch/v850/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/v850/kernel/time.c --- ./linux-2.6.18.1/arch/v850/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/v850/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -99,81 +99,6 @@ return IRQ_HANDLED; } -/* - * This version of gettimeofday has near microsecond resolution. - */ -void do_gettimeofday (struct timeval *tv) -{ -#if 0 /* DAVIDM later if possible */ - extern volatile unsigned long lost_ticks; - unsigned long lost; -#endif - unsigned long flags; - unsigned long usec, sec; - unsigned long seq; - - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - -#if 0 - usec = mach_gettimeoffset ? mach_gettimeoffset () : 0; -#else - usec = 0; -#endif -#if 0 /* DAVIDM later if possible */ - lost = lost_ticks; - if (lost) - usec += lost * (1000000/HZ); -#endif - sec = xtime.tv_sec; - usec += xtime.tv_nsec / 1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq (&xtime_lock); - - /* This is revolting. We need to set the xtime.tv_nsec - * correctly. 
However, the value in this location is - * is value at the last tick. - * Discover what correction gettimeofday - * would have done, and then undo it! - */ -#if 0 - tv->tv_nsec -= mach_gettimeoffset() * 1000; -#endif - - while (tv->tv_nsec < 0) { - tv->tv_nsec += NSEC_PER_SEC; - tv->tv_sec--; - } - - xtime.tv_sec = tv->tv_sec; - xtime.tv_nsec = tv->tv_nsec; - - ntp_clear(); - - write_sequnlock_irq (&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int timer_dev_id; static struct irqaction timer_irqaction = { timer_interrupt, diff -urN ./linux-2.6.18.1/arch/x86_64/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/Kconfig --- ./linux-2.6.18.1/arch/x86_64/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -24,6 +24,14 @@ bool default y +config GENERIC_TIME + bool + default y + +config GENERIC_TIME_VSYSCALL + bool + default y + config LOCKDEP_SUPPORT bool default y @@ -46,13 +54,6 @@ config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_HWEIGHT bool default y @@ -289,6 +290,14 @@ If the system is EM64T, you should say N unless your system is EM64T NUMA. +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + bool + config K8_NUMA bool "Old style AMD Opteron NUMA detection" depends on NUMA @@ -659,3 +668,6 @@ source "crypto/Kconfig" source "lib/Kconfig" + +source "kernel/time/Kconfig" + diff -urN ./linux-2.6.18.1/arch/x86_64/ia32/ia32entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/ia32/ia32entry.S --- ./linux-2.6.18.1/arch/x86_64/ia32/ia32entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/ia32/ia32entry.S 2007-05-19 23:58:35.000000000 +0900 @@ -119,7 +119,9 @@ cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -227,7 +229,9 @@ cmpl $IA32_NR_syscalls-1,%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -320,8 +324,10 @@ cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) jmp int_ret_from_sys_call @@ -390,7 +396,7 @@ .section .rodata,"a" .align 8 -ia32_sys_call_table: +ENTRY(ia32_sys_call_table) .quad sys_restart_syscall .quad sys_exit .quad stub32_fork @@ -713,4 +719,7 @@ .quad sys_tee .quad compat_sys_vmsplice .quad compat_sys_move_pages +#ifdef CONFIG_LATENCY_TRACE +.globl ia32_syscall_end +#endif ia32_syscall_end: diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/Makefile --- ./linux-2.6.18.1/arch/x86_64/kernel/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -8,7 +8,7 @@ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - pci-dma.o pci-nommu.o alternative.o + pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce.o 
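A minimal sketch of the conversion pattern behind the hunks above (the
identifiers prefixed mytimer_ are placeholders, not part of this patch):
with CONFIG_GENERIC_TIME an architecture drops its private
do_gettimeofday()/do_settimeofday(), as the v850 hunk does, and instead
registers a struct clocksource; the generic timekeeping core then
derives the time of day from the counter, the same pattern the sparc64
and x86-64 clocksources elsewhere in this patch follow:

	/* Hypothetical hardware accessors -- placeholders only: */
	extern unsigned long mytimer_get_counter(void);
	extern unsigned long mytimer_hz;

	static cycle_t mytimer_read(void)
	{
		/* read the free-running hardware counter */
		return (cycle_t) mytimer_get_counter();
	}

	static struct clocksource clocksource_mytimer = {
		.name		= "mytimer",
		.rating		= 200,
		.read		= mytimer_read,
		.mask		= 0xffffffffULL,	/* 32-bit counter */
		.mult		= 0,			/* set at init */
		.shift		= 20,
		.is_continuous	= 1,
	};

	static int __init mytimer_clocksource_init(void)
	{
		/* mult/shift convert cycles to ns: ns = (cyc * mult) >> shift */
		clocksource_mytimer.mult =
			clocksource_hz2mult(mytimer_hz, clocksource_mytimer.shift);
		return clocksource_register(&clocksource_mytimer);
	}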
diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/apic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/apic.c --- ./linux-2.6.18.1/arch/x86_64/kernel/apic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/apic.c 2007-05-19 23:58:35.000000000 +0900 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -36,9 +37,9 @@ #include #include #include +#include int apic_verbosity; -int apic_runs_main_timer; int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; @@ -52,6 +53,25 @@ /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer __read_mostly = 0; + +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long delta, struct clock_event *evt); +static void lapic_timer_setup(int mode, struct clock_event *evt); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; +static DEFINE_PER_CPU(struct clock_event, lapic_events); + static void apic_pm_activate(void); void enable_NMI_through_LVT0 (void * dummy) @@ -527,8 +547,7 @@ apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); return 0; @@ -696,13 +715,16 @@ #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) lvtt_value |= APIC_LVT_MASKED; @@ -717,48 +739,34 @@ & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write(APIC_TMICT, clocks/APIC_DIVISOR); } -static void setup_APIC_timer(unsigned int clocks) +static void lapic_next_event(unsigned long delta, struct clock_event *evt) +{ + apic_write(APIC_TMICT, delta); +} + +static void lapic_timer_setup(int mode, struct clock_event *evt) { unsigned long flags; local_irq_save(flags); - - /* wait for irq slice */ - if (vxtime.hpet_address && hpet_use_timer) { - int trigger = hpet_readl(HPET_T0_CMP); - while (hpet_readl(HPET_COUNTER) >= trigger) - /* do nothing */ ; - while (hpet_readl(HPET_COUNTER) < trigger) - /* do nothing */ ; - } else { - int c1, c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - do { - c1 = c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - } while (c2 - c1 < 300); - } - __setup_APIC_LVTT(clocks); - /* Turn off PIT interrupt if we use APIC timer as main timer. - Only works with the PM timer right now - TBD fix it for HPET too. 
*/ - if (vxtime.mode == VXTIME_PMTMR && - smp_processor_id() == boot_cpu_id && - apic_runs_main_timer == 1 && - !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { - stop_timer_interrupt(); - apic_runs_main_timer++; - } + __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC); local_irq_restore(flags); } + +static void __devinit setup_APIC_timer(void) +{ + struct clock_event *levt = &__get_cpu_var(lapic_events); + + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + + register_local_clockevent(levt); +} + /* * In this function we calibrate APIC bus clocks to the external * timer. Unfortunately we cannot use jiffies and the timer irq @@ -778,12 +786,13 @@ { int apic, apic_start, tsc, tsc_start; int result; + u64 wallclock_nsecs; /* * Put whatever arbitrary (but long enough) timeout * value into the APIC clock, we just want to get the * counter running for calibration. */ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); apic_start = apic_read(APIC_TMCCT); #ifdef CONFIG_X86_PM_TIMER @@ -791,6 +800,8 @@ pmtimer_wait(5000); /* 5ms wait */ apic = apic_read(APIC_TMCCT); result = (apic_start - apic) * 1000L / 5; + printk("using pmtimer for lapic calibration\n"); + wallclock_nsecs = 5000000; } else #endif { @@ -804,6 +815,8 @@ result = (apic_start - apic) * 1000L * cpu_khz / (tsc - tsc_start); + wallclock_nsecs = ((u64)tsc - (u64)tsc_start) * 1000000 / (u64)cpu_khz; + } printk("result %d\n", result); @@ -811,11 +824,22 @@ printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); + + + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(apic_start - apic, wallclock_nsecs, 32); + + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + printk("lapic max_delta_ns: %ld\n", lapic_clockevent.max_delta_ns); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + return result * APIC_DIVISOR / HZ; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock (void) { if (disable_apic_timer) { @@ -832,7 +856,7 @@ /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_enable(); } @@ -840,7 +864,7 @@ void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_enable(); } @@ -887,6 +911,13 @@ !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { disable_APIC_timer(); cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); +#ifdef CONFIG_HIGH_RES_TIMERS + printk("Disabling NO_HZ and high resolution timers " + "due to timer broadcasting\n"); + for_each_possible_cpu(cpu) + per_cpu(lapic_events, cpu).capabilities &= + ~CLOCK_CAP_NEXTEVT; +#endif } } EXPORT_SYMBOL(switch_APIC_timer_to_ipi); @@ -945,8 +976,6 @@ #ifdef CONFIG_SMP update_process_times(user_mode(regs)); #endif - if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id) - main_timer_handler(regs); /* * We take the 'long' return path, and there every subsystem * grabs the appropriate locks (kernel lock/ irq lock). @@ -969,6 +998,8 @@ */ void smp_apic_timer_interrupt(struct pt_regs *regs) { + int cpu = smp_processor_id(); + struct clock_event *evt = &per_cpu(lapic_events, cpu); /* * the NMI deadlock-detector uses this. 
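+	 *
+	 * The tick is now delivered through the per-cpu clock_event:
+	 * the hunk below replaces the direct call to
+	 * smp_local_timer_interrupt() with evt->event_handler(regs),
+	 * so periodic and one-shot (high resolution) modes share a
+	 * single entry point.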
*/ @@ -986,7 +1017,7 @@ */ exit_idle(); irq_enter(); - smp_local_timer_interrupt(regs); + evt->event_handler(regs); irq_exit(); } @@ -1161,26 +1192,11 @@ return 1; } -static __init int setup_apicmaintimer(char *str) -{ - apic_runs_main_timer = 1; - nohpet = 1; - return 1; -} -__setup("apicmaintimer", setup_apicmaintimer); - -static __init int setup_noapicmaintimer(char *str) -{ - apic_runs_main_timer = -1; - return 1; -} -__setup("noapicmaintimer", setup_noapicmaintimer); - static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return setup_apicmaintimer(NULL); + return 1; } __setup("apicpmtimer", setup_apicpmtimer); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/early_printk.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/early_printk.c --- ./linux-2.6.18.1/arch/x86_64/kernel/early_printk.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/early_printk.c 2007-05-19 23:58:35.000000000 +0900 @@ -203,7 +203,7 @@ void early_printk(const char *fmt, ...) { - char buf[512]; + static char buf[512]; int n; va_list ap; diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/entry.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/entry.S --- ./linux-2.6.18.1/arch/x86_64/kernel/entry.S 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/entry.S 2007-05-19 23:58:35.000000000 +0900 @@ -45,6 +45,47 @@ .code64 +#ifdef CONFIG_LATENCY_TRACE + +ENTRY(mcount) + cmpq $0, mcount_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r11 + push %r10 + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + + pop %rbp +out: + ret + +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif @@ -221,7 +262,9 @@ cmpq $__NR_syscall_max,%rax ja badsys movq %r10,%rcx + TRACE_SYS_CALL call *sys_call_table(,%rax,8) # XXX: rip relative + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) /* * Syscall return path ending with SYSRET (fast path) @@ -255,8 +298,8 @@ /* edx: work, edi: workmask */ sysret_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz sysret_signal TRACE_IRQS_ON sti pushq %rdi @@ -279,7 +322,7 @@ leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi /* Use IRET because user could have changed frame. This works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ cli @@ -303,7 +346,9 @@ cmpq $__NR_syscall_max,%rax ja 1f movq %r10,%rcx /* fixup for C */ + TRACE_SYS_CALL call *sys_call_table(,%rax,8) + TRACE_SYS_RET 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ jmp int_ret_from_sys_call @@ -349,8 +394,8 @@ /* First do a reschedule test. 
*/ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz int_very_careful TRACE_IRQS_ON sti pushq %rdi @@ -387,7 +432,7 @@ movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 call do_notify_resume -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi int_restore_rest: RESTORE_REST cli @@ -585,8 +630,8 @@ /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz retint_signal TRACE_IRQS_ON sti pushq %rdi @@ -612,7 +657,7 @@ RESTORE_REST cli TRACE_IRQS_OFF - movl $_TIF_NEED_RESCHED,%edi + movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi GET_THREAD_INFO(%rcx) jmp retint_check diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/head64.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/head64.c --- ./linux-2.6.18.1/arch/x86_64/kernel/head64.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/head64.c 2007-05-19 23:58:35.000000000 +0900 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ boot_cpu_data.x86_mask = eax & 0xf; } -void __init x86_64_start_kernel(char * real_mode_data) +void __init notrace x86_64_start_kernel(char * real_mode_data) { char *s; int i; @@ -99,6 +100,7 @@ cpu_pda(i) = &boot_cpu_pda[i]; pda_init(0); + copy_bootdata(real_mode_data); #ifdef CONFIG_SMP cpu_set(0, cpu_online_map); @@ -120,5 +122,6 @@ panic("Kernel too big for kernel mapping\n"); setup_boot_cpu_data(); + start_kernel(); } diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/hpet.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/hpet.c --- ./linux-2.6.18.1/arch/x86_64/kernel/hpet.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/hpet.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,475 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int nohpet __initdata = 0; + +unsigned long hpet_address; +static unsigned long hpet_period; /* fsecs / HPET clock */ +unsigned long hpet_tick; /* HPET clocks / interrupt */ +int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ + +#define FSEC_PER_TICK (FSEC_PER_SEC / HZ) + +/* + * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing + * it to the HPET timer of known frequency. + */ + +#define TICK_COUNT 100000000 + +unsigned int __init hpet_calibrate_tsc(void) +{ + int tsc_start, hpet_start; + int tsc_now, hpet_now; + unsigned long flags; + + local_irq_save(flags); + local_irq_disable(); + + hpet_start = hpet_readl(HPET_COUNTER); + rdtscl(tsc_start); + + do { + local_irq_disable(); + hpet_now = hpet_readl(HPET_COUNTER); + tsc_now = get_cycles_sync(); + local_irq_restore(flags); + } while ((tsc_now - tsc_start) < TICK_COUNT && + (hpet_now - hpet_start) < TICK_COUNT); + + return (tsc_now - tsc_start) * 1000000000L + / ((hpet_now - hpet_start) * hpet_period / 1000); +} + + + +#ifdef CONFIG_HPET +static __init int late_hpet_init(void) +{ + struct hpet_data hd; + unsigned int ntimer; + + if (!hpet_address) + return 0; + + memset(&hd, 0, sizeof (hd)); + + ntimer = hpet_readl(HPET_ID); + ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; + ntimer++; + + /* + * Register with driver. + * Timer0 and Timer1 is used by platform. 
+ */
+	hd.hd_phys_address = hpet_address;
+	hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
+	hd.hd_nirqs = ntimer;
+	hd.hd_flags = HPET_DATA_PLATFORM;
+	hpet_reserve_timer(&hd, 0);
+#ifdef CONFIG_HPET_EMULATE_RTC
+	hpet_reserve_timer(&hd, 1);
+#endif
+	hd.hd_irq[0] = HPET_LEGACY_8254;
+	hd.hd_irq[1] = HPET_LEGACY_RTC;
+	if (ntimer > 2) {
+		struct hpet *hpet;
+		struct hpet_timer *timer;
+		int i;
+
+		hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
+		timer = &hpet->hpet_timers[2];
+		for (i = 2; i < ntimer; timer++, i++)
+			hd.hd_irq[i] = (timer->hpet_config &
+					Tn_INT_ROUTE_CNF_MASK) >>
+				Tn_INT_ROUTE_CNF_SHIFT;
+
+	}
+
+	hpet_alloc(&hd);
+	return 0;
+}
+fs_initcall(late_hpet_init);
+#endif
+
+static int hpet_timer_stop_set_go(unsigned long tick)
+{
+	unsigned int cfg;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+	cfg = hpet_readl(HPET_CFG);
+	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+	hpet_writel(cfg, HPET_CFG);
+	hpet_writel(0, HPET_COUNTER);
+	hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+	if (hpet_use_timer) {
+		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+			    HPET_TN_32BIT, HPET_T0_CFG);
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
+		cfg |= HPET_CFG_LEGACY;
+	}
+/*
+ * Go!
+ */
+
+	cfg |= HPET_CFG_ENABLE;
+	hpet_writel(cfg, HPET_CFG);
+
+	return 0;
+}
+
+int hpet_arch_init(void)
+{
+	unsigned int id;
+
+	if (!hpet_address)
+		return -1;
+	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+	id = hpet_readl(HPET_ID);
+
+	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
+		return -1;
+
+	hpet_period = hpet_readl(HPET_PERIOD);
+	if (hpet_period < 100000 || hpet_period > 100000000)
+		return -1;
+
+	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
+
+	hpet_use_timer = (id & HPET_ID_LEGSUP);
+
+	return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int hpet_reenable(void)
+{
+	return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int hpet_stop(void)
+{
+	return hpet_timer_stop_set_go(0);
+}
+
+#ifdef CONFIG_HPET_EMULATE_RTC
+/* HPET in LegacyReplacement Mode eats up the RTC interrupt line.  When HPET
+ * is enabled, we support RTC interrupt functionality in software.
+ * RTC has 3 kinds of interrupts:
+ * 1) Update Interrupt - generate an interrupt, every second, when the
+ *    RTC clock is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate a periodic interrupt, with frequencies
+ *    2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
+ * (1) and (2) above are implemented using polling at a frequency of
+ * 64 Hz.  The exact frequency is a tradeoff between accuracy and interrupt
+ * overhead.  (DEFAULT_RTC_INT_FREQ)
+ * For (3), we use interrupts at 64 Hz or the user-specified periodic
+ * frequency, whichever is higher.
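+ *
+ * The comparator arithmetic used by the code below follows from
+ * hpet_tick being the number of HPET clocks per kernel tick (1/HZ s):
+ *
+ *	hpet_tick * HZ                      = HPET clocks per second
+ *	hpet_tick * HZ / hpet_rtc_int_freq  = HPET clocks per RTC interrupt
+ *
+ * which is exactly the value added to the timer 1 comparator on each
+ * reinit.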
+ */
+#include
+
+#define DEFAULT_RTC_INT_FREQ	64
+#define RTC_NUM_INTS		1
+
+static unsigned long UIE_on;
+static unsigned long prev_update_sec;
+
+static unsigned long AIE_on;
+static struct rtc_time alarm_time;
+
+static unsigned long PIE_on;
+static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
+static unsigned long PIE_count;
+
+static unsigned long hpet_rtc_int_freq;	/* RTC interrupt frequency */
+static unsigned int hpet_t1_cmp;	/* cached comparator register */
+
+int is_hpet_enabled(void)
+{
+	return hpet_address != 0;
+}
+
+/*
+ * Timer 1 for RTC, we do not use periodic interrupt feature,
+ * even if HPET supports periodic interrupts on Timer 1.
+ * The reason being, to set up a periodic interrupt in HPET, we need to
+ * stop the main counter.  And if we do that every time someone
+ * disables/enables RTC, we will have an adverse effect on the main
+ * kernel timer running on Timer 0.
+ * So, for the time being, simulate the periodic interrupt in software.
+ *
+ * hpet_rtc_timer_init() is called for the first time and during subsequent
+ * interrupts reinit happens through hpet_rtc_timer_reinit().
+ */
+int hpet_rtc_timer_init(void)
+{
+	unsigned int cfg, cnt;
+	unsigned long flags;
+
+	if (!is_hpet_enabled())
+		return 0;
+	/*
+	 * Set the counter 1 and enable the interrupts.
+	 */
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	local_irq_save(flags);
+	cnt = hpet_readl(HPET_COUNTER);
+	cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
+	hpet_writel(cnt, HPET_T1_CMP);
+	hpet_t1_cmp = cnt;
+	local_irq_restore(flags);
+
+	cfg = hpet_readl(HPET_T1_CFG);
+	cfg &= ~HPET_TN_PERIODIC;
+	cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+	hpet_writel(cfg, HPET_T1_CFG);
+
+	return 1;
+}
+
+static void hpet_rtc_timer_reinit(void)
+{
+	unsigned int cfg, cnt;
+
+	if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
+		cfg = hpet_readl(HPET_T1_CFG);
+		cfg &= ~HPET_TN_ENABLE;
+		hpet_writel(cfg, HPET_T1_CFG);
+		return;
+	}
+
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	/* It is more accurate to use the comparator value than current count. */
+	cnt = hpet_t1_cmp;
+	cnt += hpet_tick*HZ/hpet_rtc_int_freq;
+	hpet_writel(cnt, HPET_T1_CMP);
+	hpet_t1_cmp = cnt;
+}
+
+/*
+ * The functions below are called from the rtc driver.
+ * Return 0 if HPET is not being used.
+ * Otherwise do the necessary changes and return 1.
+ */
+int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (bit_mask & RTC_UIE)
+		UIE_on = 0;
+	if (bit_mask & RTC_PIE)
+		PIE_on = 0;
+	if (bit_mask & RTC_AIE)
+		AIE_on = 0;
+
+	return 1;
+}
+
+int hpet_set_rtc_irq_bit(unsigned long bit_mask)
+{
+	int timer_init_reqd = 0;
+
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (!(PIE_on | AIE_on | UIE_on))
+		timer_init_reqd = 1;
+
+	if (bit_mask & RTC_UIE) {
+		UIE_on = 1;
+	}
+	if (bit_mask & RTC_PIE) {
+		PIE_on = 1;
+		PIE_count = 0;
+	}
+	if (bit_mask & RTC_AIE) {
+		AIE_on = 1;
+	}
+
+	if (timer_init_reqd)
+		hpet_rtc_timer_init();
+
+	return 1;
+}
+
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	alarm_time.tm_hour = hrs;
+	alarm_time.tm_min = min;
+	alarm_time.tm_sec = sec;
+
+	return 1;
+}
+
+int hpet_set_periodic_freq(unsigned long freq)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	PIE_freq = freq;
+	PIE_count = 0;
+
+	return 1;
+}
+
+int hpet_rtc_dropped_irq(void)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	return 1;
+}
+
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct rtc_time curr_time;
+	unsigned long rtc_int_flag = 0;
+	int call_rtc_interrupt = 0;
+
+	hpet_rtc_timer_reinit();
+
+	if (UIE_on | AIE_on) {
+		rtc_get_rtc_time(&curr_time);
+	}
+	if (UIE_on) {
+		if (curr_time.tm_sec != prev_update_sec) {
+			/* Set update int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag = RTC_UF;
+			prev_update_sec = curr_time.tm_sec;
+		}
+	}
+	if (PIE_on) {
+		PIE_count++;
+		if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
+			/* Set periodic int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_PF;
+			PIE_count = 0;
+		}
+	}
+	if (AIE_on) {
+		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
+		    (curr_time.tm_min == alarm_time.tm_min) &&
+		    (curr_time.tm_hour == alarm_time.tm_hour)) {
+			/* Set alarm int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_AF;
+		}
+	}
+	if (call_rtc_interrupt) {
+		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+		rtc_interrupt(rtc_int_flag, dev_id, regs);
+	}
+	return IRQ_HANDLED;
+}
+#endif
+
+static int __init nohpet_setup(char *s)
+{
+	nohpet = 1;
+	return 1;
+}
+
+__setup("nohpet", nohpet_setup);
+
+#define HPET_MASK	0xFFFFFFFF
+#define HPET_SHIFT	22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC	1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+	return (cycle_t)readl(hpet_ptr);
+}
+
+static cycle_t __vsyscall_fn vread_hpet(void)
+{
+	return (cycle_t)readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+}
+
+struct clocksource clocksource_hpet = {
+	.name		= "hpet",
+	.rating		= 250,
+	.read		= read_hpet,
+	.mask		= (cycle_t)HPET_MASK,
+	.mult		= 0, /* set below */
+	.shift		= HPET_SHIFT,
+	.is_continuous	= 1,
+	.vread		= vread_hpet,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+	unsigned long hpet_period;
+	void __iomem *hpet_base;
+	u64 tmp;
+
+	if (!hpet_address)
+		return -ENODEV;
+
+	/* calculate the hpet address: */
+	hpet_base =
+		(void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+	hpet_ptr = hpet_base + HPET_COUNTER;
+
+	/* calculate the frequency: */
+	hpet_period = readl(hpet_base + HPET_PERIOD);
+
+	/*
+	 * hpet period is in femtoseconds per cycle
+	 * so we need to convert this to ns/cyc units
+	 * approximated by mult/2^shift
+	 *
+	 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+	 * fsec/cyc * 1ns/1000000fsec * 2^shift
= mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + return clocksource_register(&clocksource_hpet); +} + +module_init(init_hpet_clocksource); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/i8259.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/i8259.c --- ./linux-2.6.18.1/arch/x86_64/kernel/i8259.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/i8259.c 2007-05-19 23:58:35.000000000 +0900 @@ -43,17 +43,10 @@ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ BI(x,c) BI(x,d) BI(x,e) BI(x,f) -#define BUILD_15_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) - /* * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: * (these are usually mapped to vectors 0x20-0x2f) */ -BUILD_16_IRQS(0x0) #ifdef CONFIG_X86_LOCAL_APIC /* @@ -66,19 +59,14 @@ * * (these are usually mapped into the 0x30-0xff vector range) */ - BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) + BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) - -#ifdef CONFIG_PCI_MSI - BUILD_15_IRQS(0xe) -#endif +BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) #endif #undef BUILD_16_IRQS -#undef BUILD_15_IRQS #undef BI @@ -91,26 +79,11 @@ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -#define IRQLIST_15(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e) - void (*interrupt[NR_IRQS])(void) = { - IRQLIST_16(0x0), - -#ifdef CONFIG_X86_IO_APIC - IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc), IRQLIST_16(0xd) - -#ifdef CONFIG_PCI_MSI - , IRQLIST_15(0xe) -#endif - -#endif + IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) }; #undef IRQ @@ -126,46 +99,21 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); - -static void end_8259A_irq (unsigned int irq) -{ - if (irq > 256) { - char var; - printk("return %p stack %p ti %p\n", __builtin_return_address(0), &var, task_thread_info(current)); - - BUG(); - } - - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) && - irq_desc[irq].action) - enable_8259A_irq(irq); -} - -#define shutdown_8259A_irq disable_8259A_irq - static void mask_and_ack_8259A(unsigned int); -static unsigned int startup_8259A_irq(unsigned int irq) -{ - enable_8259A_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type i8259A_irq_type = { - .typename = "XT-PIC", - .startup = startup_8259A_irq, - .shutdown = shutdown_8259A_irq, - .enable = enable_8259A_irq, - .disable = disable_8259A_irq, - .ack = mask_and_ack_8259A, - .end = end_8259A_irq, +static struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, }; /* * 8259A PIC functions to handle ISA devices: */ 
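+/*
+ * The 8259A is driven through the generic irq_chip model here: the
+ * chip supplies only mask/unmask/mask_ack, and interrupt flow (what
+ * used to be the per-chip ->startup/->enable/->end callbacks of
+ * hw_interrupt_type) is handled by generic flow handlers such as
+ * handle_level_irq/handle_edge_irq used elsewhere in this patch.
+ */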
+DEFINE_RAW_SPINLOCK(i8259A_lock); + /* * This contains the irq mask for both 8259A irq controllers, */ @@ -234,7 +182,7 @@ { disable_irq_nosync(irq); io_apic_irqs &= ~(1<= NR_IRQS) - break; if (vector != IA32_SYSCALL_VECTOR) set_intr_gate(vector, interrupt[i]); } @@ -557,7 +525,7 @@ * IRQ0 must be given a fixed assignment and initialized, * because it's used before the IO-APIC is set up. */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + __get_cpu_var(vector_irq)[FIRST_DEVICE_VECTOR] = 0; /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/io_apic.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/io_apic.c --- ./linux-2.6.18.1/arch/x86_64/kernel/io_apic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/io_apic.c 2007-05-19 23:58:35.000000000 +0900 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,9 @@ #include #include #include +#include + +static int assign_irq_vector(int irq, cpumask_t mask); #define __apicdebuginit __init @@ -55,8 +59,8 @@ /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -81,14 +85,6 @@ short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; -#ifdef CONFIG_PCI_MSI -#define vector_to_irq(vector) \ - (platform_legacy_irq(vector) ? vector : vector_irq[vector]) -#else -#define vector_to_irq(vector) (vector) -#endif - #define __DO_ACTION(R, ACTION, FINAL) \ \ { \ @@ -104,6 +100,9 @@ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -112,11 +111,35 @@ } #ifdef CONFIG_SMP +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + int apic, pin; + struct irq_pin_list *entry = irq_2_pin + irq; + + BUG_ON(irq >= NR_IRQS); + for (;;) { + unsigned int reg; + apic = entry->apic; + pin = entry->pin; + if (pin == -1) + break; + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~0x000000ff; + reg |= vector; + io_apic_modify(apic, reg); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +} + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { unsigned long flags; unsigned int dest; cpumask_t tmp; + int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -124,7 +147,13 @@ cpus_and(mask, tmp, CPU_MASK_ALL); - dest = cpu_mask_to_apicid(mask); + vector = assign_irq_vector(irq, mask); + if (vector < 0) + return; + + cpus_clear(tmp); + cpu_set(vector >> 8, tmp); + dest = cpu_mask_to_apicid(tmp); /* * Only the high 8 bits are valid. @@ -132,14 +161,12 @@ dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - set_irq_info(irq, mask); + __target_IO_APIC_irq(irq, dest, vector & 0xff); + set_native_irq_info(irq, mask); spin_unlock_irqrestore(&ioapic_lock, flags); } #endif -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF }; - /* * The common case is 1:1 IRQ<->pin mappings. 
Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -170,10 +197,8 @@ static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -695,64 +720,6 @@ return MPBIOS_trigger(idx); } -static int next_irq = 16; - -/* - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number - * from ACPI, which can reach 800 in large boxen. - * - * Compact the sparse GSI space into a sequential IRQ series and reuse - * vectors if possible. - */ -int gsi_irq_sharing(int gsi) -{ - int i, tries, vector; - - BUG_ON(gsi >= NR_IRQ_VECTORS); - - if (platform_legacy_irq(gsi)) - return gsi; - - if (gsi_2_irq[gsi] != 0xFF) - return (int)gsi_2_irq[gsi]; - - tries = NR_IRQS; - try_again: - vector = assign_irq_vector(gsi); - - /* - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous - * use of vector and if found, return that IRQ. However, we never want - * to share legacy IRQs, which usually have a different trigger mode - * than PCI. - */ - for (i = 0; i < NR_IRQS; i++) - if (IO_APIC_VECTOR(i) == vector) - break; - if (platform_legacy_irq(i)) { - if (--tries >= 0) { - IO_APIC_VECTOR(i) = 0; - goto try_again; - } - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); - } - if (i < NR_IRQS) { - gsi_2_irq[gsi] = i; - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", - gsi, vector, i); - return i; - } - - i = next_irq++; - BUG_ON(i >= NR_IRQS); - gsi_2_irq[gsi] = i; - IO_APIC_VECTOR(i) = vector; - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", - gsi, vector, i); - return i; -} - static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -782,7 +749,6 @@ while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; - irq = gsi_irq_sharing(irq); break; } default: @@ -830,46 +796,83 @@ } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 }; -int assign_irq_vector(int irq) +static int __assign_irq_vector(int irq, cpumask_t mask) { - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - unsigned long flags; - int vector; - - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); - - spin_lock_irqsave(&vector_lock, flags); - - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { - spin_unlock_irqrestore(&vector_lock, flags); - return IO_APIC_VECTOR(irq); - } + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + static struct { + int vector; + int offset; + } pos[NR_CPUS] = { [ 0 ... 
NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} }; + int old_vector = -1; + int cpu; + + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + + if (IO_APIC_VECTOR(irq) > 0) + old_vector = IO_APIC_VECTOR(irq); + if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) { + return old_vector; + } + + for_each_cpu_mask(cpu, mask) { + int vector, offset; + vector = pos[cpu].vector; + offset = pos[cpu].offset; next: - current_vector += 8; - if (current_vector == IA32_SYSCALL_VECTOR) - goto next; - - if (current_vector >= FIRST_SYSTEM_VECTOR) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - current_vector = FIRST_DEVICE_VECTOR + offset; + vector += 8; + if (vector >= FIRST_SYSTEM_VECTOR) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(pos[cpu].vector == vector)) + continue; + if (vector == IA32_SYSCALL_VECTOR) + goto next; + if (per_cpu(vector_irq, cpu)[vector] != -1) + goto next; + /* Found one! */ + pos[cpu].vector = vector; + pos[cpu].offset = offset; + if (old_vector >= 0) { + int old_cpu = old_vector >> 8; + old_vector &= 0xff; + per_cpu(vector_irq, old_cpu)[old_vector] = -1; + } + per_cpu(vector_irq, cpu)[vector] = irq; + vector |= cpu << 8; + IO_APIC_VECTOR(irq) = vector; + return vector; } + return -ENOSPC; +} - vector = current_vector; - vector_irq[vector] = irq; - if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = vector; +static int assign_irq_vector(int irq, cpumask_t mask) +{ + int vector; + unsigned long flags; + spin_lock_irqsave(&vector_lock, flags); + vector = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return vector; } extern void (*interrupt[NR_IRQS])(void); -static struct hw_interrupt_type ioapic_level_type; -static struct hw_interrupt_type ioapic_edge_type; + +static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -877,16 +880,16 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { - unsigned idx; - - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - irq_desc[idx].chip = &ioapic_level_type; - else - irq_desc[idx].chip = &ioapic_edge_type; - set_intr_gate(vector, interrupt[idx]); + trigger == IOAPIC_LEVEL) { +#ifdef CONFIG_PREEMPT_HARDIRQS + set_irq_chip_and_handler(irq, &ioapic_chip, handle_level_irq); +#else + set_irq_chip_and_handler(irq, &ioapic_chip, handle_fasteoi_irq); +#endif + } else { + set_irq_chip_and_handler(irq, &ioapic_chip, handle_edge_irq); + } } static void __init setup_IO_APIC_irqs(void) @@ -936,8 +939,15 @@ continue; if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; + cpumask_t mask; + vector = assign_irq_vector(irq, TARGET_CPUS); + if (vector < 0) + continue; + + cpus_clear(mask); + cpu_set(vector >> 8, mask); + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector & 0xff; ioapic_register_intr(irq, vector, IOAPIC_AUTO); if (!apic && (irq < 16)) @@ -987,7 +997,7 @@ * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... 
*/ - irq_desc[0].chip = &ioapic_edge_type; + set_irq_chip_and_handler(0, &ioapic_chip, handle_edge_irq); /* * Add it to the IO-APIC irq-routing table: @@ -1106,17 +1116,12 @@ ); } } - if (use_pci_vector()) - printk(KERN_INFO "Using vector-based indexing\n"); printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < NR_IRQS; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; - if (use_pci_vector() && !platform_legacy_irq(i)) - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); - else - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -1502,7 +1507,7 @@ * an edge even if it isn't on the 8259A... */ -static unsigned int startup_edge_ioapic_irq(unsigned int irq) +static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; @@ -1519,107 +1524,16 @@ return was_pending; } -/* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ -static void ack_edge_ioapic_irq(unsigned int irq) -{ - move_irq(irq); - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) - == (IRQ_PENDING | IRQ_DISABLED)) - mask_IO_APIC_irq(irq); - ack_APIC_irq(); -} - -/* - * Level triggered interrupts can just be masked, - * and shutting down and starting up the interrupt - * is the same as enabling and disabling them -- except - * with a startup need to return a "was pending" value. - * - * Level triggered interrupts are special because we - * do not touch any IO-APIC register while handling - * them. We ack the APIC in the end-IRQ handler, not - * in the start-IRQ-handler. Protection against reentrance - * from the same interrupt is still provided, both by the - * generic IRQ layer and by the fact that an unacked local - * APIC does not accept IRQs. 
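Throughout these hunks the patch encodes a vector allocation as (cpu << 8) | vector and recovers the pieces with >> 8 and & 0xff, as in cpu_set(vector >> 8, mask) and entry.vector = vector & 0xff above. A small sketch of that packing, with hypothetical helper names:

#include <stdio.h>

/* Hypothetical helpers mirroring the (cpu << 8 | vector) convention the
 * patch threads through assign_irq_vector() and its callers. */
static int pack(int cpu, int vector) { return (cpu << 8) | vector; }
static int vec_of(int packed) { return packed & 0xff; }
static int cpu_of(int packed) { return packed >> 8; }

int main(void)
{
	int v = pack(3, 0x31);	/* vector 0x31 allocated on CPU 3 */

	printf("packed=0x%x -> cpu=%d vector=0x%x\n",
	       v, cpu_of(v), vec_of(v));
	return 0;
}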
- */ -static unsigned int startup_level_ioapic_irq (unsigned int irq) -{ - unmask_IO_APIC_irq(irq); - - return 0; /* don't check for pending */ -} - -static void end_level_ioapic_irq (unsigned int irq) -{ - move_irq(irq); - ack_APIC_irq(); -} - -#ifdef CONFIG_PCI_MSI -static unsigned int startup_edge_ioapic_vector(unsigned int vector) +static int ioapic_retrigger_irq(unsigned int irq) { - int irq = vector_to_irq(vector); + cpumask_t mask; + unsigned vector; - return startup_edge_ioapic_irq(irq); -} + vector = irq_vector[irq]; + cpus_clear(mask); + cpu_set(vector >> 8, mask); -static void ack_edge_ioapic_vector(unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - ack_edge_ioapic_irq(irq); -} - -static unsigned int startup_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - return startup_level_ioapic_irq (irq); -} - -static void end_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - end_level_ioapic_irq(irq); -} - -static void mask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - mask_IO_APIC_irq(irq); -} - -static void unmask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - unmask_IO_APIC_irq(irq); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_vector (unsigned int vector, - cpumask_t cpu_mask) -{ - int irq = vector_to_irq(vector); - - set_native_irq_info(vector, cpu_mask); - set_ioapic_affinity_irq(irq, cpu_mask); -} -#endif // CONFIG_SMP -#endif // CONFIG_PCI_MSI - -static int ioapic_retrigger(unsigned int irq) -{ - send_IPI_self(IO_APIC_VECTOR(irq)); + send_IPI_mask(mask, vector & 0xff); return 1; } @@ -1633,32 +1547,47 @@ * races. */ -static struct hw_interrupt_type ioapic_edge_type __read_mostly = { - .typename = "IO-APIC-edge", - .startup = startup_edge_ioapic, - .shutdown = shutdown_edge_ioapic, - .enable = enable_edge_ioapic, - .disable = disable_edge_ioapic, - .ack = ack_edge_ioapic, - .end = end_edge_ioapic, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, +static void ack_apic_edge(unsigned int irq) +{ + move_native_irq(irq); + ack_APIC_irq(); +} + +static void ack_apic_level(unsigned int irq) +{ + int do_unmask_irq = 0; + +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } #endif - .retrigger = ioapic_retrigger, -}; -static struct hw_interrupt_type ioapic_level_type __read_mostly = { - .typename = "IO-APIC-level", - .startup = startup_level_ioapic, - .shutdown = shutdown_level_ioapic, - .enable = enable_level_ioapic, - .disable = disable_level_ioapic, - .ack = mask_and_ack_level_ioapic, - .end = end_level_ioapic, + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly.
+ */ + ack_APIC_irq(); + + /* Now we can move and re-enable the irq */ + move_masked_irq(irq); + if (unlikely(do_unmask_irq)) + unmask_IO_APIC_irq(irq); +} + +static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity_irq, #endif - .retrigger = ioapic_retrigger, + .retrigger = ioapic_retrigger_irq, }; static inline void init_IO_APIC_traps(void) @@ -1678,11 -1607,6 @@ */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (use_pci_vector()) { - if (!platform_legacy_irq(tmp)) - if ((tmp = vector_to_irq(tmp)) == -1) - continue; - } if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { /* * Hmm.. We don't have an entry for this, @@ -1693,7 +1617,7 @@ make_8259A_irq(irq); else /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_type; + irq_desc[irq].chip = &no_irq_chip; } } } @@ -1812,8 +1736,6 @@ spin_unlock_irqrestore(&ioapic_lock, flags); } -int timer_uses_ioapic_pin_0; - /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -1831,8 +1753,7 @@ * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); + vector = assign_irq_vector(0, TARGET_CPUS); /* * Subtle, code in do_timer_interrupt() expects an AEOI @@ -1851,9 +1772,6 @@ pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - if (pin1 == 0) - timer_uses_ioapic_pin_0 = 1; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); @@ -2069,6 +1987,124 @@ device_initcall(ioapic_init_sysfs); +/* + * Dynamic irq allocation and deallocation + */ +int create_irq(void) +{ + /* Allocate an unused irq */ + int irq; + int new; + int vector = 0; + unsigned long flags; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for (new = (NR_IRQS - 1); new >= 0; new--) { + if (platform_legacy_irq(new)) + continue; + if (irq_vector[new] != 0) + continue; + vector = __assign_irq_vector(new, TARGET_CPUS); + if (likely(vector > 0)) + irq = new; + break; + } + spin_unlock_irqrestore(&vector_lock, flags); + + if (irq >= 0) { + dynamic_irq_init(irq); + } + return irq; +} + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + dynamic_irq_cleanup(irq); + + spin_lock_irqsave(&vector_lock, flags); + irq_vector[irq] = 0; + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * MSI message composition + */ +#ifdef CONFIG_PCI_MSI +static int msi_msg_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + /* For now this code always uses physical delivery + * mode. + */ + int vector; + unsigned dest; + + vector = assign_irq_vector(irq, TARGET_CPUS); + if (vector >= 0) { + cpumask_t tmp; + + cpus_clear(tmp); + cpu_set(vector >> 8, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(vector); + } + return vector; +} + +static void msi_msg_teardown(unsigned int irq) +{ + return; +} + +static void msi_msg_set_affinity(unsigned int irq, cpumask_t mask, struct msi_msg *msg) +{ + int vector; + unsigned dest; + + vector = assign_irq_vector(irq, mask); + if (vector > 0) { + cpumask_t tmp; + + cpus_clear(tmp); + cpu_set(vector >> 8, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->data &= ~MSI_DATA_VECTOR_MASK; + msg->data |= MSI_DATA_VECTOR(vector); + msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg->address_lo |= MSI_ADDR_DEST_ID(dest); + } +} + +struct msi_ops arch_msi_ops = { + .needs_64bit_address = 0, + .setup = msi_msg_setup, + .teardown = msi_msg_teardown, + .target = msi_msg_set_affinity, +}; + +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -2107,6 +2143,8 @@ { struct IO_APIC_route_entry entry; unsigned long flags; + int vector; + cpumask_t mask; if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", @@ -2115,6 +2153,20 @@ } /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + + + vector = assign_irq_vector(irq, TARGET_CPUS); + if (vector < 0) + return vector; + + cpus_clear(mask); + cpu_set(vector >> 8, mask); + + /* * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. * Note that we mask (disable) IRQs now -- these get enabled when the * corresponding device driver registers for this IRQ. @@ -2124,19 +2176,11 @@ entry.delivery_mode = INT_DELIVERY_MODE; entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); entry.trigger = triggering; entry.polarity = polarity; entry.mask = 1; /* Disabled (masked) */ - - irq = gsi_irq_sharing(irq); - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - entry.vector = assign_irq_vector(irq); + entry.vector = vector & 0xff; apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", ioapic, @@ -2151,7 +2195,7 @@ spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); - set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/irq.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/irq.c --- ./linux-2.6.18.1/arch/x86_64/kernel/irq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/irq.c 2007-05-19 23:58:35.000000000 +0900 @@ -79,7 +79,8 @@ for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %8s", irq_desc[i].chip->name); + seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) @@ -116,7 +117,18 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { /* high bit used in ret_from_ code */ - unsigned irq = ~regs->orig_rax; + unsigned vector = ~regs->orig_rax; + unsigned irq; + + exit_idle(); + irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; + +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->rip, irq, 0); if (unlikely(irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", @@ -124,12 +136,24 @@ BUG(); } - exit_idle(); - irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW stack_overflow_check(regs); #endif - __do_IRQ(irq, regs); +#ifdef CONFIG_NO_HZ + if (idle_cpu(smp_processor_id())) { + update_jiffies(); + /* + * Force polling-idle loops to break out into + * the sched-timer setting code, to make sure + * that timer interval changes due to __mod_timer() + * in IRQ context get properly propagated: + */ + if (tsk_is_polling(current)) + set_need_resched(); + } +#endif + + generic_handle_irq(irq, regs); irq_exit(); return 1; diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/mpparse.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/mpparse.c --- ./linux-2.6.18.1/arch/x86_64/kernel/mpparse.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/mpparse.c 2007-05-19 23:58:35.000000000 +0900 @@ -909,20 +909,11 @@ return; } -#define MAX_GSI_NUM 4096 - int mp_register_gsi(u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; - static int pci_irq = 16; - /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, to the IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -955,42 +946,11 @@ if ((1< 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_fadt.sci_int) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/nmi.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/nmi.c --- ./linux-2.6.18.1/arch/x86_64/kernel/nmi.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/nmi.c 2007-05-19 23:58:35.000000000 +0900 @@ -37,7 +37,7 @@ * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); +static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -127,7 +127,9 @@ static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; +#ifndef CONFIG_PREEMPT_RT local_irq_enable_in_hardirq(); +#endif /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -526,12 +528,42 @@ touch_softlockup_watchdog(); } +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -549,6 +581,11 @@ */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for (i = 0; i < NR_CPUS; i++) + nmi_show_regs[i] = 1; + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/pmtimer.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/pmtimer.c --- ./linux-2.6.18.1/arch/x86_64/kernel/pmtimer.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/pmtimer.c 2007-05-19 23:58:35.000000000 +0900 @@ -24,15 +24,6 @@ #include #include -/* The I/O port the PMTMR resides at. 
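The nmi_show_all_regs() code in the nmi.c hunk above is a flag handshake: the initiator raises one flag per CPU, each CPU's NMI tick clears its own flag after dumping registers, and the initiator spins until every flag reads zero again. A user-space sketch of the same shape, using pthreads and a volatile spin in place of the kernel's barrier() loop (all names illustrative):

#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

static volatile int nmi_show_regs[NCPUS];

/* Stand-in for the per-CPU NMI path: wait for the request flag, "dump
 * registers", then clear the flag to acknowledge. */
static void *nmi_handler(void *arg)
{
	int cpu = (int)(long)arg;

	while (nmi_show_regs[cpu] != 1)
		;	/* wait for a request */
	printf("cpu%d: dumping registers\n", cpu);
	nmi_show_regs[cpu] = 0;
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, nmi_handler, (void *)(long)i);
	for (i = 0; i < NCPUS; i++)
		nmi_show_regs[i] = 1;	/* request a dump everywhere */
	for (i = 0; i < NCPUS; i++)
		while (nmi_show_regs[i] == 1)
			;	/* spin until that CPU acknowledged */
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}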
- * The location is detected during setup_arch(), - * in arch/i386/kernel/acpi/boot.c */ -u32 pmtmr_ioport __read_mostly; - -/* value of the Power timer at last timer interrupt */ -static u32 offset_delay; -static u32 last_pmtmr_tick; - #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ static inline u32 cyc2us(u32 cycles) @@ -48,38 +39,6 @@ return (cycles >> 10); } -int pmtimer_mark_offset(void) -{ - static int first_run = 1; - unsigned long tsc; - u32 lost; - - u32 tick = inl(pmtmr_ioport); - u32 delta; - - delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); - - last_pmtmr_tick = tick; - monotonic_base += delta * NSEC_PER_USEC; - - delta += offset_delay; - - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } - - return lost - 1; -} - static unsigned pmtimer_wait_tick(void) { u32 a, b; @@ -101,23 +60,6 @@ } while (cyc2us(b - a) < us); } -void pmtimer_resume(void) -{ - last_pmtmr_tick = inl(pmtmr_ioport); -} - -unsigned int do_gettimeoffset_pm(void) -{ - u32 now, offset, delta = 0; - - offset = last_pmtmr_tick; - now = inl(pmtmr_ioport); - delta = (now - offset) & ACPI_PM_MASK; - - return offset_delay + cyc2us(delta); -} - - static int __init nopmtimer_setup(char *s) { pmtmr_ioport = 0; diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/process.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/process.c --- ./linux-2.6.18.1/arch/x86_64/kernel/process.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/process.c 2007-05-19 23:58:35.000000000 +0900 @@ -113,11 +113,15 @@ current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { local_irq_disable(); - if (!need_resched()) - safe_halt(); - else + if (!need_resched() && !need_resched_delayed()) { + if (!hrtimer_stop_sched_tick()) + safe_halt(); + else + local_irq_enable(); + hrtimer_restart_sched_tick(); + } else local_irq_enable(); } current_thread_info()->status |= TS_POLLING; @@ -131,6 +135,14 @@ static void poll_idle (void) { local_irq_enable(); + while (!need_resched() && !need_resched_delayed()) { + hrtimer_stop_sched_tick(); + local_irq_enable(); + while (!need_resched() && !need_resched_delayed() && !rcu_pending(smp_processor_id()) && !local_softirq_pending()) + rep_nop(); + hrtimer_restart_sched_tick(); + local_irq_enable(); + } asm volatile( "2:" @@ -206,7 +218,9 @@ current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -218,14 +232,16 @@ idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + stop_critical_timing(); enter_idle(); idle(); __exit_idle(); } - - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } @@ -240,13 +256,16 @@ { local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { + if (hrtimer_stop_sched_tick()) + break; __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() && 
!need_resched_delayed()) break; __mwait(0, 0); } + hrtimer_restart_sched_tick(); } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) @@ -346,13 +365,14 @@ struct thread_struct *t = &me->thread; if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/setup64.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/setup64.c --- ./linux-2.6.18.1/arch/x86_64/kernel/setup64.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/setup64.c 2007-05-19 23:58:35.000000000 +0900 @@ -116,7 +116,7 @@ } } -void pda_init(int cpu) +void notrace pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -185,7 +185,7 @@ * 'CPU state barrier', nothing should get across. * A lot of state is already set up in PDA init. */ -void __cpuinit cpu_init (void) +void __cpuinit notrace cpu_init (void) { int cpu = stack_smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/signal.c --- ./linux-2.6.18.1/arch/x86_64/kernel/signal.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/signal.c 2007-05-19 23:58:35.000000000 +0900 @@ -431,6 +431,13 @@ siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/smp.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/smp.c --- ./linux-2.6.18.1/arch/x86_64/kernel/smp.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/smp.c 2007-05-19 23:58:35.000000000 +0900 @@ -57,7 +57,7 @@ struct mm_struct *flush_mm; unsigned long flush_va; #define FLUSH_ALL -1ULL - spinlock_t tlbstate_lock; + raw_spinlock_t tlbstate_lock; }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -296,10 +296,20 @@ } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/smpboot.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/smpboot.c --- ./linux-2.6.18.1/arch/x86_64/kernel/smpboot.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/smpboot.c 2007-05-19 23:58:35.000000000 +0900 @@ -204,7 +204,7 @@ latency and low latency is the primary objective here. 
-AK */ #define no_cpu_relax() barrier() -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); +static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock); static volatile __cpuinitdata unsigned long go[SLAVE + 1]; static int notscsync __cpuinitdata; @@ -530,7 +530,7 @@ /* * Setup code on secondary processor (after comming out of the trampoline) */ -void __cpuinit start_secondary(void) +void __cpuinit notrace start_secondary(void) { /* * Dont put anything before smp_callin(), SMP diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/time.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/time.c --- ./linux-2.6.18.1/arch/x86_64/kernel/time.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/time.c 2007-05-19 23:58:35.000000000 +0900 @@ -39,149 +39,29 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif +#include -#ifdef CONFIG_CPU_FREQ -static void cpufreq_delayed_get(void); -#endif extern void i8254_timer_resume(void); extern int using_apic_timer; +extern struct clock_event pit_clockevent; -static char *time_init_gtod(void); DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); - -int nohpet __initdata = 0; -static int notsc __initdata = 0; +DEFINE_RAW_SPINLOCK(i8253_lock); #define USEC_PER_TICK (USEC_PER_SEC / HZ) #define NSEC_PER_TICK (NSEC_PER_SEC / HZ) -#define FSEC_PER_TICK (FSEC_PER_SEC / HZ) -#define NS_SCALE 10 /* 2^10, carefully chosen */ -#define US_SCALE 32 /* 2^32, arbitralrily chosen */ -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ -EXPORT_SYMBOL(cpu_khz); -static unsigned long hpet_period; /* fsecs / HPET clock */ -unsigned long hpet_tick; /* HPET clocks / interrupt */ -int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ -unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ -unsigned long long monotonic_base; - -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ - -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; -struct timespec __xtime __section_xtime; -struct timezone __sys_tz __section_sys_tz; - -/* - * do_gettimeoffset() returns microseconds since last timer interrupt was - * triggered by hardware. A memory read of HPET is slower than a register read - * of TSC, but much more reliable. It's also synchronized to the timer - * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a - * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. - * This is not a problem, because jiffies hasn't updated either. They are bound - * together by xtime_lock. - */ - -static inline unsigned int do_gettimeoffset_tsc(void) -{ - unsigned long t; - unsigned long x; - t = get_cycles_sync(); - if (t < vxtime.last_tsc) - t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE; - return x; -} - -static inline unsigned int do_gettimeoffset_hpet(void) -{ - /* cap counter read to one tick to avoid inconsistencies */ - unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; - return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE; -} - -unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; - -/* - * This version of gettimeofday() has microsecond resolution and better than - * microsecond precision, as we're using at least a 10 MHz (usually 14.31818 - * MHz) HPET timer. 
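The do_gettimeoffset_tsc() path being deleted above used fixed-point scaling: vxtime.tsc_quot holds microseconds-per-cycle scaled by 2^US_SCALE, so one multiply and one right shift turn a TSC delta into microseconds. A sketch with an assumed 2 GHz processor (cpu_khz = 2000000):

#include <stdio.h>
#include <stdint.h>

#define US_SCALE      32
#define USEC_PER_MSEC 1000ULL

int main(void)
{
	unsigned long cpu_khz = 2000000;	/* assume a 2 GHz TSC */
	/* microseconds per cycle, scaled by 2^32 (vxtime.tsc_quot) */
	uint64_t tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;

	/* 2,000,000 cycles at 2 GHz is 1 ms, i.e. ~1000 us: */
	uint64_t delta_cyc = 2000000;
	uint64_t usec = (delta_cyc * tsc_quot) >> US_SCALE;

	printf("tsc_quot=%llu -> %llu us\n",
	       (unsigned long long)tsc_quot, (unsigned long long)usec);
	return 0;
}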
- */ - -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq, t; - unsigned int sec, usec; - - do { - seq = read_seqbegin(&xtime_lock); - - sec = xtime.tv_sec; - usec = xtime.tv_nsec / NSEC_PER_USEC; - - /* i386 does some correction here to keep the clock - monotonous even when ntpd is fixing drift. - But they didn't work for me, there is a non monotonic - clock anyways with ntp. - I dropped all corrections now until a real solution can - be found. Note when you fix it here you need to do the same - in arch/x86_64/kernel/vsyscall.c and export all needed - variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * USEC_PER_TICK + - do_gettimeoffset(); - usec += t; - - } while (read_seqretry(&xtime_lock, seq)); - - tv->tv_sec = sec + usec / USEC_PER_SEC; - tv->tv_usec = usec % USEC_PER_SEC; -} - -EXPORT_SYMBOL(do_gettimeofday); - -/* - * settimeofday() first undoes the correction that gettimeofday would do - * on the time, and then saves it. This is ugly, but has been like this for - * ages already. - */ - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - - nsec -= do_gettimeoffset() * NSEC_PER_USEC + - (jiffies - wall_jiffies) * NSEC_PER_TICK; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} -EXPORT_SYMBOL(do_settimeofday); +volatile unsigned long jiffies = INITIAL_JIFFIES; unsigned long profile_pc(struct pt_regs *regs) { @@ -277,84 +157,9 @@ } -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - unsigned long seq; - u32 last_offset, this_offset, offset; - unsigned long long base; - - if (vxtime.mode == VXTIME_HPET) { - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last; - base = monotonic_base; - this_offset = hpet_readl(HPET_COUNTER); - } while (read_seqretry(&xtime_lock, seq)); - offset = (this_offset - last_offset); - offset *= NSEC_PER_TICK / hpet_tick; - } else { - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last_tsc; - base = monotonic_base; - } while (read_seqretry(&xtime_lock, seq)); - this_offset = get_cycles_sync(); - /* FIXME: 1000 or 1000000? */ - offset = (this_offset - last_offset)*1000 / cpu_khz; - } - return base + offset; -} -EXPORT_SYMBOL(monotonic_clock); - -static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) -{ - static long lost_count; - static int warned; - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer tick(s)! 
", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " - "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; - -#ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) - cpufreq_delayed_get(); -#endif -} - void main_timer_handler(struct pt_regs *regs) { static unsigned long rtc_update = 0; - unsigned long tsc; - int delay = 0, offset = 0, lost = 0; - /* * Here we are in the timer irq handler. We have irqs locally disabled (so we * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running @@ -362,92 +167,11 @@ * variables, because both do_timer() and us change them -arca+vojtech */ - write_seqlock(&xtime_lock); - - if (vxtime.hpet_address) - offset = hpet_readl(HPET_COUNTER); - - if (hpet_use_timer) { - /* if we're using the hpet timer functionality, - * we can more accurately know the counter value - * when the timer interrupt occured. - */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - delay = hpet_readl(HPET_COUNTER) - offset; - } else if (!pmtmr_ioport) { - spin_lock(&i8253_lock); - outb_p(0x00, 0x43); - delay = inb_p(0x40); - delay |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - delay = LATCH - 1 - delay; - } - - tsc = get_cycles_sync(); - - if (vxtime.mode == VXTIME_HPET) { - if (offset - vxtime.last > hpet_tick) { - lost = (offset - vxtime.last) / hpet_tick - 1; - } - - monotonic_base += - (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick; - - vxtime.last = offset; -#ifdef CONFIG_X86_PM_TIMER - } else if (vxtime.mode == VXTIME_PMTMR) { - lost = pmtimer_mark_offset(); -#endif - } else { - offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK; - - if (offset < 0) - offset = 0; - - if (offset > USEC_PER_TICK) { - lost = offset / USEC_PER_TICK; - offset %= USEC_PER_TICK; - } - - /* FIXME: 1000 or 1000000? */ - monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz; - - vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; - - if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> US_SCALE) < offset) - vxtime.last_tsc = tsc - - (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; - } - - if (lost > 0) { - handle_lost_ticks(lost, regs); - jiffies += lost; - } - /* * Do the timer stuff. */ - do_timer(regs); -#ifndef CONFIG_SMP - update_process_times(user_mode(regs)); -#endif - -/* - * In the SMP case we use the local APIC timer interrupt to do the profiling, - * except when we simulate SMP mode on a uniprocessor system, in that case we - * have to call the local interrupt handler. - */ - -#ifndef CONFIG_X86_LOCAL_APIC - profile_tick(CPU_PROFILING, regs); -#else - if (!using_apic_timer) - smp_local_timer_interrupt(regs); -#endif - + pit_clockevent.event_handler(regs); /* * If we have an externally synchronized Linux clock, then update CMOS clock * accordingly every ~11 minutes. 
set_rtc_mmss() will be called in the jiffy @@ -462,13 +186,10 @@ rtc_update = xtime.tv_sec + 660; } - write_sequnlock(&xtime_lock); } static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - if (apic_runs_main_timer > 1) - return IRQ_HANDLED; main_timer_handler(regs); #ifdef CONFIG_X86_LOCAL_APIC if (using_apic_timer) @@ -477,39 +198,6 @@ return IRQ_HANDLED; } -static unsigned int cyc2ns_scale __read_mostly; - -static inline void set_cyc2ns_scale(unsigned long cpu_khz) -{ - cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> NS_SCALE; -} - -unsigned long long sched_clock(void) -{ - unsigned long a = 0; - -#if 0 - /* Don't do a HPET read here. Using TSC always is much faster - and HPET may not be mapped yet when the scheduler first runs. - Disadvantage is a small drift between CPUs in some configurations, - but that should be tolerable. */ - if (__vxtime.mode == VXTIME_HPET) - return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE; -#endif - - /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, - which means it is not completely exact and may not be monotonous between - CPUs. But the errors should be too small to matter for scheduling - purposes. */ - - rdtscll(a); - return cycles_2_ns(a); -} static unsigned long get_cmos_time(void) { @@ -562,142 +250,6 @@ return mktime(year, mon, day, hour, min, sec); } -#ifdef CONFIG_CPU_FREQ - -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency - changes. - - RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - not that important because current Opteron setups do not support - scaling on SMP anyroads. - - Should fix up last_tsc too. Currently gettimeofday in the - first tick after the change will be slightly wrong. */ - -#include - -static unsigned int cpufreq_delayed_issched = 0; -static unsigned int cpufreq_init = 0; -static struct work_struct cpufreq_delayed_get_work; - -static void handle_cpufreq_delayed_get(void *v) -{ - unsigned int cpu; - for_each_online_cpu(cpu) { - cpufreq_get(cpu); - } - cpufreq_delayed_issched = 0; -} - -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static void cpufreq_delayed_get(void) -{ - static int warned; - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - if (!warned) { - warned = 1; - printk(KERN_DEBUG - "Losing some ticks... 
checking if CPU frequency changed.\n"); - } - schedule_work(&cpufreq_delayed_get_work); - } -} - -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; - -static unsigned long cpu_khz_ref = 0; - -static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; - - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) - return 0; - - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -#ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; -#else - lpj = &boot_cpu_data.loops_per_jiffy; -#endif - - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; - cpu_khz_ref = cpu_khz; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - *lpj = - cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - - cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; - } - - set_cyc2ns_scale(cpu_khz_ref); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); - if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER)) - cpufreq_init = 1; - return 0; -} - -core_initcall(cpufreq_tsc); - -#endif - -/* - * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing - * it to the HPET timer of known frequency. - */ - -#define TICK_COUNT 100000000 - -static unsigned int __init hpet_calibrate_tsc(void) -{ - int tsc_start, hpet_start; - int tsc_now, hpet_now; - unsigned long flags; - - local_irq_save(flags); - local_irq_disable(); - - hpet_start = hpet_readl(HPET_COUNTER); - rdtscl(tsc_start); - - do { - local_irq_disable(); - hpet_now = hpet_readl(HPET_COUNTER); - tsc_now = get_cycles_sync(); - local_irq_restore(flags); - } while ((tsc_now - tsc_start) < TICK_COUNT && - (hpet_now - hpet_start) < TICK_COUNT); - - return (tsc_now - tsc_start) * 1000000000L - / ((hpet_now - hpet_start) * hpet_period / 1000); -} - /* * pit_calibrate_tsc() uses the speaker output (channel 2) of @@ -728,137 +280,84 @@ return (end - start) / 50; } -#ifdef CONFIG_HPET -static __init int late_hpet_init(void) -{ - struct hpet_data hd; - unsigned int ntimer; - - if (!vxtime.hpet_address) - return 0; - - memset(&hd, 0, sizeof (hd)); - - ntimer = hpet_readl(HPET_ID); - ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; - ntimer++; - - /* - * Register with driver. - * Timer0 and Timer1 is used by platform. 
- */ - hd.hd_phys_address = vxtime.hpet_address; - hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); - hd.hd_nirqs = ntimer; - hd.hd_flags = HPET_DATA_PLATFORM; - hpet_reserve_timer(&hd, 0); -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif - hd.hd_irq[0] = HPET_LEGACY_8254; - hd.hd_irq[1] = HPET_LEGACY_RTC; - if (ntimer > 2) { - struct hpet *hpet; - struct hpet_timer *timer; - int i; - - hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - timer = &hpet->hpet_timers[2]; - for (i = 2; i < ntimer; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & - Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 - } +static void __init __pit_init(int val, u8 mode) +{ + unsigned long flags; - hpet_alloc(&hd); - return 0; + spin_lock_irqsave(&i8253_lock, flags); + outb_p(mode, PIT_MODE); + outb_p(val & 0xff, PIT_CH0); /* LSB */ + outb_p(val >> 8, PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); } -fs_initcall(late_hpet_init); -#endif -static int hpet_timer_stop_set_go(unsigned long tick) +static void init_pit_timer(int mode, struct clock_event *evt) { - unsigned int cfg; - -/* - * Stop the timers and reset the main counter. - */ + unsigned long flags; - cfg = hpet_readl(HPET_CFG); - cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); - hpet_writel(cfg, HPET_CFG); - hpet_writel(0, HPET_COUNTER); - hpet_writel(0, HPET_COUNTER + 4); + spin_lock_irqsave(&i8253_lock, flags); -/* - * Set up timer 0, as periodic with first interrupt to happen at hpet_tick, - * and period also hpet_tick. - */ - if (hpet_use_timer) { - hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | - HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ - hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ - cfg |= HPET_CFG_LEGACY; + switch(mode) { + case CLOCK_EVT_PERIODIC: + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + break; + + case CLOCK_EVT_ONESHOT: + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + break; + case CLOCK_EVT_SHUTDOWN: + outb_p(0x30, PIT_MODE); + outb_p(0, PIT_CH0); /* LSB */ + outb_p(0, PIT_CH0); /* MSB */ + disable_irq(0); + break; } -/* - * Go! - */ - - cfg |= HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); - - return 0; + spin_unlock_irqrestore(&i8253_lock, flags); } -static int hpet_init(void) +static void pit_next_event(unsigned long delta, struct clock_event *evt) { - unsigned int id; - - if (!vxtime.hpet_address) - return -1; - set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address); - __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); - -/* - * Read the period, compute tick and quotient. 
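The magic constants written to PIT_MODE in init_pit_timer() above are command bytes: channel in bits 7-6, access width in bits 5-4 (3 selects LSB then MSB), counting mode in bits 3-1. A tiny sketch that rebuilds 0x34 (mode 2, periodic), 0x38 (mode 4, one-shot strobe) and 0x30 (mode 0, shutdown/reload):

#include <stdio.h>
#include <stdint.h>

/* Rebuild a PIT command byte from its fields. */
static uint8_t pit_cmd(int channel, int access, int mode)
{
	return (uint8_t)((channel << 6) | (access << 4) | (mode << 1));
}

int main(void)
{
	printf("periodic=0x%02x oneshot=0x%02x shutdown=0x%02x\n",
	       pit_cmd(0, 3, 2),	/* 0x34: mode 2, rate generator */
	       pit_cmd(0, 3, 4),	/* 0x38: mode 4, software strobe */
	       pit_cmd(0, 3, 0));	/* 0x30: mode 0, terminal count */
	return 0;
}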
- */ - - id = hpet_readl(HPET_ID); - - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) - return -1; - - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < 100000 || hpet_period > 100000000) - return -1; - - hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; - - hpet_use_timer = (id & HPET_ID_LEGSUP); + unsigned long flags; - return hpet_timer_stop_set_go(hpet_tick); + spin_lock_irqsave(&i8253_lock, flags); + outb_p(delta & 0xff , PIT_CH0); /* LSB */ + outb(delta >> 8 , PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); } -static int hpet_reenable(void) +struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_PROFILE | CLOCK_CAP_UPDATE +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .shift = 32, +}; + +void setup_pit_timer(void) { - return hpet_timer_stop_set_go(hpet_tick); + pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); + pit_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFF, &pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + register_global_clockevent(&pit_clockevent); } -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 -static void __init __pit_init(int val, u8 mode) -{ - unsigned long flags; - spin_lock_irqsave(&i8253_lock, flags); - outb_p(mode, PIT_MODE); - outb_p(val & 0xff, PIT_CH0); /* LSB */ - outb_p(val >> 8, PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} void __init pit_init(void) { @@ -873,9 +372,9 @@ void __init stop_timer_interrupt(void) { char *name; - if (vxtime.hpet_address) { + if (hpet_address) { name = "HPET"; - hpet_timer_stop_set_go(0); + hpet_stop(); } else { name = "PIT"; pit_stop_interrupt(); @@ -890,119 +389,47 @@ } static struct irqaction irq0 = { - timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL + timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL }; void __init time_init(void) { char *timename; - char *gtod; if (nohpet) - vxtime.hpet_address = 0; - + hpet_address = 0; xtime.tv_sec = get_cmos_time(); xtime.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - if (!hpet_init()) - vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period; - else - vxtime.hpet_address = 0; + if (hpet_arch_init()) + hpet_address = 0; + + setup_pit_timer(); if (hpet_use_timer) { /* set tick_nsec to use the proper rate for HPET */ tick_nsec = TICK_NSEC_HPET; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; -#ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport && !vxtime.hpet_address) { - vxtime_hz = PM_TIMER_FREQUENCY; - timename = "PM"; - pit_init(); - cpu_khz = pit_calibrate_tsc(); -#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); timename = "PIT"; } - vxtime.mode = VXTIME_TSC; - gtod = time_init_gtod(); + if (unsynchronized_tsc()) + mark_tsc_unstable(); - printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", - vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; - vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; - vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); } -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. 
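setup_pit_timer() above fills in the clockevent scaling fields: mult is cycles-per-nanosecond scaled by 2^32 (what div_sc() computes), and clockevent_delta2ns() inverts it to bound the programmable delay. A worked sketch assuming the PC PIT input clock of 1193182 Hz (the value of CLOCK_TICK_RATE on this platform):

#include <stdio.h>
#include <stdint.h>

#define PIT_TICK_RATE 1193182ULL	/* PC PIT input clock, Hz (assumed) */
#define NSEC_PER_SEC  1000000000ULL

int main(void)
{
	/* mult: cycles per ns, scaled by 2^32 (what div_sc() computes) */
	uint64_t mult = (PIT_TICK_RATE << 32) / NSEC_PER_SEC;

	/* clockevent_delta2ns() inverted: the 0x7FFF max count in ns */
	uint64_t max_ns = ((uint64_t)0x7FFF << 32) / mult;

	printf("mult=%llu, max delta ~ %llu ns (~%llu ms)\n",
	       (unsigned long long)mult, (unsigned long long)max_ns,
	       (unsigned long long)(max_ns / 1000000));
	return 0;
}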
- */ -__cpuinit int unsynchronized_tsc(void) -{ -#ifdef CONFIG_SMP - if (apic_is_clustered_box()) - return 1; -#endif - /* Most intel systems have synchronized TSCs except for - multi node systems */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { -#ifdef CONFIG_ACPI - /* But TSC doesn't tick in C3 so don't use it there */ - if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100) - return 1; -#endif - return 0; - } - - /* Assume multi socket systems are not synchronized */ - return num_present_cpus() > 1; -} - -/* - * Decide what mode gettimeofday should use. - */ -__init static char *time_init_gtod(void) -{ - char *timetype; - - if (unsynchronized_tsc()) - notsc = 1; - if (vxtime.hpet_address && notsc) { - timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; -#ifdef CONFIG_X86_PM_TIMER - /* Using PM for gettimeofday is quite slow, but we have no other - choice because the TSC is too unreliable on some systems. */ - } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { - timetype = "PM"; - do_gettimeoffset = do_gettimeoffset_pm; - vxtime.mode = VXTIME_PMTMR; - sysctl_vsyscall = 0; - printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); -#endif - } else { - timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; - vxtime.mode = VXTIME_TSC; - } - return timetype; -} __setup("report_lost_ticks", time_setup); @@ -1033,7 +460,7 @@ unsigned long ctime = get_cmos_time(); unsigned long sleep_length = (ctime - sleep_start) * HZ; - if (vxtime.hpet_address) + if (hpet_address) hpet_reenable(); else i8254_timer_resume(); @@ -1042,21 +469,9 @@ write_seqlock_irqsave(&xtime_lock,flags); xtime.tv_sec = sec; xtime.tv_nsec = 0; - if (vxtime.mode == VXTIME_HPET) { - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); -#ifdef CONFIG_X86_PM_TIMER - } else if (vxtime.mode == VXTIME_PMTMR) { - pmtimer_resume(); -#endif - } else - vxtime.last_tsc = get_cycles_sync(); - write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; - monotonic_base += sleep_length * (NSEC_PER_SEC/HZ); + write_sequnlock_irqrestore(&xtime_lock,flags); touch_softlockup_watchdog(); return 0; } @@ -1083,243 +498,3 @@ device_initcall(time_init_device); -#ifdef CONFIG_HPET_EMULATE_RTC -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET - * is enabled, we support RTC interrupt functionality in software. - * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. 
- */ -#include - -#define DEFAULT_RTC_INT_FREQ 64 -#define RTC_NUM_INTS 1 - -static unsigned long UIE_on; -static unsigned long prev_update_sec; - -static unsigned long AIE_on; -static struct rtc_time alarm_time; - -static unsigned long PIE_on; -static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; -static unsigned long PIE_count; - -static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ -static unsigned int hpet_t1_cmp; /* cached comparator register */ - -int is_hpet_enabled(void) -{ - return vxtime.hpet_address != 0; -} - -/* - * Timer 1 for RTC, we do not use periodic interrupt feature, - * even if HPET supports periodic interrupts on Timer 1. - * The reason being, to set up a periodic interrupt in HPET, we need to - * stop the main counter. And if we do that everytime someone diables/enables - * RTC, we will have adverse effect on main kernel timer running on Timer 0. - * So, for the time being, simulate the periodic interrupt in software. - * - * hpet_rtc_timer_init() is called for the first time and during subsequent - * interuppts reinit happens through hpet_rtc_timer_reinit(). - */ -int hpet_rtc_timer_init(void) -{ - unsigned int cfg, cnt; - unsigned long flags; - - if (!is_hpet_enabled()) - return 0; - /* - * Set the counter 1 and enable the interrupts. - */ - if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) - hpet_rtc_int_freq = PIE_freq; - else - hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - - local_irq_save(flags); - cnt = hpet_readl(HPET_COUNTER); - cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; - local_irq_restore(flags); - - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - return 1; -} - -static void hpet_rtc_timer_reinit(void) -{ - unsigned int cfg, cnt; - - if (unlikely(!(PIE_on | AIE_on | UIE_on))) { - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); - return; - } - - if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) - hpet_rtc_int_freq = PIE_freq; - else - hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - - /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; -} - -/* - * The functions below are called from rtc driver. - * Return 0 if HPET is not being used. - * Otherwise do the necessary changes and return 1. 
- */ -int hpet_mask_rtc_irq_bit(unsigned long bit_mask) -{ - if (!is_hpet_enabled()) - return 0; - - if (bit_mask & RTC_UIE) - UIE_on = 0; - if (bit_mask & RTC_PIE) - PIE_on = 0; - if (bit_mask & RTC_AIE) - AIE_on = 0; - - return 1; -} - -int hpet_set_rtc_irq_bit(unsigned long bit_mask) -{ - int timer_init_reqd = 0; - - if (!is_hpet_enabled()) - return 0; - - if (!(PIE_on | AIE_on | UIE_on)) - timer_init_reqd = 1; - - if (bit_mask & RTC_UIE) { - UIE_on = 1; - } - if (bit_mask & RTC_PIE) { - PIE_on = 1; - PIE_count = 0; - } - if (bit_mask & RTC_AIE) { - AIE_on = 1; - } - - if (timer_init_reqd) - hpet_rtc_timer_init(); - - return 1; -} - -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) -{ - if (!is_hpet_enabled()) - return 0; - - alarm_time.tm_hour = hrs; - alarm_time.tm_min = min; - alarm_time.tm_sec = sec; - - return 1; -} - -int hpet_set_periodic_freq(unsigned long freq) -{ - if (!is_hpet_enabled()) - return 0; - - PIE_freq = freq; - PIE_count = 0; - - return 1; -} - -int hpet_rtc_dropped_irq(void) -{ - if (!is_hpet_enabled()) - return 0; - - return 1; -} - -irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct rtc_time curr_time; - unsigned long rtc_int_flag = 0; - int call_rtc_interrupt = 0; - - hpet_rtc_timer_reinit(); - - if (UIE_on | AIE_on) { - rtc_get_rtc_time(&curr_time); - } - if (UIE_on) { - if (curr_time.tm_sec != prev_update_sec) { - /* Set update int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag = RTC_UF; - prev_update_sec = curr_time.tm_sec; - } - } - if (PIE_on) { - PIE_count++; - if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { - /* Set periodic int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag |= RTC_PF; - PIE_count = 0; - } - } - if (AIE_on) { - if ((curr_time.tm_sec == alarm_time.tm_sec) && - (curr_time.tm_min == alarm_time.tm_min) && - (curr_time.tm_hour == alarm_time.tm_hour)) { - /* Set alarm int info, call real rtc int routine */ - call_rtc_interrupt = 1; - rtc_int_flag |= RTC_AF; - } - } - if (call_rtc_interrupt) { - rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - rtc_interrupt(rtc_int_flag, dev_id, regs); - } - return IRQ_HANDLED; -} -#endif - -static int __init nohpet_setup(char *s) -{ - nohpet = 1; - return 1; -} - -__setup("nohpet", nohpet_setup); - -int __init notsc_setup(char *s) -{ - notsc = 1; - return 1; -} - -__setup("notsc", notsc_setup); diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/traps.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/traps.c --- ./linux-2.6.18.1/arch/x86_64/kernel/traps.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/traps.c 2007-05-19 23:58:35.000000000 +0900 @@ -368,6 +368,7 @@ #undef HANDLE_STACK printk("\n"); + print_traces(tsk); } static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) @@ -497,7 +498,7 @@ EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int die_owner = -1; static unsigned int die_nest_count; diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/tsc.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/tsc.c --- ./linux-2.6.18.1/arch/x86_64/kernel/tsc.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/tsc.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NS_SCALE 10 /* 2^10, 
carefully chosen */
+#define US_SCALE	32 /* 2^32, arbitrarily chosen */
+
+static int notsc __initdata = 0;
+
+unsigned int cpu_khz;	/* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
+
+static unsigned int cyc2ns_scale __read_mostly;
+
+void set_cyc2ns_scale(unsigned long khz)
+{
+	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> NS_SCALE;
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long a = 0;
+
+	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
+	   which means it is not completely exact and may not be monotonic between
+	   CPUs. But the errors should be too small to matter for scheduling
+	   purposes. */
+
+	rdtscll(a);
+	return cycles_2_ns(a);
+}
+
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+	return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+	tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+   changes.
+
+   RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
+   not that important because current Opteron setups do not support
+   scaling on SMP anyway.
+
+   Should fix up last_tsc too. Currently gettimeofday in the
+   first tick after the change will be slightly wrong. */
+
+#include
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+	unsigned int cpu;
+	for_each_online_cpu(cpu) {
+		cpufreq_get(cpu);
+	}
+	cpufreq_delayed_issched = 0;
+}
+
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+static unsigned long cpu_khz_ref = 0;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				 void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned long *lpj, dummy;
+
+	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+		return 0;
+
+	lpj = &dummy;
+	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+#else
+		lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+	if (!ref_freq) {
+		ref_freq = freq->old;
+		loops_per_jiffy_ref = *lpj;
+		cpu_khz_ref = cpu_khz;
+	}
+	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+	    (val == CPUFREQ_RESUMECHANGE)) {
+		*lpj =
+		    cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+			mark_tsc_unstable();
+	}
+
+	set_cyc2ns_scale(cpu_khz_ref);
+
+	return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+	.notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+				       CPUFREQ_TRANSITION_NOTIFIER))
+		cpufreq_init = 1;
+	return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
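The cyc2ns conversion above is plain fixed-point arithmetic: set_cyc2ns_scale() precomputes (NSEC_PER_MSEC << 10) / khz once, after which sched_clock() turns a raw TSC count into nanoseconds with one multiply and one shift. A minimal user-space sketch of the same math (the 2.2 GHz frequency is only an illustrative value; the truncation in the scale factor costs about 0.1% accuracy here):

	#include <stdio.h>

	#define NS_SCALE	10		/* 2^10, matches the kernel define */
	#define NSEC_PER_MSEC	1000000UL

	int main(void)
	{
		unsigned long khz = 2200000;	/* assume a 2.2 GHz TSC */
		unsigned long scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
		unsigned long long cyc = 2200000000ULL;	/* one second of cycles */

		/* (cyc * scale) >> NS_SCALE ~= cyc * 10^6 / khz = elapsed ns */
		printf("scale=%lu ns=%llu\n", scale,
		       (cyc * scale) >> NS_SCALE);	/* ~10^9 */
		return 0;
	}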
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+#ifdef CONFIG_SMP
+	if (apic_is_clustered_box())
+		return 1;
+#endif
+	/* Most Intel systems have synchronized TSCs except for
+	   multi node systems */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+#ifdef CONFIG_ACPI
+		/* But TSC doesn't tick in C3 so don't use it there */
+		if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100)
+			return 1;
+#endif
+		return 0;
+	}
+
+	/* Assume multi socket systems are not synchronized */
+	return num_present_cpus() > 1;
+}
+
+int __init notsc_setup(char *s)
+{
+	notsc = 1;
+	return 1;
+}
+
+__setup("notsc", notsc_setup);
+
+
+/* clock source code: */
+
+static int tsc_update_callback(void);
+
+static cycle_t read_tsc(void)
+{
+	cycle_t ret = (cycle_t)get_cycles_sync();
+	return ret;
+}
+
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+	cycle_t ret = (cycle_t)get_cycles_sync();
+	return ret;
+}
+
+static struct clocksource clocksource_tsc = {
+	.name			= "tsc",
+	.rating			= 300,
+	.read			= read_tsc,
+	.mask			= (cycle_t)-1,
+	.mult			= 0, /* to be set */
+	.shift			= 22,
+	.update_callback	= tsc_update_callback,
+	.is_continuous		= 1,
+	.vread			= vread_tsc,
+};
+
+static int tsc_update_callback(void)
+{
+	int change = 0;
+
+	/* check to see if we should switch to the safe clocksource: */
+	if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+		clocksource_tsc.rating = 50;
+		clocksource_reselect();
+		change = 1;
+	}
+	return change;
+}
+
+static int __init init_tsc_clocksource(void)
+{
+	if (!notsc) {
+		clocksource_tsc.mult = clocksource_khz2mult(cpu_khz,
+						clocksource_tsc.shift);
+		return clocksource_register(&clocksource_tsc);
+	}
+	return 0;
+}
+
+module_init(init_tsc_clocksource);
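init_tsc_clocksource() fills in .mult so that the generic timekeeping code can compute nanoseconds as (cycles * mult) >> shift; clocksource_khz2mult() is therefore roughly (10^6 << shift) / khz. A quick sanity check of that arithmetic (the 2 GHz figure is an assumed example):

	#include <stdio.h>

	int main(void)
	{
		unsigned int shift = 22;		/* matches clocksource_tsc.shift */
		unsigned long long khz = 2000000;	/* assume a 2 GHz TSC */
		/* approximation of clocksource_khz2mult() */
		unsigned long long mult = (1000000ULL << shift) / khz;	/* 2^21 */
		unsigned long long cycles = 1000;

		/* 1000 cycles at 2 GHz are 500 ns */
		printf("mult=%llu ns=%llu\n", mult, (cycles * mult) >> shift);
		return 0;
	}

The rating drop to 50 in tsc_update_callback() is what lets clocksource_reselect() fall back to a stabler source once mark_tsc_unstable() has fired.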
diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/vmlinux.lds.S
--- ./linux-2.6.18.1/arch/x86_64/kernel/vmlinux.lds.S	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/vmlinux.lds.S	2007-05-19 23:58:35.000000000 +0900
@@ -93,27 +93,11 @@
 	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
 
 	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	.xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) }
-	xtime_lock = VVIRT(.xtime_lock);
-
-	.vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
-	vxtime = VVIRT(.vxtime);
-
-	.wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
-	wall_jiffies = VVIRT(.wall_jiffies);
-
-	.sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) }
-	sys_tz = VVIRT(.sys_tz);
-
-	.sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) }
-	sysctl_vsyscall = VVIRT(.sysctl_vsyscall);
-
-	.xtime : AT(VLOAD(.xtime)) { *(.xtime) }
-	xtime = VVIRT(.xtime);
-
+	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
 	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	.jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
-	jiffies = VVIRT(.jiffies);
+	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) }
+	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+
 	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) }
 	.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) }
diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/vsyscall.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/vsyscall.c
--- ./linux-2.6.18.1/arch/x86_64/kernel/vsyscall.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/vsyscall.c	2007-05-19 23:58:35.000000000 +0900
@@ -26,65 +26,50 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace
 
-#include
-
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
-	time_t __sec;
+struct vsyscall_gtod_data_t {
+	raw_seqlock_t lock;
+	int sysctl_enabled;
+	struct timeval wall_time_tv;
+	struct timezone sys_tz;
+	cycle_t offset_base;
+	struct clocksource clock;
+};
 
-	__sec = tv->tv_usec / 1000000;
-	if (__sec) {
-		tv->tv_usec %= 1000000;
-		tv->tv_sec += __sec;
-	}
-}
+struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = {
+	.lock = __RAW_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
+	.sysctl_enabled = 1,
+};
 
-static __always_inline void do_vgettimeofday(struct timeval * tv)
+void update_vsyscall(struct timespec* wall_time, struct clocksource* clock)
 {
-	long sequence, t;
-	unsigned long sec, usec;
+	unsigned long flags;
 
-	do {
-		sequence = read_seqbegin(&__xtime_lock);
-
-		sec = __xtime.tv_sec;
-		usec = (__xtime.tv_nsec / 1000) +
-			(__jiffies - __wall_jiffies) * (1000000 / HZ);
-
-		if (__vxtime.mode != VXTIME_HPET) {
-			t = get_cycles_sync();
-			if (t < __vxtime.last_tsc)
-				t = __vxtime.last_tsc;
-			usec += ((t - __vxtime.last_tsc) *
-				 __vxtime.tsc_quot) >> 32;
-			/* See comment in x86_64 do_gettimeofday. */
-		} else {
-			usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
-				  __vxtime.last) * __vxtime.quot) >> 32;
-		}
-	} while (read_seqretry(&__xtime_lock, sequence));
+	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+	/* copy vsyscall data */
+	vsyscall_gtod_data.clock = *clock;
+	vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
+	vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
+	vsyscall_gtod_data.sys_tz = sys_tz;
 
-	tv->tv_sec = sec + usec / 1000000;
-	tv->tv_usec = usec % 1000000;
+	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
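update_vsyscall() above is the writer side of a sequence lock; the do_vgettimeofday() reader further below retries its snapshot until the sequence count is even and unchanged. A minimal user-space sketch of that protocol, with a hand-rolled counter standing in for the kernel's raw_seqlock_t (memory barriers are omitted for brevity; the real primitives issue smp_wmb()/smp_rmb()):

	#include <stdio.h>

	static volatile unsigned seq;			/* even = consistent */
	static volatile unsigned long long wall_sec, cycle_last;

	static void writer_update(unsigned long long sec, unsigned long long cyc)
	{
		seq++;				/* odd: update in progress */
		wall_sec = sec;
		cycle_last = cyc;
		seq++;				/* even: consistent again */
	}

	static void reader_snapshot(unsigned long long *sec, unsigned long long *cyc)
	{
		unsigned s;

		do {
			while ((s = seq) & 1)
				;		/* writer active: wait */
			*sec = wall_sec;
			*cyc = cycle_last;
		} while (seq != s);		/* retry if a writer intervened */
	}

	int main(void)
	{
		unsigned long long sec, cyc;

		writer_update(1180000000ULL, 42ULL);
		reader_snapshot(&sec, &cyc);
		printf("sec=%llu cycle_last=%llu\n", sec, cyc);
		return 0;
	}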
 
 /* RED-PEN may want to readd seq locking, but then the variable should be
  * write-once. */
 static __always_inline void do_get_tz(struct timezone * tz)
 {
-	*tz = __sys_tz;
+	*tz = __vsyscall_gtod_data.sys_tz;
 }
 
 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -105,10 +90,44 @@
 	return secs;
 }
 
+static __always_inline void do_vgettimeofday(struct timeval * tv)
+{
+	cycle_t now, base, mask, cycle_delta;
+	unsigned long seq, mult, shift, nsec_delta;
+	cycle_t (*vread)(void);
+	do {
+		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+
+		vread = __vsyscall_gtod_data.clock.vread;
+		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+			gettimeofday(tv,0);
+			return;
+		}
+		now = vread();
+		base = __vsyscall_gtod_data.clock.cycle_last;
+		mask = __vsyscall_gtod_data.clock.mask;
+		mult = __vsyscall_gtod_data.clock.mult;
+		shift = __vsyscall_gtod_data.clock.shift;
+
+		*tv = __vsyscall_gtod_data.wall_time_tv;
+
+	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+
+	/* calculate interval: */
+	cycle_delta = (now - base) & mask;
+	/* convert to nsecs: */
+	nsec_delta = (cycle_delta * mult) >> shift;
+
+	/* convert to usecs and add to the timeval: */
+	tv->tv_usec += nsec_delta / NSEC_PER_USEC;
+	while (tv->tv_usec >= USEC_PER_SEC) {
+		tv->tv_sec += 1;
+		tv->tv_usec -= USEC_PER_SEC;
+	}
+}
+
 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
-	if (!__sysctl_vsyscall)
-		return gettimeofday(tv,tz);
 	if (tv)
 		do_vgettimeofday(tv);
 	if (tz)
@@ -120,11 +139,11 @@
  * unlikely */
 time_t __vsyscall(1) vtime(time_t *t)
 {
-	if (!__sysctl_vsyscall)
+	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
 		return time_syscall(t);
 	else if (t)
-		*t = __xtime.tv_sec;
-	return __xtime.tv_sec;
+		*t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
+	return __vsyscall_gtod_data.wall_time_tv.tv_sec;
 }
 
 long __vsyscall(2) venosys_0(void)
@@ -163,7 +182,7 @@
 		ret = -ENOMEM;
 		goto out;
 	}
-	if (!sysctl_vsyscall) {
+	if (!vsyscall_gtod_data.sysctl_enabled) {
 		*map1 = SYSCALL;
 		*map2 = SYSCALL;
 	} else {
@@ -186,7 +205,7 @@
 static ctl_table kernel_table2[] = {
 	{ .ctl_name = 99, .procname = "vsyscall64",
-	  .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
+	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644,
 	  .strategy = vsyscall_sysctl_nostrat,
 	  .proc_handler = vsyscall_sysctl_change },
 	{ 0, }
diff -urN ./linux-2.6.18.1/arch/x86_64/kernel/x8664_ksyms.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/x8664_ksyms.c
--- ./linux-2.6.18.1/arch/x86_64/kernel/x8664_ksyms.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/kernel/x8664_ksyms.c	2007-05-19 23:58:35.000000000 +0900
@@ -12,10 +12,12 @@
 
 EXPORT_SYMBOL(kernel_thread);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
diff -urN ./linux-2.6.18.1/arch/x86_64/lib/thunk.S linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/lib/thunk.S
--- ./linux-2.6.18.1/arch/x86_64/lib/thunk.S	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/lib/thunk.S	2007-05-19 23:58:35.000000000 +0900
@@ -42,11 +42,13 @@
 	thunk rwsem_wake_thunk,rwsem_wake
 	thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
 #endif
-
-	thunk __down_failed,__down
-	thunk_retrax
__down_failed_interruptible,__down_interruptible - thunk_retrax __down_failed_trylock,__down_trylock - thunk __up_wakeup,__up + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + thunk __compat_down_failed,__compat_down + thunk_retrax __compat_down_failed_interruptible,__compat_down_interruptible + thunk_retrax __compat_down_failed_trylock,__compat_down_trylock + thunk __compat_up_wakeup,__compat_up +#endif #ifdef CONFIG_TRACE_IRQFLAGS thunk trace_hardirqs_on_thunk,trace_hardirqs_on diff -urN ./linux-2.6.18.1/arch/x86_64/mm/fault.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/mm/fault.c --- ./linux-2.6.18.1/arch/x86_64/mm/fault.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/mm/fault.c 2007-05-19 23:58:35.000000000 +0900 @@ -79,6 +79,7 @@ { int loglevel_save = console_loglevel; if (yes) { + stop_trace(); oops_in_progress = 1; } else { #ifdef CONFIG_VT diff -urN ./linux-2.6.18.1/arch/x86_64/mm/init.c linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/mm/init.c --- ./linux-2.6.18.1/arch/x86_64/mm/init.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/arch/x86_64/mm/init.c 2007-05-19 23:58:35.000000000 +0900 @@ -51,7 +51,7 @@ static unsigned long dma_reserve __initdata; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the diff -urN ./linux-2.6.18.1/block/cfq-iosched.c linux-2.6.18.1-cabi-20070529-RT_HRT/block/cfq-iosched.c --- ./linux-2.6.18.1/block/cfq-iosched.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/block/cfq-iosched.c 2007-05-19 23:58:35.000000000 +0900 @@ -1283,7 +1283,7 @@ q = cfqd->queue; - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); spin_lock(q->queue_lock); diff -urN ./linux-2.6.18.1/block/ll_rw_blk.c linux-2.6.18.1-cabi-20070529-RT_HRT/block/ll_rw_blk.c --- ./linux-2.6.18.1/block/ll_rw_blk.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/block/ll_rw_blk.c 2007-05-19 23:58:35.000000000 +0900 @@ -1547,7 +1547,7 @@ */ void blk_plug_device(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -1570,7 +1570,7 @@ */ int blk_remove_plug(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; @@ -3584,13 +3584,15 @@ struct io_context *ioc; struct cfq_io_context *cic; - local_irq_save(flags); + // FIXME: unsafe upstream too? 
+ + local_irq_save_nort(flags); task_lock(current); ioc = current->io_context; current->io_context = NULL; ioc->task = NULL; task_unlock(current); - local_irq_restore(flags); + local_irq_restore_nort(flags); if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); diff -urN ./linux-2.6.18.1/drivers/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/Makefile --- ./linux-2.6.18.1/drivers/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/Makefile 2007-05-20 14:14:28.000000000 +0900 @@ -76,3 +76,4 @@ obj-$(CONFIG_SUPERH) += sh/ obj-$(CONFIG_GENERIC_TIME) += clocksource/ obj-$(CONFIG_DMA_ENGINE) += dma/ +obj-$(CONFIG_CABI) += cabi/ diff -urN ./linux-2.6.18.1/drivers/acpi/executer/exmutex.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/executer/exmutex.c --- ./linux-2.6.18.1/drivers/acpi/executer/exmutex.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/executer/exmutex.c 2007-05-19 23:58:35.000000000 +0900 @@ -267,9 +267,9 @@ && (obj_desc->mutex.os_mutex != ACPI_GLOBAL_LOCK)) { ACPI_ERROR((AE_INFO, "Thread %X cannot release Mutex [%4.4s] acquired by thread %X", - (u32) walk_state->thread->thread_id, + (u32)(long) walk_state->thread->thread_id, acpi_ut_get_node_name(obj_desc->mutex.node), - (u32) obj_desc->mutex.owner_thread->thread_id)); + (u32)(long) obj_desc->mutex.owner_thread->thread_id)); return_ACPI_STATUS(AE_AML_NOT_OWNER); } diff -urN ./linux-2.6.18.1/drivers/acpi/osl.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/osl.c --- ./linux-2.6.18.1/drivers/acpi/osl.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/osl.c 2007-05-19 23:58:35.000000000 +0900 @@ -676,13 +676,13 @@ acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct compat_semaphore *sem = NULL; - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct compat_semaphore)); if (!sem) return AE_NO_MEMORY; - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct compat_semaphore)); sema_init(sem, initial_units); @@ -705,7 +705,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; if (!sem) @@ -733,7 +733,7 @@ acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; int ret = 0; @@ -820,7 +820,7 @@ */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; if (!sem || (units < 1)) diff -urN ./linux-2.6.18.1/drivers/acpi/processor_idle.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/processor_idle.c --- ./linux-2.6.18.1/drivers/acpi/processor_idle.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/processor_idle.c 2007-05-19 23:58:35.000000000 +0900 @@ -38,9 +38,11 @@ #include #include #include /* need_resched() */ +#include #include #include +#include #include #include @@ -368,10 +370,12 @@ /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); +#ifndef CONFIG_IA64 #ifdef CONFIG_GENERIC_TIME /* TSC halts in C2, so notify users */ mark_tsc_unstable(); #endif +#endif /* Re-enable 
interrupts */ local_irq_enable(); current_thread_info()->status |= TS_POLLING; @@ -412,10 +416,12 @@ ACPI_MTX_DO_NOT_LOCK); } +#ifndef CONFIG_IA64 #ifdef CONFIG_GENERIC_TIME /* TSC halts in C3, so notify users */ mark_tsc_unstable(); #endif +#endif /* Re-enable interrupts */ local_irq_enable(); current_thread_info()->status |= TS_POLLING; @@ -453,7 +459,8 @@ */ if (cx->promotion.state && ((cx->promotion.state - pr->power.states) <= max_cstate)) { - if (sleep_ticks > cx->promotion.threshold.ticks) { + if (sleep_ticks > cx->promotion.threshold.ticks && + cx->promotion.state->latency <= system_latency_constraint()) { cx->promotion.count++; cx->demotion.count = 0; if (cx->promotion.count >= @@ -494,8 +501,10 @@ end: /* * Demote if current state exceeds max_cstate + * or if the latency of the current state is unacceptable */ - if ((pr->power.state - pr->power.states) > max_cstate) { + if ((pr->power.state - pr->power.states) > max_cstate || + pr->power.state->latency > system_latency_constraint()) { if (cx->demotion.state) next_state = cx->demotion.state; } @@ -1009,9 +1018,10 @@ seq_printf(seq, "active state: C%zd\n" "max_cstate: C%d\n" - "bus master activity: %08x\n", + "bus master activity: %08x\n" + "maximum allowed latency: %d usec\n", pr->power.state ? pr->power.state - pr->power.states : 0, - max_cstate, (unsigned)pr->power.bm_activity); + max_cstate, (unsigned)pr->power.bm_activity, system_latency_constraint()); seq_puts(seq, "states:\n"); @@ -1077,6 +1087,29 @@ .release = single_release, }; + +static void smp_callback(void *v) +{ + /* we already woke the CPU up, nothing more to do */ +} + +/* + * This function gets called when a part of the kernel has a new latency requirement. + * This means we need to get all processors out of their C-state, and then recalculate + * a new suitable C-state. Just do a cross-cpu IPI; that wakes them all right up. + */ +static int acpi_processor_latency_notify(struct notifier_block *b, + unsigned long l, void *v) +{ + smp_call_function(smp_callback, NULL, 0, 1); + return NOTIFY_OK; +} + +static struct notifier_block acpi_processor_latency_notifier = { + .notifier_call = acpi_processor_latency_notify, +}; + + int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { @@ -1093,6 +1126,7 @@ "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; + register_latency_notifier(&acpi_processor_latency_notifier); } if (!pr) @@ -1164,6 +1198,7 @@ * copies of pm_idle before proceeding. 
*/ cpu_idle_wait(); + unregister_latency_notifier(&acpi_processor_latency_notifier); } return 0; diff -urN ./linux-2.6.18.1/drivers/acpi/tables/tbget.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/tables/tbget.c --- ./linux-2.6.18.1/drivers/acpi/tables/tbget.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/tables/tbget.c 2007-05-19 23:58:35.000000000 +0900 @@ -325,7 +325,7 @@ if (header->length < sizeof(struct acpi_table_header)) { ACPI_ERROR((AE_INFO, "Table length (%X) is smaller than minimum (%X)", - header->length, sizeof(struct acpi_table_header))); + header->length, (int)sizeof(struct acpi_table_header))); return_ACPI_STATUS(AE_INVALID_TABLE_LENGTH); } diff -urN ./linux-2.6.18.1/drivers/acpi/tables/tbrsdt.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/tables/tbrsdt.c --- ./linux-2.6.18.1/drivers/acpi/tables/tbrsdt.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/tables/tbrsdt.c 2007-05-19 23:58:35.000000000 +0900 @@ -189,7 +189,7 @@ ACPI_ERROR((AE_INFO, "RSDT/XSDT length (%X) is smaller than minimum (%X)", table_ptr->length, - sizeof(struct acpi_table_header))); + (int)sizeof(struct acpi_table_header))); return (AE_INVALID_TABLE_LENGTH); } diff -urN ./linux-2.6.18.1/drivers/acpi/utilities/utmutex.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/utilities/utmutex.c --- ./linux-2.6.18.1/drivers/acpi/utilities/utmutex.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/acpi/utilities/utmutex.c 2007-05-19 23:58:35.000000000 +0900 @@ -259,7 +259,7 @@ } else { ACPI_EXCEPTION((AE_INFO, status, "Thread %X could not acquire Mutex [%X]", - (u32) this_thread_id, mutex_id)); + (u32)(long) this_thread_id, mutex_id)); } return (status); diff -urN ./linux-2.6.18.1/drivers/block/paride/pseudo.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/block/paride/pseudo.h --- ./linux-2.6.18.1/drivers/block/paride/pseudo.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/block/paride/pseudo.h 2007-05-19 23:58:35.000000000 +0900 @@ -43,7 +43,7 @@ static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_WORK(ps_tq, ps_tq_int, NULL); diff -urN ./linux-2.6.18.1/drivers/cabi/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/Kconfig --- ./linux-2.6.18.1/drivers/cabi/Kconfig 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/Kconfig 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,10 @@ +# +# CABI related configuration option +# + +config CABI + bool 'CPU Accounting and Binding Interface support' + default n + ---help--- + CPU Accounting and Binding Interface support. + diff -urN ./linux-2.6.18.1/drivers/cabi/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/Makefile --- ./linux-2.6.18.1/drivers/cabi/Makefile 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/Makefile 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,15 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+
+
+obj-$(CONFIG_CABI) = cabi_init.o cabi_account.o cabi_timer.o cabi_sched.o cabi_isr.o cabi_signal.o udivdi3.o cabi_overload.o cabi_syscalls.o cabi_debug.o cabi_cyclic.o cabi_defsrv.o
+#cabi_dsv_replenish.o cabi_cyc_isr.o cabi_dsv_isr.o cabi_ovl_isr.o cabi_ovl_replenish.o
+obj-$(CONFIG_PROC_FS) += cabi_procfs.o
+
+
diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_account.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_account.c
--- ./linux-2.6.18.1/drivers/cabi/cabi_account.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_account.c	2007-06-17 00:22:36.000000000 +0900
@@ -0,0 +1,1175 @@
+/*
+ * linux/drivers/cabi/cabi_account.c
+ *
+ * CABI -- CPU Accounting and Blocking Interfaces.
+ *
+ * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory.
+ * MontaVista Software, Inc.
+ *
+ * This software was developed by Waseda University and MontaVista Software,
+ * Inc. Funding for this project was provided by IPA (Information-technology
+ * Promotion Agency, Japan). This software may be used and distributed
+ * according to the terms of the GNU Public License, incorporated herein by
+ * reference.
+ *
+ * 2006-9-12. Modified by Midori SUGAYA to fix bugs in tick calculation
+ *            and make adjustments for 2.6.
+ * 2006-2.    Porting to kernel-2.6 Takeharu KATO (ARM, SH)
+ * 2005-2.    New release based on LinuxRK. by Midori SUGAYA, Hirotaka
+ *            ISHIKAWA.
+ *
+ * Further details about this project can be obtained at
+ * http://dcl.info.waseda.ac.jp/osrg/
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * This file is derived from software distributed under the following terms:
+ */
+ /*
+ * Real-time and Multimedia Systems Laboratory
+ * Copyright (c) 1999 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Real-Time and Multimedia Systems Laboratory
+ * Attn: Prof. Raj Rajkumar
+ * Electrical and Computer Engineering, and Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * or via email to raj@ece.cmu.edu
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/*
+ * Static variables for accounting
+ */
+
+/*
+ * These values keep the addresses of the accounting object and the
+ * overload accounting object that are currently running.
+ */
+
+cabi_account_t cabi_current_account;
+cabi_account_t cabi_current_overload_account;
+
+/*
+ * Total capacity currently held by the accounting objects.
+ */
+cpu_capacity_t cabi_account_current_capacity;
+
+/*
+ * Other global variables.
+ */
+struct list_head cabi_account_head;
+int overload_cabi;
+int last_cabi_id = 0;
+
+extern long sys_setpgid(pid_t pid, pid_t pgid);
+extern rwlock_t tasklist_lock;
+
+/*
+ * Name: cabi_account_init
+ *
+ * Initialize the variables used by the accounting code.
+ * This function is called from cabi_init, which runs during
+ * kernel boot.
+ */
+void
+cabi_account_init(void)
+{
+	ENTER;
+	INIT_LIST_HEAD(&cabi_account_head);
+	cabi_account_current_capacity = 0;
+	cabi_current_account = NULL_ACCOUNT;
+	cabi_current_overload_account = NULL;
+	EXIT;
+}
+
+/*
+ * Name: search_cabi (cabi_id)
+ *
+ * Search for the accounting object (cabi) specified by the cabi_id
+ * argument and return its address.
+ */
+
+cabi_account_t
+search_cabi(unsigned long cabi_id)
+{
+
+	struct list_head *cabi_list;
+	cabi_account_t cabi = NULL_ACCOUNT;
+
+	cabi_list = &cabi_account_head;
+
+	while (!list_empty(cabi_list)) {
+		cabi = list_entry(cabi_list, struct cabi_account, cpu_link);
+
+		if (cabi->cabi_id == cabi_id)
+			break;
+
+		cabi_list = cabi_list->next;
+		if (cabi_list == &cabi_account_head) {
+			cabi = NULL_ACCOUNT;
+			break;
+		}
+	}
+	return cabi;
+}
+
+/*
+ * Name: get_cabi_id (void)
+ *
+ * Return a unique accounting object id to the caller. In this
+ * function, duplicate ids and reserved ids are carefully kept
+ * out of the id pool. CABI_ID_MAX, defined in the cabi.h header,
+ * is the upper limit of the id.
+ */
+
+int
+get_cabi_id(void)
+{
+	struct list_head *cabi_list;
+	cabi_account_t cabi = NULL_ACCOUNT;
+	int begin_cabi_id, cabi_id_error;
+	cabi_id_error = -1;
+
+	begin_cabi_id = -1;
+	last_cabi_id++;
+
+	cabi_list = &cabi_account_head;
+	cabi_list = cabi_list->next;
+	if (cabi_list == &cabi_account_head) {	/* First AO create. */
+		last_cabi_id = FIRST_CABI_ID;
+		return last_cabi_id;
+	}
+	while (!list_empty(cabi_list)) {	/* Get cabi_id */
+		cabi = list_entry(cabi_list, struct cabi_account, cpu_link);
+		/* cabi_id is full */
+		if (last_cabi_id == begin_cabi_id)
+			return cabi_id_error;
+		/* selected cabi_id is for overload */
+		if (OVERLOAD_CABI_ID == last_cabi_id) {
+			if (begin_cabi_id == -1)
+				begin_cabi_id = last_cabi_id;
+			++last_cabi_id;
+			cabi_list = &cabi_account_head;
+			cabi_list = cabi_list->next;
+			continue;
+		}
+		/* selected cabi_id is already used */
+		if (cabi->cabi_id == last_cabi_id) {
+			if (begin_cabi_id == -1)
+				begin_cabi_id = last_cabi_id;
+			++last_cabi_id;
+			cabi_list = &cabi_account_head;
+			cabi_list = cabi_list->next;
+			continue;
+		}
+		/* selected cabi_id is MAX value */
+
+		if (last_cabi_id >= CABI_ID_MAX) {
+			if (begin_cabi_id == -1)
+				begin_cabi_id = last_cabi_id;
+			last_cabi_id = FIRST_CABI_ID;
+			cabi_list = &cabi_account_head;
+			cabi_list = cabi_list->next;
+			continue;
+		}
+		/* next cabi list */
+		cabi_list = cabi_list->next;
+
+		/* all lists were searched. */
+		if (cabi_list == &cabi_account_head)
+			break;
+	}
+	return last_cabi_id;
+}
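capacity_of() below reduces a reservation to an integer utilization in units of 0.01% (qc/qt scaled by 10000), pre-shifting both operands by 8 bits when the period exceeds one second so the divisor still fits the 64-by-32-bit division helper. A worked example of the arithmetic (the 5 ms / 20 ms reservation is an illustrative choice):

	#include <stdio.h>

	int main(void)
	{
		/* assume a reservation of C = 5 ms every T = 20 ms */
		unsigned long long qc = 5000000ULL;	/* C in ns */
		unsigned long long qt = 20000000ULL;	/* T in ns */
		unsigned long long qc_tmp = qc * 10000;	/* 0.01% units */

		if (qt > 1000000000ULL) {	/* shrink first if the divisor */
			qc_tmp >>= 8;		/* would overflow 32 bits     */
			qt >>= 8;
		}
		/* prints 2500, i.e. 25.00% of the CPU */
		printf("capacity = %llu\n", qc_tmp / qt);
		return 0;
	}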
+
+/*
+ * Capacity calculation of accounting object (cabi)
+ */
+
+cpu_capacity_t
+capacity_of(cpu_capacity_quad_t qc, cpu_capacity_quad_t qt)
+{
+
+	unsigned long long qc_tmp, qt_tmp;	/* unsigned long long */
+	unsigned long result, rem;		/* unsigned long */
+
+	ENTER;
+
+	/* To keep a fine-grained value, we scale by 10000 instead of
+	   100 before dividing. */
+	qc_tmp = (unsigned long long) qc * 10000;
+	qt_tmp = (unsigned long long) qt;
+
+	if (qt_tmp > 1000000000) {
+		qc_tmp = qc_tmp >> 8;
+		qt_tmp = qt_tmp >> 8;
+	}
+
+	result = (unsigned long)
+		div_long_long_rem (qc_tmp, qt_tmp, &rem);
+
+	cabi_debug_capacity(result);
+
+	EXIT;
+	return result;
+}
+
+void
+set_terminate_operation (struct cabi_account *cabi)
+{
+
+	switch (cabi->pm.operation){
+	case OP_NONE:
+		break;
+	case OP_BLOCK:
+		cabi->opt_state = CABI_UNBLOCK;
+		break;
+	case OP_SIGNAL:
+		cabi->opt_state = CABI_SIGNAL;
+		break;
+	default:
+		break;
+	}
+}
+
+int
+capacity_admission_control (cabi_param_data_t *p,
+			    cpu_capacity_t capacity)
+{
+
+	cabi_account_current_capacity =
+		capacity + cabi_account_current_capacity;
+
+	switch (p->policy) {
+	case PO_DEFSRV:
+		break;
+	case PO_EDF:
+		//if (cabi_account_current_capacity > 99)
+		//	return 0;
+		break;
+	case PO_RM:
+		//if (cabi_account_current_capacity > 69)
+		//	return 0;
+		break;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Name: cabi_account_create
+ *
+ * Create an accounting object. This function is called from
+ * sys_cabi_account_create, the system-call interface.
+ */
+
+cabi_account_t
+cabi_account_create(struct timespec *c, struct timespec *t,
+		    cabi_param_data_t * p)
+{
+
+	cabi_account_t cabi;
+	cpu_capacity_t capacity;
+	cpu_capacity_quad_t qc, qt, overload_qc;
+	long get_id = 0;
+
+
+	ENTER;
+	cabi_debug_timespecs(c,t);
+
+	/*
+	 * if this is an overload cabi, calculate parameters.
+	 */
+	if (p->bind_proc_type == BIND_IDLE_PROC){
+		capacity_overload(c,t,&overload_qc);
+		qc = overload_qc;
+	} else {
+		qc = ((unsigned long long)c->tv_sec * NANOSEC) +
+			(unsigned long long)c->tv_nsec;
+	}
+	qt = ((unsigned long long)t->tv_sec * NANOSEC) +
+		(unsigned long long)t->tv_nsec;
+
+
+	/* capacity is the rate of qc/qt * 100 */
+	cabi_debug_ct (qc, qt);
+	capacity = capacity_of (qc, qt);
+	cabi_debug_capacity (capacity);
+
+	/*
+	 * Add the new capacity to the global capacity.
+	 * This is the point where admission control should be applied
+	 * according to the policy.
+	 */
+	capacity_admission_control(p, capacity);
+
+	cabi_debug_capacity(cabi_account_current_capacity);
+
+	/* create an accounting object */
+	if((cabi = malloc(sizeof (struct cabi_account))) == NULL)
+		return NULL;
+	bzero(cabi, sizeof (struct cabi_account));
+
+	/* memory copy */
+	memcpy(&cabi->pm, p, sizeof(struct cpu_param));
+
+	/*
+	 * Set up the unique id for the cabi. First, check whether the
+	 * cabi is for overload handling or not. If it is, a special
+	 * id should be set. If not, set a sequential number for it.
+	 */
+	if (cabi->pm.bind_proc_type & IDLE_PROCESS) {
+		/* if it is for overload, set up the overload id */
+		setup_overload_id (cabi);
+
+	} else {
+		if((get_id = get_cabi_id()) < 0) {
+			free(cabi);
+			return NULL;
+		} else {
+			cabi->cabi_id = (unsigned int)get_id;
+		}
+	}
+	CABI_DBG;
+	cabi_debug("[create] cabi_id %lu\n", cabi->cabi_id);
+
+	/*
+	 * Initialize the list head for the proc list.
+	 */
+	INIT_LIST_HEAD(&cabi->cpu_proc_list);
+	memcpy (&cabi->cpu_time, c, sizeof(struct timespec));
+	memcpy (&cabi->cpu_period, t, sizeof(struct timespec));
+	cabi->cpu_capacity = capacity;
+
+	/*
+	 * if this is the first call requesting a reservation,
+	 * we should set up the cabi hooks.
+	 */
+	if (list_empty(&cabi_account_head)) {
+		/* enable scheduling hook */
+		cabi_enable();
+	}
+	list_add(&cabi->cpu_link, &cabi_account_head);
+
+	/* calculate cpu ticks per capacity */
+	/* to pass the nanosec value to the usec2tick interface,
+	 * we divide the nanosec value by 1000 into usec.
+	 */
+	nanosec2tick(&qc, &cabi->cpu_time_ticks);
+	nanosec2tick(&qt, &cabi->cpu_period_ticks);
+
+	/* cabi debug ticks */
+
+	/*
+	 * Let the account be replenished from the next jiffy.
+	 * See also cabi_timer.c:cabi_replenish_timer_create().
+	 * cpu_period_used_ticks = 0;
+	 * cpu_period_available_ticks = 0;
+	 */
+	/* init waitqueue */
+	init_waitqueue_head(&cabi->depleted_wait);
+
+	/* create entry as /proc/cabi/ */
+	INIT_LIST_HEAD(&cabi->cpu_proc_list);
+	cabi->cpu_state = CABI_IS_DEPLETED;
+	cabi->signal_block = SIGNAL_OFF;
+
+#ifdef CONFIG_PROC_FS
+	cabi_proc_account_create(cabi);
+#endif
+	set_terminate_operation(cabi);
+	set_account_policy(cabi);
+
+	return cabi;	/* success */
+}
+
+/*
+ * Name: cabi_account_destroy
+ *
+ * Delete an accounting object from kernel memory. This function is
+ * called from sys_cabi_account_destroy, the system-call interface.
+ */
+
+int
+cabi_account_destroy(unsigned long cabi_id)
+{
+	cabi_account_t cabi = NULL_ACCOUNT;
+	/* find the cabi address */
+	if (!(cabi = search_cabi(cabi_id)))
+		return CABI_ENOEXIST;
+
+	/* if the account still has attached processes */
+	if (!list_empty(&cabi->cpu_proc_list))
+		/* a process is still attached. */
+		return CABI_EATTACHED;
+
+	/* destroy timer */
+	cabi_replenish_timer_cancel(cabi);
+
+	/* return capacity */
+	if (cabi->cpu_capacity >= cabi_account_current_capacity) {
+		cabi_account_current_capacity = 0;
+	} else {
+		cabi_account_current_capacity -= cabi->cpu_capacity;
+	}
+
+	/* delete cpu_link */
+	list_del(&cabi->cpu_link);
+	INIT_LIST_HEAD(&cabi->cpu_link);	/* for sure */
+	cabi->pm.operation = OP_NONE;
+	CABI_DBG;
+
+#ifdef CONFIG_PROC_FS
+	/*
+	 * remove entry under /proc/cabi/
+	 * must be before cabi_resource_set_detach_account()
+	 * since resource_set is referred to in cabi_proc_account_destroy()
+	 */
+	cabi_proc_account_destroy(cabi);
+#endif
+
+	/* finally free a generic account object. */
+	free(cabi);
+
+	if (cabi_id == OVERLOAD_CABI_ID)
+		cabi_current_overload_account = NULL;
+
+	/* if this is the last cabi, disable_isr */
+	if (!cabi_account_current_capacity)
+		cabi_disable();
+
+	return CABI_SUCCESS;	/* success */
+}
+
+
+/*
+ * Name: cabi_account_set
+ *
+ * Set the parameters of an accounting object. This function is called
+ * from sys_cabi_account_set, the system-call interface.
+ */
+
+int
+cabi_account_set(unsigned long cabi_id, struct timespec *c,
+		 struct timespec *t, cabi_param_data_t * p)
+{
+	cpu_capacity_t capacity;
+	cpu_capacity_quad_t qc, qt, overload_qc;
+	cabi_account_t cabi = NULL_ACCOUNT;
+
+	ENTER;
+
+	/* find the cabi address */
+	if (!(cabi = search_cabi(cabi_id))) {
+		/* cabi does not exist. */
+		return CABI_ENOEXIST;
+	}
+
+	/* check timespec */
+	cabi_debug_timespecs(&cabi->cpu_time, &cabi->cpu_period);
+	cabi_debug_timespecs(c,t);
+
+	/* cabi is depleted */
+	cabi->cpu_state = CABI_IS_DEPLETED;
+
+	/* set terminate action */
+	memcpy(&cabi->pm, p, sizeof(cabi_param_data_t));
+
+	if(cabi->pm.operation == OP_BLOCK) {
+		cabi->opt_state = CABI_UNBLOCK;
+	} else {
+		cabi->opt_state = CABI_SIGNAL;
+		if (!CABI_SIGNUM(cabi)) {
+			cabi->pm.operation = OP_NONE;
+		}
+	}
+
+	/* if this is an overload cabi, re-calculate parameters. */
+	if (cabi->pm.bind_proc_type == BIND_IDLE_PROC){
+
+		/* debug timespec */
+		cabi_debug_timespecs (c,t);
+
+		/* the new parameter has been set in overload_qc */
+		capacity_overload(c,t,&overload_qc);
+		qc = overload_qc;
+	} else {
+		qc = (c->tv_sec * NANOSEC) + c->tv_nsec;
+	}
+	qt = (t->tv_sec * NANOSEC) + t->tv_nsec;
+
+	/* calculate a new capacity */
+	capacity = capacity_of(qc, qt);
+
+	/* set current capacity */
+	cabi_account_current_capacity =
+		capacity + cabi_account_current_capacity - cabi->cpu_capacity;
+
+	/* check the result */
+	cabi_debug_capacity (capacity);
+	cabi_debug_capacity (cabi_account_current_capacity);
+
+	/* load the new parameters */
+	memcpy(&cabi->cpu_time, c, sizeof(struct timespec));
+	memcpy(&cabi->cpu_period, t, sizeof(struct timespec));
+	cabi->cpu_capacity = capacity;
+
+	/* initialize available ticks */
+	cabi->cpu_period_available_ticks = 0;
+
+	/* calculate cabi ticks per capacity */
+	nanosec2tick(&qc, &cabi->cpu_time_ticks);
+	nanosec2tick(&qt, &cabi->cpu_period_ticks);
+
+	/* cancel replenish timer */
+	cabi_replenish_timer_cancel(cabi);
+
+	/* reset a timer for it */
+	cabi_replenish_timer_init(cabi, &cabi->cpu_period_ticks);
+
+	return CABI_SUCCESS;	/* success */
+
+}
+
+/*
+ * Name: cabi_account_attach
+ *
+ * Attach a process to the specified accounting object.
+ */
+
+int
+cabi_account_attach(unsigned long cabi_id, struct task_struct *tsk)
+{
+	struct rs_proc_list *rs_proc;
+	cabi_account_t cabi = NULL_ACCOUNT;
+	unsigned long flags;
+
+	ENTER;
+
+	/* find the cabi address */
+	if (!(cabi = search_cabi(cabi_id))) {
+		CABI_DBG;
+		/* it does not exist. */
+		return CABI_ENOEXIST;
+	}
+	CABI_DBG;
+	/*
+	 * !!! Need to make sure cabi is really an account.
+	 */
+	LVAL_TASK_ACCOUNT(tsk) = cabi;
+
+	/* show info */
+	cabi_debug_info(cabi);
+	CABI_DBG;
+
+	if((rs_proc = malloc(sizeof (struct rs_proc_list))) == NULL)
+		return CABI_ENOMEM;
+	CABI_DBG;
+	bzero(rs_proc, sizeof (struct rs_proc_list));
+
+	/* If there is no process bound to this cabi,
+	 * we should set the replenish timer for it.
+	 */
+	CABI_DBG;
+
+	if (list_empty(&cabi->cpu_proc_list)) {
+		cabi_replenish_timer_init(cabi,&cabi->cpu_period_ticks);
+	}
+	CABI_DBG;
+
+	/* set up the proc filesystem entry for the newly attached process */
+	rs_proc->rs_proc_task = tsk;
+	rs_proc->rs_proc_pid = tsk->pid;
+
+	CABI_DBG;
+
+	local_save_flags(flags);
+	{
+		local_irq_disable();
+		list_add(&rs_proc->rs_proc_list, &cabi->cpu_proc_list);
+	}
+	local_irq_restore(flags);
+
+	CABI_DBG;
+
+	/* attach! */
+	/* NOT YET fixed:
+	 * if we take the rdtsc time from here, there is a lag
+	 * between the current cpu_period_start_time and the time
+	 * at which initial_rep_timer will be set, one jiffy later.
+	 */
+	CABI_DBG;
+	if (cabi->cpu_period_start_ticks == 0 &&
+	    rs_proc->rs_proc_task == current) {
+
+		/*
+		 * XXX consider offset time for RT-processes!
+ */ + cabi_rdticks(&cabi->cpu_period_start_ticks); + } + CABI_DBG; + + /* if this is the current process, + * make it the current account and start accouting now */ + if (current == tsk) { + + /* XXX atomic ? */ + if (cabi_current_account != cabi) { + + /* XXX cabi が RUNNING の間は NULL であることを + 保証できているか?できていれば assertion で + チェックできる */ + cabi_current_account = cabi; + + /* check account info */ + cabi_debug_info (cabi_current_account); + } + CABI_DBG; + + cabi_start_account(cabi); + + if (cabi->cpu_state & CABI_IS_DEPLETED) { + + /* start accounting */ + cabi_account_enforce(cabi); + } + CABI_DBG; + } + if (cabi->cabi_id == OVERLOAD_CABI_ID) + overload_cabi++; + + CABI_DBG; + + return CABI_SUCCESS; +} + +/* + * Name: cabi_account_detach + * + * Detach a process from a specified accounting object. + */ + +int +cabi_account_detach(struct task_struct *tsk) +{ + cabi_account_t cabi = TASK_ACCOUNT(tsk); + struct rs_proc_list *rs_proc = NULL; + struct list_head *proc_list; + + + /* show cabi info */ + cabi_debug_info (cabi); + + /* find rs_proc */ + proc_list = cabi->cpu_proc_list.next; + while (proc_list != &cabi->cpu_proc_list) { + rs_proc = + list_entry(proc_list, struct rs_proc_list, rs_proc_list); + if (rs_proc->rs_proc_pid == tsk->pid) { + break; + } + /* next element */ + proc_list = proc_list->next; + } + + if (rs_proc && rs_proc->rs_proc_pid == tsk->pid) { + + /* show rs_proc info */ + cabi_debug_detach (rs_proc); + + /* remove rs_proc from the list */ + list_del(&rs_proc->rs_proc_list); + + /* free rs_proc */ + INIT_LIST_HEAD(&rs_proc->rs_proc_list); + free(rs_proc); + } + /* detach resource set from task */ + LVAL_TASK_ACCOUNT(tsk) = NULL_ACCOUNT; + + if (tsk->state != TASK_RUNNING) { + wake_up(&cabi->depleted_wait); + } + + return CABI_SUCCESS; +} + +void +set_account_policy (cabi_account_t cabi) +{ + + ENTER; + switch (cabi->pm.policy) { + case PO_NONE: + break; + case PO_CYCLIC: + CABI_DBG; + cabi->ops = &cyclic_ops; + break; + case PO_OVLD: + cabi->ops = &ovl_ops; + break; + case PO_RM: + break; + case PO_DEFSRV: + CABI_DBG; + cabi->ops = &dsv_ops; + break; + case PO_VPE: + break; + case PO_EDF: + break; + default: + CABI_DBG; + cabi->ops = &cyclic_ops; + break; + } + EXIT; +} + +/* + * Name: cabi_account_replenish + * + * This function is called when the periodic timer is expired. + * At this point, the accounting system update the status and + * values of the accounting object and initialize them for + * preparing the next period. + * + * T T + * |<---------->|<---------->| + * --|------------|------------|--->t + * t1 t2 t3.. + * + */ + +void +cabi_account_replenish(cabi_account_t cabi) +{ + cabi->ops->replenish (cabi); +} + + +/* + * Name: cabi_account_enforce + * + * This function will change the status of the accounting object + * to CABI_IS_DEPLETED which cpu_state has not been depleted yet. + */ + + +void +cabi_account_enforce(cabi_account_t cabi) +{ + + ENTER; + + if (cabi->cabi_id != OVERLOAD_CABI_ID && + !(cabi->cpu_state & CABI_IS_DEPLETED)) { + + /* check state */ + cabi_debug_state(cabi); + + /* + * Here, the capacity of accounting system has + * been used up already. So, we have to change + * the status to IS_DEPLETED. + */ + + cabi->cpu_state |= CABI_IS_DEPLETED; + cabi_debug_state (cabi); + + /* set current account */ + cabi_current_account = cabi; + } + EXIT; +} + + +/* + * Name: cabi_account_check_enforce + * + * Check the cpu_state of the accounting object. If the + * available ticks of TSC is less than the used ticks, + * the status should be changed. 
+
+
+/*
+ * Name: cabi_account_check_enforce
+ *
+ * Check the cpu_state of the accounting object. If the number of
+ * available TSC ticks is less than the number of used ticks, the
+ * status should be changed; call cabi_account_enforce.
+ */
+inline void
+cabi_account_check_enforce(cabi_account_t cabi)
+{
+	ENTER;
+
+	if ((cabi->cabi_id != OVERLOAD_CABI_ID) &&
+	    cabi->cpu_period_used_ticks >= cabi->cpu_period_available_ticks) {
+
+		/* debug ticks */
+		cabi_debug_ticks(cabi);
+
+		/* enforcing an account */
+		cabi_account_enforce(cabi);
+	}
+	EXIT;
+}
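The start/stop pair documented next samples the TSC when a task is switched in and again when it is switched out, and charges the difference to the account. A user-space sketch of the same pattern (x86-64 with GCC-style inline assembly assumed; cabi_rdticks() is taken to be a thin wrapper around rdtsc):

	#include <stdio.h>

	/* read the 64-bit time stamp counter */
	static inline unsigned long long rdticks(void)
	{
		unsigned int lo, hi;

		__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
		return ((unsigned long long)hi << 32) | lo;
	}

	int main(void)
	{
		unsigned long long start, used = 0;
		volatile unsigned long spin;

		start = rdticks();		/* cabi_start_account() */
		for (spin = 0; spin < 1000000; spin++)
			;			/* ...the task runs... */
		used += rdticks() - start;	/* cabi_stop_account() */
		printf("used %llu ticks\n", used);
		return 0;
	}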
+ */ + cabi_rdticks(&cabi->cpu_period_start_ticks); + + switch (cabi->cpu_state) { + + case CABI_IS_NULL: + cabi_debug("[start:NULL] %x\n", (int)cabi->cpu_state); + cabi->cpu_state |= CABI_IS_RUNNING; + cabi_enforce_timer_start(cabi, &next); + cabi_debug("[start:NULL] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_DEPLETED: + cabi_debug("[start:DEPLETED] %x\n", (int)cabi->cpu_state); + cabi->cpu_state |= CABI_IS_RUNNING; + cabi_debug("[start:DEPLETED] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_RUNNING: /* must not be so */ + cabi_debug("[start:RUNNING] %x\n", (int)cabi->cpu_state); + cabi_debug_state (cabi); + cabi_enforce_timer_start(cabi, &next); + cabi_debug("[start:RUNNING] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_RUNNING | CABI_IS_DEPLETED: + cabi_debug("[start:RUN|DEP] %x\n", (int)cabi->cpu_state); + cabi_debug_state (cabi); + break; + default: + /* unkown */ + cabi_debug("[start:default] %x\n", (int)cabi->cpu_state); + cabi_debug_state (cabi); + cabi_debug_tick (next); + cabi->cpu_state = CABI_IS_RUNNING; + + if (next > 0) { + cabi_debug_tick(next); + cabi_enforce_timer_start(cabi, &next); + } else { + cabi->cpu_state |= CABI_IS_DEPLETED; + } + cabi_debug("[start:default] %x\n", (int)cabi->cpu_state); +start_account_out: + break; + + } +} + + +void +cabi_stop_account(cabi_account_t cabi, cpu_tick_t now) +{ + + long long left; + + cabi_enforce_timer_cancel(); + + cabi_account_summation_of_used_ticks(cabi, now); + + /* check state */ + cabi_debug_state (cabi); + + switch (cabi->cpu_state) { + + case CABI_IS_RUNNING: + cabi_debug ("[stop:b:RUN] %x\n", (int)cabi->cpu_state); + cabi->cpu_state &= ~CABI_IS_RUNNING; + cabi_account_check_enforce(cabi); + cabi_debug ("[stop:a:RUN] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_RUNNING | CABI_IS_DEPLETED: + cabi_debug ("[stop:b:RUN|DEP] %x\n", (int)cabi->cpu_state); + cabi->cpu_state &= ~CABI_IS_RUNNING; + cabi_debug ("[stop:a:RUN|DEP] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_NULL: /* must not be so */ + cabi_debug ("[stop:b:NULL] %x\n", (int)cabi->cpu_state); + cabi_debug_state(cabi); + cabi_account_check_enforce(cabi); + cabi_debug ("[stop:a:NULL] %x\n", (int)cabi->cpu_state); + break; + case CABI_IS_DEPLETED: /* must not be so */ + cabi_debug ("[stop:b:DEP] %x\n", (int)cabi->cpu_state); + cabi_debug_state(cabi); + cabi_debug ("[stop:a:DEP] %x\n", (int)cabi->cpu_state); + break; + default: /* unkown */ + left = + cabi->cpu_period_available_ticks - + cabi->cpu_period_used_ticks; + + /* check */ + cabi_debug_state(cabi); + cabi_debug_ticks(cabi); + + /* set new state */ + cabi->cpu_state = CABI_IS_NULL; + + + if (left <= 0) { + cabi->cpu_state |= CABI_IS_DEPLETED; + + /* check state */ + cabi_debug_state (cabi); + } + break; + } +} + + +#ifdef CONFIG_PROC_FS +/* this is test global variables. 
+
+
+#ifdef CONFIG_PROC_FS
+/* these are test global variables. */
+int count = 0;
+int gc = 0;
+
+int
+cabi_account_read_proc(cabi_account_t cabi, char *buf)
+{
+	char *p = buf;
+	cpu_tick_data_t used_cpu_time;
+	cpu_capacity_t ave, cur, prv;	/* unsigned long */
+	unsigned long rem;
+	struct rs_proc_list *rs_proc = NULL;
+	struct list_head *proc_list;
+
+	ENTER;
+
+	/* check before load */
+	cabi_debug_ticks (cabi);
+
+	/* set tick */
+	used_cpu_time = TICK2USEC(&cabi->cpu_period_used_ticks);
+
+	/* set capacity */
+	cur = capacity_of (cabi->cpu_period_used_ticks,
+			   cabi->cpu_period_ticks);
+
+	/* check prev */
+	cabi_debug_tick (cabi->cpu_period_prev_used_ticks);
+
+	/* set capacity */
+	prv = capacity_of (cabi->cpu_period_prev_used_ticks,
+			   cabi->cpu_period_ticks);
+
+	/* set total utils */
+	if (cabi->cpu_average.total_count) {
+		/*
+		 * ave         : %ul
+		 * total_utils : %ull
+		 * total_count : %ul
+		 */
+		ave = (unsigned long) div_long_long_rem (
+			(unsigned long long) cabi->cpu_average.total_utils,
+			(unsigned long) cabi->cpu_average.total_count,&rem);
+
+		if(cabi->cpu_average.total_utils > 1000000000) {
+			/* this is not a good solution, FIXME */
+			cabi->cpu_average.history[gc++] = ave;
+			cabi->cpu_average.total_utils = 0;
+			cabi->cpu_average.total_count = 0;
+		}
+
+	} else {
+		ave = cur;
+	}
+
+	/* 1st line: utilization statistics */
+	/* reserved, previous, average, max, min */
+	p += sprintf(p, "%lu.%04lu ",
+		     CAPACITY_INT(cabi->cpu_capacity),
+		     CAPACITY_FRAC(cabi->cpu_capacity));
+
+	p += sprintf(p, "%lu.%04lu %lu.%04lu ",
+		     CAPACITY_INT(prv), CAPACITY_FRAC(prv),
+		     CAPACITY_INT(ave), CAPACITY_FRAC(ave));
+
+	p += sprintf(p, "%lu.%04lu ",
+		     CAPACITY_INT(cabi->cpu_max_utilization),
+		     CAPACITY_FRAC(cabi->cpu_max_utilization));
+
+	p += sprintf(p, "%lu.%04lu \n",
+		     CAPACITY_INT(cabi->cpu_min_utilization),
+		     CAPACITY_FRAC(cabi->cpu_min_utilization));
+
+	/* 2nd line: current CPU usage
+	 * used available requested */
+
+	p += sprintf(p, "%llu %lu %lu\n",
+		     used_cpu_time,
+		     TICK2USEC(&cabi->cpu_period_available_ticks),
+		     TICK2USEC(&cabi->cpu_time_ticks));
+	/* debug */
+	cabi_debug_ticks (cabi);
+
+	/* 3rd line: count of replenishment */
+	p += sprintf(p, "%lu\n", cabi->cpu_average.total_count);
+
+	/* 4th line: account object id */
+	p += sprintf(p, "%lu\n", cabi->cabi_id);
+
+	/* 5th line: process ids */
+	/* find rs_proc */
+	proc_list = cabi->cpu_proc_list.next;
+	while (proc_list != &cabi->cpu_proc_list) {
+		/* get a rs_proc */
+		rs_proc = list_entry(proc_list, struct rs_proc_list,
+				     rs_proc_list);
+		p += sprintf(p, "%d ", rs_proc->rs_proc_pid);
+		/* next element */
+		proc_list = proc_list->next;
+	}
+	p += sprintf(p, "\n");
+
+	return (p - buf);
+}
+
+int
+cabi_account_read_bindpid_proc(cabi_account_t cabi, char *buf)
+{
+	char *p = buf;
+	struct rs_proc_list *rs_proc = NULL;
+	struct list_head *proc_list;
+
+	/* find rs_proc */
+	proc_list = cabi->cpu_proc_list.next;
+	while (proc_list != &cabi->cpu_proc_list) {
+		/* get a rs_proc */
+		rs_proc = list_entry(proc_list, struct rs_proc_list,
+				     rs_proc_list);
+		p += sprintf(p, "%d\n", rs_proc->rs_proc_pid);
+		/* next element */
+		proc_list = proc_list->next;
+	}
+	return (p - buf);
+}
+
+int
+cabi_account_status_proc(char *buf)
+{
+	char *p = buf;
+
+	/* cabi ticks per second, CPU capacity taken for accounts */
+	p += sprintf(p, "%lu\n",
+		     (unsigned long) cabi_cpu_ticks_per_second);
+
+	return (p - buf);
+}
+
+int
+cabi_account_base_status_proc(char *buf)
+{
+	return 0;
+}
+#endif /* CONFIG_PROC_FS */
diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_cyclic.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_cyclic.c --- ./linux-2.6.18.1/drivers/cabi/cabi_cyclic.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_cyclic.c 2007-06-17 00:23:21.000000000 +0900 @@ -0,0 +1,345 @@ +/* + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University + * Nakajima Laboratory. MontaVista Software, Inc. + * + * Midori Sugaya + * + */ + +#include +#include +#include + +#include + +/* + * -------------------------------------------------------------- + * Policy : Default cyclic executive + * --------------------------------------------------------------- + * Status : RUNNING running when the replenish timer expires. + * : DEPLETED depleted when the replenish timer expires, + * : NULL + * --------------------------------------------------------------- + * Operation : BLOCK change status of the operation + * : SIGNAL change status of the operation + * --------------------------------------------------------------- + * Priority : Not controlled, use the user-defined priority + * --------------------------------------------------------------- + */ + +int is_running = 0; +void cyc_replenish (cabi_account_t); +void cyc_replenish_capacity (cabi_account_t); +void cyc_account_status_in (cabi_account_t); +void cyc_account_status_out (cabi_account_t); +void cyc_statistic_calculation (cabi_account_t); +void cyc_replenish_operation (cabi_account_t); +void cyc_isr (cabi_account_t); +void cyc_isr_operation (cabi_account_t); + +struct cabi_account_operations cyclic_ops = { + .replenish = cyc_replenish, + .isr = cyc_isr, + .isr_operation = cyc_isr_operation, +}; + +void +cyc_replenish (cabi_account_t cabi) +{ + + ENTER; + cyc_account_status_in(cabi); + cyc_statistic_calculation (cabi); + cyc_replenish_capacity(cabi); + cyc_account_status_out (cabi); + EXIT; + +} + +void +cyc_replenish_capacity (cabi_account_t cabi) +{ + /* debug */ + cabi_debug_ticks(cabi); + cabi_debug_info(cabi); + + /* reset the used ticks */ + cabi->cpu_period_used_ticks = 0; +} + +void +cyc_stop_account (cabi_account_t cabi) +{ + cpu_tick_data_t now; + + ENTER; + cabi_rdticks(&now); + cabi_stop_account(cabi, &now); + is_running++; + EXIT; +} +void +cyc_account_status_in (cabi_account_t cabi) +{ + + ENTER; + switch (cabi->cpu_state) { + case CABI_IS_NULL: + /* 00 : no bound process */ + CABI_DBG; + cabi_debug_state(cabi); + break; + case CABI_IS_RUNNING: + /* 01 : passed the period */ + CABI_DBG; + cabi_debug_state(cabi); + cyc_stop_account(cabi); + break; + case CABI_IS_DEPLETED: + /* 10 : correctly accounted */ + CABI_DBG; + cabi_debug_state(cabi); + break; + case CABI_IS_RUNNING|CABI_IS_DEPLETED: + /* 11 : first time to account */ + CABI_DBG; + cabi_debug_state(cabi); + cyc_stop_account(cabi); + break; + default: + /* it should not happen */ + CABI_DBG; + cabi_debug_state(cabi); + break; + } + + /* it should be changed to the initial status, CABI_IS_NULL.
*/ + cabi->cpu_state &= ~CABI_IS_DEPLETED; + + EXIT; +} + +void +cyc_replenish_operation (cabi_account_t cabi) +{ + + switch (cabi->pm.operation) { + case OP_BLOCK: + cabi->opt_state = CABI_UNBLOCK; + wake_up(&cabi->depleted_wait); + break; + case OP_SIGNAL: + break; + default: + break; + } +} + +void +cpu_time_replenish_init (cabi_account_t cabi) +{ + + cabi->cpu_period_available_ticks = cabi->cpu_time_ticks; +} + +int bonus; + +void +cyc_account_status_out (cabi_account_t cabi) +{ + + unsigned long long avail = cabi->cpu_period_available_ticks; + unsigned long long used = cabi->cpu_period_prev_used_ticks; + unsigned long long cpu_time = cabi->cpu_time_ticks; + + ENTER; + + if (avail > 0) { + + switch (cabi->pm.rep_policy) { + case REP_SOFT: + CABI_DBG; + if (avail != cabi->cpu_time_ticks) { + cabi_debug_ticks (cabi); + cpu_time_replenish_init(cabi); + } + break; + case REP_HARD: + CABI_DBG; + if (bonus) { + bonus = 0; /* reset the toggle */ + cpu_time_replenish_init(cabi); + break; + } else { + bonus++; + if (cpu_time > used) { + CABI_DBG; + /* used less than the capacity */ + cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + += (cpu_time - used); + cabi_debug_avail_ticks (cabi); + } else if (cpu_time < used) { + CABI_DBG; + /* used more than the capacity */ + cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + -= (used - cpu_time); + cabi_debug_avail_ticks (cabi); + } else { + CABI_DBG; + /* avail and used are equal */ + cabi_debug_avail_ticks (cabi); + } + } + break; + default: + /* if nothing is set, this path is taken. */ + break; + } + cyc_replenish_operation(cabi); + + } else { + CABI_DBG; + cabi_debug_avail_ticks(cabi); + cpu_time_replenish_init(cabi); + + if (is_running) + cabi_start_account(cabi); + } + + +} + +void +cyc_statistic_calculation(cabi_account_t cabi) +{ + + cpu_capacity_t c; + + ENTER; + + /* update statistics */ + cabi->cpu_period_prev_used_ticks = cabi->cpu_period_used_ticks; + cabi->cpu_total_used_ticks += cabi->cpu_period_used_ticks; + + /* check capacity */ + c = capacity_of(cabi->cpu_period_used_ticks, + cabi->cpu_period_ticks); + + /* debug */ + cabi_debug_capacity(c); + + if (cabi->cpu_max_utilization < c) { + cabi->cpu_max_utilization = c; + + /* capacity exceeds the max utilization */ + cabi_debug_capacity(c); + + } else if (cabi->cpu_min_utilization > c + || cabi->cpu_min_utilization == 0) { + + cabi->cpu_min_utilization = c; + + /* capacity: less than the min util */ + cabi_debug_capacity(c); + + } + + /* summation for average */ + cabi->cpu_average.total_utils += c; + cabi->cpu_average.total_count++; + + EXIT; +} + +extern cabi_account_t cabi_current_account; +void cyc_isr (cabi_account_t); +void cyc_isr_operation (cabi_account_t); + +#include +#include +#include + +void +cyc_isr (cabi_account_t cabi) +{ + + ENTER; + + if (unlikely(irqs_disabled())) { + printk(KERN_ERR "BUG3: CABI %s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + } + + if (cabi_account_depleted(cabi)) + cyc_isr_operation(cabi); + + EXIT; +} +
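/*
 * Editorial sketch (not part of the patch): under REP_HARD in
 * cyc_account_status_out() above, the new budget is the full cpu_time
 * plus the unused slack from the previous period (or minus the overrun),
 * applied on alternating periods via the "bonus" toggle. The same
 * arithmetic in isolation, mirroring the code above including its
 * implicit assumption that an overrun never exceeds cpu_time:
 */
static unsigned long long
rep_hard_budget(unsigned long long cpu_time, unsigned long long used)
{
        unsigned long long avail = cpu_time;    /* replenish to full */

        if (cpu_time > used)
                avail += cpu_time - used;       /* return unused slack */
        else if (cpu_time < used)
                avail -= used - cpu_time;       /* pay back the overrun */
        return avail;
        /* e.g. cpu_time = 10ms, used = 6ms  -> next budget 14ms;
         *      cpu_time = 10ms, used = 13ms -> next budget  7ms */
}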
+void +cyc_isr_operation (cabi_account_t cabi) +{ + + ENTER; + + if (unlikely(irqs_disabled())) { + printk(KERN_ERR "BUG2: CABI %s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + } + switch (cabi->pm.operation) { + case OP_BLOCK: + CABI_DBG; + cabi->opt_state = CABI_BLOCKED; + cabi->block_count++; + cabi_account_sleep_on(cabi); + break; + case OP_SIGNAL: + CABI_DBG; + if (cabi->opt_state != CABI_SIGNAL) + cabi->opt_state = CABI_SIGNAL; + if (!CABI_SIGFLAG(cabi)) { + if (!cabi->signal_block) { + cabi_send_signal( + cabi, current->pid, + CABI_SIGNUM(cabi)); + } + } else { + if (!cabi->signal_block) { + cabi_send_signal( + cabi, CABI_SIGPID(cabi), + CABI_SIGNUM(cabi)); + } + } + break; + default: + cabi_current_account = NULL_ACCOUNT; + CABI_DBG; + cabi_debug ("cabi_ret_with_reschedule:" + "cabi(0x%x) operation(%d)\n", + (int) cabi, cabi->pm.operation); + + break; + } + EXIT; +} + diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_debug.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_debug.c --- ./linux-2.6.18.1/drivers/cabi/cabi_debug.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_debug.c 2007-06-17 00:25:02.000000000 +0900 @@ -0,0 +1,607 @@ +/* + * linux/drivers/cabi/cabi_debug.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University + * Nakajima Laboratory. + * + * 2007-04 Midori Sugaya + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * For test + */ +int cabi_len = 0; +int cabi_debug_counter = 0; +unsigned long *start_address; +int cabi_dump_flag = 0; +int cabi_mask = 0x3ff; +int serial_number = 0; +/* + * 0x3f (63) + * 0x7f (127) + * 0xff (255) + * 0x1ff (511) + * 0x3ff (1023) + * 0xfff (4095) + * 0x13ff (5120) 1024*5 + */ +struct cabi_dump_struct +{ + int counter; + int logid; + unsigned long long tsc_time; + pid_t pid; + unsigned long cabi_id; +}; +struct cabi_dump_struct *cs; + +unsigned long buffer_counter = 0; +unsigned long serial_counter = 0; + +int cabi_dump_init (void) +{ + + cs = malloc ((cabi_mask+1) * (sizeof (struct cabi_dump_struct))); + + if (!cs) { + printk ("Cannot allocate memory!\n"); + return -1; + } + + bzero(cs, (cabi_mask+1) * (sizeof (struct cabi_dump_struct))); + buffer_counter = 0; + + cabi_dump_flag = 1; + return 0; +} + +void cabi_dump (int logid) +{ + + unsigned long long now; + unsigned long flag; + + if (cabi_dump_flag) { + cabi_spin_lock (flag); + (cs + buffer_counter)->counter = serial_counter++; + (cs + buffer_counter)->logid = logid; + cabi_eval_rdtsc(&now); + (cs + buffer_counter)->tsc_time = now; + (cs + buffer_counter)->pid = 0; + (cs + buffer_counter)->cabi_id = 0; + buffer_counter++; + buffer_counter &= cabi_mask; + cabi_spin_unlock (flag); + } +} + +void cabi_dump_sched (int logid, struct task_struct *tsk) +{ + + unsigned long long now; + unsigned long flag; + + cabi_account_t cabi = TASK_ACCOUNT(tsk); + + + if (cabi_dump_flag) { + cabi_spin_lock (flag); + (cs + buffer_counter)->counter = serial_counter++; + (cs + buffer_counter)->logid = logid; + cabi_eval_rdtsc(&now); + (cs + buffer_counter)->tsc_time = now; + (cs + buffer_counter)->pid = tsk->pid; + + if (cabi == NULL) { + (cs + buffer_counter)->cabi_id = 0; + } else { + (cs + buffer_counter)->cabi_id = cabi->cabi_id; + } + buffer_counter++; + buffer_counter &= cabi_mask; + cabi_spin_unlock (flag); + } +} + +void cabi_dump_end (void) +{ + unsigned long flag; + cabi_dump_flag = 0; + buffer_counter = 0; + + cabi_spin_lock(flag); + free (cs); + cabi_spin_unlock(flag); + +}
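/*
 * Editorial sketch (not part of the patch): the dump buffer above is a
 * power-of-two ring; "index & cabi_mask" replaces a modulo, which is why
 * cabi_mask must be of the form (2^n - 1). The same pattern, standalone:
 */
#define RING_MASK 0x3ff                 /* 1024 slots, as with cabi_mask */

struct ring {
        unsigned long head;             /* monotonically increasing */
        int slot[RING_MASK + 1];
};

static void ring_put(struct ring *r, int v)
{
        r->slot[r->head & RING_MASK] = v;   /* wraps automatically */
        r->head++;
}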
+void +cabi_dump_ex (int logid, struct task_struct *tsk, + cabi_account_t cabi) +{ + + unsigned long long now; + + cabi_eval_rdtsc(&now); + + if (cabi_dump_flag) { + (cs + buffer_counter)->counter = serial_counter++; + (cs + buffer_counter)->logid = logid; + (cs + buffer_counter)->tsc_time = now; + if (tsk) + (cs + buffer_counter)->pid = tsk->pid; + else + (cs + buffer_counter)->pid = 0; + if (cabi) + (cs + buffer_counter)->cabi_id = cabi->cabi_id; + else + (cs + buffer_counter)->cabi_id = 0; + + buffer_counter++; + buffer_counter &= cabi_mask; + } +} + +unsigned long read_counter; +void cabi_dump_read (int flag) +{ + + unsigned long temp_counter; + unsigned long local_counter; + unsigned long flags; + int i; + + cabi_dump_flag = 0; + local_counter = 0; + + cabi_spin_lock(flags); + + if (flag) { + temp_counter = buffer_counter; + temp_counter &= cabi_mask; + printk("logid = %d\n", (cs+temp_counter)->logid); + + if ((cs + temp_counter)->logid == 0) { + printk("READ_0\n"); + read_counter = 0; + } else { + printk("READ_1\n"); + read_counter = buffer_counter; + } + } + printk ("CABI-DUMP: LOOP %d\n", cabi_mask+1); + for (i = 0; i < cabi_mask+1; i++) { + local_counter++; + + if ((cs+read_counter)->pid) { + printk ("CABI-DMPRD:%lu %lu %d %llu %d %lu\n", + local_counter, + (cs + read_counter)->counter, + (cs + read_counter)->logid, + (cs + read_counter)->tsc_time, + (cs + read_counter)->pid, + (cs + read_counter)->cabi_id); + } else { + printk ("CABI-DMPRD:%lu %lu %d %llu\n", + local_counter, + (cs + read_counter)->counter, + (cs + read_counter)->logid, + (cs + read_counter)->tsc_time); + } + read_counter++; + read_counter &= cabi_mask; + } + cabi_spin_unlock(flags); +} + + + +void +__debug_timespecs (struct timespec *c, struct timespec *t) +{ + cabi_debug("[debug_timespec] c (%lu %lu) t (%lu %lu)\n", + (unsigned long)c->tv_sec, + (unsigned long)c->tv_nsec, + (unsigned long)t->tv_sec, + (unsigned long)t->tv_nsec); +} + +void +cabi_debug_timespecs (struct timespec *c, struct timespec *t) +{ + __debug_timespecs (c, t); +} + +void +cabi_debug_timespec (struct timespec *t) +{ + + cabi_debug ("[debug_timespec] t (%lu %lu)\n", + (unsigned long)t->tv_sec, + (unsigned long)t->tv_nsec); +} + + +void cabi_debug_ct(cpu_capacity_quad_t qc, cpu_capacity_quad_t qt) +{ + cabi_debug("[debug_ct] qc %llu qt %llu\n", qc, qt); +} + +void +cabi_debug_capacity (cpu_capacity_t capacity) +{ + cabi_debug ("[capacity] capacity %lu.%04lu\n", + CAPACITY_INT(capacity), + CAPACITY_FRAC(capacity)); +} + + +void +cabi_debug_sig_param (cabi_account_t cabi) +{ + cabi_debug ("[cabi_sig_param] signal : pid %d sig %d flag %d\n", + (int) CABI_SIGPID(cabi), (int) CABI_SIGNUM(cabi), + (int) CABI_SIGFLAG(cabi)); +} + +void +cabi_sanity_check_euid (struct task_struct *tsk) +{ + + cabi_debug("cabi_create: Permission denied.
EUID [%d], UID [%d]\n", + tsk->euid, tsk->uid); +} + +int +pid_check (pid_t pid) +{ + if (pid < 0) { + cabi_debug("[bind_pid]: invalid pid (%d)\n", + (int) pid ); + return 1; + } + return 0; +} + +int +pgid_check (pid_t pgid) +{ + if (pgid == 0 || pgid < 0) { + cabi_debug("[bind_pgid]: invalid pgid %d.\n", + (int) pgid); + return 1; + } + return 0; +} + +int +cabi_id_check (unsigned long cabi_id) +{ + if (cabi_id == 0 || cabi_id == OVERLOAD_CABI_ID) { + cabi_debug("invalid cabi id %d\n", + (int) cabi_id); + return 1; + } + return 0; +} + +void +bind_pid_error (pid_t pid) +{ + cabi_debug ("[bind_pid] error invalid pid(%d)\n", pid); +} + + +void +cabi_account_attached(void) +{ + + cabi_debug("account_attach_process: account already " + "attached pid(%d) cabi(0x%x) [%d]\n", + current->pid, (int) TASK_ACCOUNT(current), + (int) TASK_ACCOUNT(current)->cabi_id); +} + +int +cabi_sanity_check_ucabi (struct cabi_uaccount *user_cabi) +{ + + ENTER; + + + if (user_cabi->cpu_time.tv_sec <= 0 && + user_cabi->cpu_time.tv_nsec <= 0) { + cabi_debug("cpu_time sec %lu nsec %lu\n", + user_cabi->cpu_time.tv_sec, + user_cabi->cpu_time.tv_nsec); + return CABI_EINVAL; + } + + CABI_DBG; + if (user_cabi->cpu_period.tv_sec <= 0 && + user_cabi->cpu_period.tv_nsec <= 0) { + cabi_debug("period sec %lu nsec %lu\n", + user_cabi->cpu_period.tv_sec, + user_cabi->cpu_period.tv_nsec); + return CABI_EINVAL; + } + + CABI_DBG; + if (user_cabi->pm.operation == OP_BLOCK) { + if (user_cabi->pm.operation <= OP_NONE || + user_cabi->pm.operation >= OP_UNKNOWN) + return CABI_EINVAL; + + CABI_DBG; + if (user_cabi->pm.bind_proc_type < BIND_NORMAL_PROC || + user_cabi->pm.bind_proc_type > BIND_IDLE_PROC) + return CABI_EINVAL; + } + + if (user_cabi->pm.operation == CABI_SIGNAL) { + CABI_DBG; + if (user_cabi->pm.operation == OP_SIGNAL) { + if (user_cabi->pm.cabi_signal.pid < 0) + return CABI_EINVAL; + if (user_cabi->pm.cabi_signal.sig < 0 || + user_cabi->pm.cabi_signal.sig > 32) + return CABI_EINVAL; + } + } + EXIT; + return CABI_SUCCESS; +} + +int +cabi_sanity_check_operation (struct cabi_uaccount *ucabi) +{ + ENTER; + switch (ucabi->pm.operation) { + case OP_BLOCK: + CABI_DBG; + if (ucabi->pm.operation != OP_BLOCK) + return -1; + break; + case OP_SIGNAL: + CABI_DBG; + if (ucabi->pm.operation != OP_SIGNAL) + return -1; + + /* check signal validatations */ + if (!ucabi->pm.cabi_signal.pid) { + ucabi->pm.cabi_signal.flag = CABI_SEND_DEFL; + } else { + ucabi->pm.cabi_signal.flag = CABI_SEND_PID; + } + if (!ucabi->pm.cabi_signal.sig) { + /* if signal number is null, set default + signal which will do nothing. 
*/ + ucabi->pm.operation = OP_NONE; + } + break; + default: + break; + } + EXIT; + + return 0; +} + +int +cabi_sanity_check_timespec_c_and_t ( + struct timespec *cpu_time, + struct timespec *cpu_period) +{ + + long time_sec, time_nsec, period_sec, period_nsec; + + ENTER; + + /* Initialize and check the execution time parameter */ + time_sec = time_nsec = 0; + time_sec = cpu_time->tv_nsec / NANOSEC; + time_nsec = cpu_time->tv_nsec - time_sec * NANOSEC; + + if (time_sec > (long)(CABI_TIME_SEC_MAX - cpu_time->tv_sec)) { + cabi_debug("cpu_time is over %ld\n", time_sec); + return CABI_EINVAL; + } + time_sec += cpu_time->tv_sec; + + /* Initialize and check the period parameter */ + period_sec = period_nsec = 0; + period_sec = cpu_period->tv_nsec / NANOSEC; + period_nsec = cpu_period->tv_nsec - period_sec * NANOSEC; + + + if (period_sec > (long)(CABI_TIME_SEC_MAX - cpu_period->tv_sec)) { + cabi_debug ("cpu_period is over max %ld\n", period_sec); + return CABI_EINVAL; + } + period_sec += cpu_period->tv_sec; + + + if (time_sec > period_sec) { + cabi_debug ("time_sec > period_sec error.\n"); + return CABI_EINVAL; + } else if (time_sec == period_sec) { + if (time_nsec > period_nsec) { + cabi_debug ("time_nsec > period_nsec error.\n"); + return CABI_EINVAL; + + } + } + EXIT; + return CABI_SUCCESS; +} + +int +cabi_sanity_check_overload_id (void) +{ + + cabi_account_t cabi; + + if ((cabi = search_cabi(OVERLOAD_CABI_ID)) != NULL) { + cabi_debug ("[%d] idle cabi_id : %d\n", __LINE__, + (int)cabi->cabi_id); + return CABI_EINVAL; + } + return CABI_SUCCESS; +} + +int +cabi_sanity_check_sigoperation (struct cabi_uaccount *user_cabi) +{ + if (user_cabi->pm.operation != OP_SIGNAL) { + cabi_debug("invalid operation\n"); + return CABI_EINVAL; + } + cabi_debug ("[sys_create] idle cabi act : %d\n", + user_cabi->pm.operation); + return CABI_SUCCESS; +} + +void +cabi_sanity_check_parameter_to_copy (struct timespec *c, + struct timespec *t, struct cpu_param *p) +{ + cabi_debug ("[sys_create: %d] c(%d), t(%d), operation(%x)" + " bind_proc_type (%x)\n", + __LINE__, + (int) c->tv_nsec, + (int) t->tv_nsec, + p->operation, + p->bind_proc_type); +} + + +/* cabi account attach */ +void cabi_debug_info (cabi_account_t cabi) +{ + CABI_DBG; + cabi_debug("cabi %x, id %lu\n", + (int)cabi, cabi->cabi_id); +} + +void cabi_debug_state (cabi_account_t cabi) +{ + CABI_DBG; + cabi_debug ("cabi_cpu_state 0x%x\n", (int) cabi->cpu_state); +} + +void cabi_debug_detach (struct rs_proc_list *rs_proc) +{ + CABI_DBG; + cabi_debug("[detach] rs_proc(0x%x) pid (%d)\n", + (int) rs_proc, rs_proc->rs_proc_pid); +} + +/* replenish */ +void cabi_debug_ticks (cabi_account_t cabi) +{ + CABI_DBG; + cabi_debug ("[ticks] used %llu period %llu avail %llu cpu_time %llu\n", + cabi->cpu_period_used_ticks, + cabi->cpu_period_ticks, + cabi->cpu_period_available_ticks, + cabi->cpu_time_ticks); +} + +void cabi_debug_avail_ticks (cabi_account_t cabi) +{ + cabi_debug ("[avail_ticks] used %llu avail %llu cpu_time %llu\n", + cabi->cpu_period_used_ticks, + cabi->cpu_period_available_ticks, + cabi->cpu_time_ticks); +} + +void cabi_debug_tick (unsigned long long tick) +{ + cabi_debug("tick %llu\n", tick); +} + +/* cabi_sched.c */ +void cabi_debug_entities (cabi_account_t cabi01, + cabi_account_t cabi02) +{ + cabi_debug("cabi %x cabi %x\n", + (int) cabi01, (int) cabi02); +} + +/* cabi_signal.c */ +void cabi_debug_signal_pid (int pid) +{ + cabi_debug ("cabi_send_signal: invalid pid (%d).\n", pid); + cabi_debug ("No receiving process. (%d)\n", pid); +}
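/*
 * Editorial sketch (not part of the patch): the checks in
 * cabi_sanity_check_timespec_c_and_t() above fold an oversized tv_nsec
 * into whole seconds before comparing C against T. The same normalization
 * in isolation, with NANOSEC = 10^9 as the code above assumes:
 */
#define EX_NANOSEC 1000000000L

static void normalize_ts(long *sec, long *nsec)
{
        long carry = *nsec / EX_NANOSEC;  /* whole seconds hidden in nsec */

        *sec += carry;
        *nsec -= carry * EX_NANOSEC;
        /* e.g. {1s, 1500000000ns} becomes {2s, 500000000ns} */
}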
(%d)\n", pid); +} + +void cabi_debug_signal_attrs (int pid, int sig, struct task_struct *tsk) +{ + cabi_debug ("cabi_send_signal: pid %d sig %d \n", pid, sig); + cabi_debug ("pid %d status %d\n", + (int) tsk->pid, (int)tsk->state); + cabi_debug ("signal_struct: count %d\n", + (int)tsk->signal->count.counter); +} + +/* cabi_timer.c */ +void cabi_debug_times (struct timer_list *tmr, unsigned long jiffies) +{ + cabi_debug (" expires %lu, jiffies %lu)\n", + tmr->expires, jiffies); +} + +void cabi_debug_timer (struct timer_list *tmr) +{ + cabi_debug (" expires %lu \n", tmr->expires); +} + +/* kernel/sched.c */ +void cabi_debug_ksched (struct task_struct *prev, + struct task_struct *next) +{ + + cabi_account_t prev_cabi = TASK_ACCOUNT(prev); + cabi_account_t next_cabi = TASK_ACCOUNT(next); + + /* check */ + if (prev_cabi) { + cabi_debug ("\n[ksched:01] prev_cabi[%d] prev[%d] next[%d]\n", + (int) prev_cabi->cabi_id, + prev->pid, next->pid); + } + if (next_cabi) { + cabi_debug ("[ksched:02]next_cabi[%d] prev[%d] next[%d]\n", + (int) next_cabi->cabi_id, + prev->pid, next->pid); + } + + if (prev->pid == 0 || next->pid == 0) { + if (TASK_ACCOUNT(prev) || TASK_ACCOUNT(next)) { + cabi_debug ("[ksched:03] prev[%d] next[%d]\n", + prev->pid, next->pid); + } + } +} + +/* cabi dsv */ + +void check_sched_param (struct rs_proc_list *rs_proc) +{ + + struct sched_param param; + sys_sched_getparam (rs_proc->rs_proc_task->pid, ¶m); + cabi_debug ("[boost:] pid %d prio %d\n", + rs_proc->rs_proc_task->pid, + (int) param.sched_priority); +} + diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_defsrv.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_defsrv.c --- ./linux-2.6.18.1/drivers/cabi/cabi_defsrv.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_defsrv.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,458 @@ +/* + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University + * / Nakajima Laboratory. MontaVista Software, Inc. + * + * Midori Sugaya + * + */ + +#include +#include +#include +#include +#include + +/* + * -------------------------------------------------------------- + * Policy : Defferrable Server Algorithm + * --------------------------------------------------------------- + * Status : RUNNING running when the replenish timer expires. 
+ * : DEPLETED depleted when the replenish timer expires, + * : NULL + * --------------------------------------------------------------- + * Operation : BLOCK change status of the operation + * : SIGNAL change status of the operation + * : NONE nothing to do + * --------------------------------------------------------------- + * Priority : Boost until cpu_time + * --------------------------------------------------------------- + */ + +extern cabi_account_t cabi_current_account; +int dsv_is_running = 0; +extern int sched_setscheduler (struct task_struct *p, int policy, + struct sched_param *param); +extern spinlock_t cabi_lock; + + +/* prototypes */ +void dsv_replenish (cabi_account_t); +void dsv_replenish_capacity (cabi_account_t); +void dsv_account_status_in (cabi_account_t); +void dsv_account_status_out (cabi_account_t); +void dsv_statistic_calculation (cabi_account_t); +void dsv_replenish_operation (cabi_account_t); +void dsv_replenish_priority_boost (cabi_account_t); +void dsv_isr (cabi_account_t); +void dsv_isr_operation (cabi_account_t); +void dsv_isr_priority (cabi_account_t); +int setup_boost_priority (cabi_account_t); +int free_boost_priority (cabi_account_t); +int cabi_sched_setscheduler (pid_t, int policy, struct sched_param *); + + +/* operation table */ +struct cabi_account_operations dsv_ops = { + .replenish = dsv_replenish, + .isr = dsv_isr, + .isr_operation = dsv_isr_operation, + .isr_priority = dsv_isr_priority, +}; + +void +dsv_replenish (cabi_account_t cabi) +{ + ENTER; + dsv_account_status_in(cabi); + dsv_statistic_calculation (cabi); + dsv_replenish_capacity(cabi); + dsv_account_status_out (cabi); + EXIT; +} + +void +dsv_replenish_capacity (cabi_account_t cabi) +{ + ENTER; + + /* debug */ + cabi_debug_ticks(cabi); + cabi_debug_info(cabi); + + /* reset the used ticks */ + cabi->cpu_period_used_ticks = 0; + EXIT; +} + +void +dsv_stop_account (cabi_account_t cabi) +{ + cpu_tick_data_t now; + + ENTER; + cabi_rdticks(&now); + cabi_stop_account(cabi, &now); + dsv_is_running++; + EXIT; +} +void +dsv_account_status_in (cabi_account_t cabi) +{ + + ENTER; + /* it should be changed to the initial status, CABI_IS_NULL. */ + dsv_stop_account(cabi); + cabi->cpu_state &= ~CABI_IS_DEPLETED; + EXIT; +} + + + + +void +dsv_replenish_operation (cabi_account_t cabi) +{ + + ENTER; + + /* even if an operation is set, when the policy is dsv + only one operation should be performed.
*/ + + switch (cabi->pm.operation) { + case OP_BLOCK: + cabi_debug("[op_block] op: %d\n", + cabi->pm.operation); + break; + case OP_SIGNAL: + cabi_debug("[op_signal] op: %d\n", + cabi->pm.operation); + break; + case OP_NONE: + cabi_debug("[op_boost] op: %d\n", + cabi->pm.operation); + break; + default: + break; + } + EXIT; +} + +void +dsv_cpu_time_replenish_init (cabi_account_t cabi) +{ + + cabi->cpu_period_available_ticks = cabi->cpu_time_ticks; +} + +extern int bonus; + +void +dsv_account_status_out (cabi_account_t cabi) +{ + + unsigned long long avail = cabi->cpu_period_available_ticks; + unsigned long long used = cabi->cpu_period_prev_used_ticks; + unsigned long long cpu_time = cabi->cpu_time_ticks; + + ENTER; + + cabi->priority_boost = 1; + cabi->priority_depleted = 1; + + + if (avail > 0) { + switch (cabi->pm.rep_policy) { + case REP_SOFT: + CABI_DBG; + if (avail != cabi->cpu_time_ticks) { + cabi_debug_ticks (cabi); + dsv_cpu_time_replenish_init(cabi); + } + break; + case REP_HARD: + CABI_DBG; + if (bonus) { + bonus = 0; /* reset the toggle */ + dsv_cpu_time_replenish_init(cabi); + break; + } else { + bonus++; + if (cpu_time > used) { + CABI_DBG; + /* used less than the capacity */ + dsv_cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + += (cpu_time - used); + cabi_debug_avail_ticks (cabi); + } else if (cpu_time < used) { + CABI_DBG; + /* used more than the capacity */ + dsv_cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + -= (used - cpu_time); + cabi_debug_avail_ticks (cabi); + } else { + CABI_DBG; + /* avail and used are equal */ + cabi_debug_avail_ticks (cabi); + } + } + break; + default: + /* if nothing is set, + this path is taken. */ + break; + } + dsv_replenish_operation(cabi); + + } else { + CABI_DBG; + cabi_debug_avail_ticks(cabi); + dsv_cpu_time_replenish_init(cabi); + } + + if (dsv_is_running) { + cabi_start_account(cabi); + } + + + + EXIT; +} + +void +dsv_statistic_calculation(cabi_account_t cabi) +{ + + cpu_capacity_t c; + ENTER; + + /* update statistics */ + cabi->cpu_period_prev_used_ticks + = cabi->cpu_period_used_ticks; + cabi->cpu_total_used_ticks + += cabi->cpu_period_used_ticks; + + /* check capacity */ + c = capacity_of(cabi->cpu_period_used_ticks, + cabi->cpu_period_ticks); + + /* debug */ + cabi_debug_capacity(c); + + if (cabi->cpu_max_utilization < c) { + cabi->cpu_max_utilization = c; + + /* capacity exceeds the max utilization */ + cabi_debug_capacity(c); + + } else if (cabi->cpu_min_utilization > c + || cabi->cpu_min_utilization == 0) { + + cabi->cpu_min_utilization = c; + + /* capacity: less than the min util */ + cabi_debug_capacity(c); + + } + + /* summation for average */ + cabi->cpu_average.total_utils += c; + cabi->cpu_average.total_count++; + + EXIT; +}
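/*
 * Editorial sketch (not part of the patch): capacity_of() above yields a
 * scaled fixed-point utilization; the %lu.%04lu format used by the proc
 * code suggests a scale of 10000 (1.0000 == 100% of the period). Under
 * that assumption, a standalone equivalent (the my_ names are made up to
 * avoid colliding with the patch's own macros):
 */
typedef unsigned long my_capacity_t;

#define MY_CAPACITY_SCALE 10000UL
#define MY_CAPACITY_INT(c)  ((c) / MY_CAPACITY_SCALE)
#define MY_CAPACITY_FRAC(c) ((c) % MY_CAPACITY_SCALE)

static my_capacity_t my_capacity_of(unsigned long long used,
                                    unsigned long long period)
{
        /* guard the division; 3ms used of a 10ms period -> 3000 ("0.3000") */
        return period ? (my_capacity_t)(used * MY_CAPACITY_SCALE / period) : 0;
}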
+void +dsv_isr (cabi_account_t cabi) +{ + + ENTER; + dsv_isr_operation(cabi); + EXIT; +} + +void +dsv_isr_operation (cabi_account_t cabi) +{ + + ENTER; + /* whether or not an operation is set, we apply the + same operation to the process */ + + switch (cabi->pm.operation) { + case OP_NONE: + CABI_DBG; + break; + case OP_BLOCK: + CABI_DBG; + break; + case OP_SIGNAL: + CABI_DBG; + if (cabi->opt_state != CABI_SIGNAL) + cabi->opt_state = CABI_SIGNAL; + if (!CABI_SIGFLAG(cabi)) { + if (!cabi->signal_block) { + cabi_send_signal( + cabi, current->pid, + CABI_SIGNUM(cabi)); + } + } else { + if (!cabi->signal_block) { + cabi_send_signal( + cabi, CABI_SIGPID(cabi), + CABI_SIGNUM(cabi)); + } + } + break; + default: + cabi_current_account = NULL_ACCOUNT; + break; + } + dsv_isr_priority (cabi); + + EXIT; +} + +void +dsv_isr_priority (cabi_account_t cabi) +{ + + ENTER; + CABI_DBG; + + /* if this function is called from replenish_account, + the priority_boost bit is set, so we set up the + priority boost */ + if (cabi->priority_boost) { + setup_boost_priority(cabi); + cabi->priority_boost = 0; + + } + + /* if the cabi is depleted, the flag is set + the first time */ + if (cabi->priority_depleted) { + if (cabi->cpu_state & CABI_IS_DEPLETED) { + free_boost_priority (cabi); + cabi->priority_depleted = 0; + } + + } + EXIT; +} + +int +free_boost_priority (cabi_account_t cabi) +{ + + struct sched_param param = { .sched_priority = 0 }; + struct rs_proc_list *rs_proc; + struct list_head *proc_list; + + ENTER; + + cabi_debug_state(cabi); + if (!list_empty(&cabi->cpu_proc_list)) { + proc_list = cabi->cpu_proc_list.next; + while (proc_list != &cabi->cpu_proc_list) { + rs_proc = list_entry (proc_list, + struct rs_proc_list, rs_proc_list); + + /* if there are tasks in the list + * set static priority + */ + if (rs_proc->rs_proc_task) { + //check_sched_param (rs_proc->rs_proc_task); + cabi_sched_setscheduler( + rs_proc->rs_proc_task->pid, + SCHED_NORMAL, &param); + } + proc_list = proc_list->next; + } + } + EXIT; + return 0; + +} + +int +cabi_sched_setscheduler(pid_t pid, + int policy, struct sched_param *param) +{ + struct task_struct *p; + int retval; + + ENTER; + + if (!param || pid < 0) + return -EINVAL; + + p = __cabi_find_process_by_pid (pid); + if (!p) { + read_unlock_irq(&tasklist_lock); + return -ESRCH; + } + + retval = sched_setscheduler(p, policy, param); + + EXIT; + + return retval; +} + + +int +setup_boost_priority (cabi_account_t cabi) +{ + + struct rs_proc_list *rs_proc = NULL; + struct list_head *proc_list = NULL; + struct sched_param param; + + ENTER; + + if (!list_empty(&cabi->cpu_proc_list)) { + + proc_list = cabi->cpu_proc_list.next; + while (proc_list != &cabi->cpu_proc_list) { + rs_proc = list_entry (proc_list, + struct rs_proc_list, rs_proc_list); + /* if there are tasks in the list + * set static priority + */ + if (rs_proc->rs_proc_task) { + + //check_sched_param (rs_proc->rs_proc_task); + + param.sched_priority = + CABI_RT_MAX_PRIORITY; + + cabi_sched_setscheduler ( + rs_proc->rs_proc_task->pid, + SCHED_FIFO, &param); + } + proc_list = proc_list->next; + } + } + + EXIT; + return 0; +} +
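/*
 * Editorial sketch (not part of the patch): the deferrable server above
 * raises every bound task to SCHED_FIFO at replenish time and drops it
 * back to SCHED_NORMAL once the account is depleted. Reduced to the
 * in-kernel API pair it relies on (CABI_RT_MAX_PRIORITY is the patch's
 * own constant):
 */
static void ex_boost(struct task_struct *p)
{
        struct sched_param sp = { .sched_priority = CABI_RT_MAX_PRIORITY };

        sched_setscheduler(p, SCHED_FIFO, &sp);     /* enter the RT class */
}

static void ex_unboost(struct task_struct *p)
{
        struct sched_param sp = { .sched_priority = 0 };

        sched_setscheduler(p, SCHED_NORMAL, &sp);   /* back to timesharing */
}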
diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_init.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_init.c --- ./linux-2.6.18.1/drivers/cabi/cabi_init.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_init.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,154 @@ + /* + * linux/drivers/cabi/cabi_init.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by the Waseda University and Montavista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima. + * + * Authors: Midori Sugaya, Hirotaka Ishikawa + * + * Please send bug-reports/suggestions/comments to qos@dcl.info.waseda.ac.jp + * + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */
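/*
 * Editorial sketch (not part of the patch): cabi_cpu_calibration_i386()
 * below measures CPU ticks per second by bracketing CHECK_TIME busy-waited
 * jiffies-seconds with two tick reads, after estimating the overhead of a
 * read from two back-to-back reads. The core computation in isolation:
 */
static unsigned long long
ex_calibrate(unsigned long long begin, unsigned long long end,
             unsigned long long overhead, unsigned int seconds)
{
        /* average ticks elapsed per second of jiffies-measured wall time */
        return (end - begin - overhead) / seconds;
}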
+ +#include +#include + +extern void cabi_account_init(void); +extern void cabi_proc_init(void); + +spinlock_t cabi_lock; +cpu_tick_data_t cabi_cpu_ticks_per_second; +cpu_tick_data_t cabi_cpu_ticks_per_jiffy; +cpu_tick_data_t cabi_timer_adjust; + +#ifdef CONFIG_X86 +#include +#define CHECK_TIME 5 +void +cabi_cpu_calibration_i386(void) +{ + + unsigned long first_jiffies, rem, ctps; + int i = CHECK_TIME; + cpu_tick_data_t begin, end, err, result; + + ENTER; + printk("Cabi_cpu_calibration: Calibrating cpu "); + + cabi_rdticks(&begin); + cabi_rdticks(&end); + err = end - begin; + +/* for debugging */ + /* wait for "start of" clock tick */ + first_jiffies = jiffies; + while (first_jiffies == jiffies) { + /* nothing */ + } + cabi_rdticks(&begin); + while (i-- > 0) { + first_jiffies = jiffies; + printk("."); + while (jiffies - first_jiffies < HZ) { + + } + } + cabi_rdticks(&end); + /* */ + + result = (end - begin - err); + cabi_cpu_ticks_per_second = result; + + /* use 64bit interface */ + ctps = div_long_long_rem(result, CHECK_TIME, &rem); + /* set the calculated result */ + cabi_cpu_ticks_per_second = (unsigned long long) ctps; + + + EXIT; +} +#else /* #ifdef CONFIG_X86 */ +void +cabi_cpu_calibration(void) +{ + printk("cabi initialize...\n"); + cabi_cpu_ticks_per_second = CABI_CLOCK; + { + cpu_tick_data_t t; + t = CABI_USECS_PER_JIFFY; + usec2tick(&t, &cabi_cpu_ticks_per_jiffy); + } +} +#endif /* #ifdef CONFIG_X86 */ + +void +cabi_init(void) +{ + spin_lock_init(&cabi_lock); + cabi_account_init(); +#ifdef CONFIG_X86 + cabi_cpu_calibration_i386(); +#else + cabi_cpu_calibration(); +#endif /* #ifdef CONFIG_X86 */ + +#ifdef CONFIG_PROC_FS + cabi_proc_init(); +#endif +} diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_isr.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_isr.c --- ./linux-2.6.18.1/drivers/cabi/cabi_isr.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_isr.c 2007-06-17 00:26:20.000000000 +0900 @@ -0,0 +1,193 @@ +/* + * linux/drivers/cabi/cabi_isr.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by the Waseda University and Montavista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima. + * Authors: Midori Sugaya, Hirotaka Ishikawa + * Please send bug-reports/suggestions/comments to doly@dcl.info.waseda.ac.jp + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern cabi_account_t cabi_current_account; +/* + * Hooks + */ +int (*cabi_ret_with_reschedule_hook) (void); + +/* + * Checks + */ +void cabi_debug_current (cabi_account_t cabi_current) +{ + if (cabi_current) { + cabi_debug("current_account %x\n", + (int) cabi_current); + } +} +void +cabi_account_sleep_on (cabi_account_t cabi) +{ + + + if (unlikely(irqs_disabled())) { + printk(KERN_ERR "BUG1: CABI %s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + } + sleep_on (&cabi->depleted_wait); + + +} + +/* + * For "cabi_ret_with_reschedule_hook", cabi_ret_with_reschedule is called when + * a process exits from the kernel after processing a system call. 
+ */ + +asmlinkage int +cabi_ret_with_reschedule(void) +{ + cabi_account_t cabi; + cabi_current_account = (cabi_account_t)TASK_ACCOUNT(current); + + /* set current account */ + if ((cabi = cabi_current_account)) { + cabi->ops->isr (cabi); + } + return 0; +} + +void +cabi_enable_isr(void) +{ + unsigned long flags; + + ENTER; + cabi_debug(" cabi_enable_isr %x reschedule_hook %x reschedule\n", + (int) cabi_ret_with_reschedule_hook, + (int) cabi_ret_with_reschedule); + + if (cabi_ret_with_reschedule_hook == &cabi_ret_with_reschedule) { + cabi_debug("cabi_enable_isr : hook already set.\n"); + return; + } + + cabi_spin_lock(flags); + { + /* install hooks */ + cabi_ret_with_reschedule_hook = &cabi_ret_with_reschedule; + + cabi_debug ("enable_isr: set ret_with_reschedule_hook %x\n" + " <== address %x (reschedule)\n", + (int) cabi_ret_with_reschedule_hook, + (int) cabi_ret_with_reschedule); + + /* start cabi timer */ + cabi_timer_start(); + } + cabi_spin_unlock(flags); + + EXIT; +} + +void +cabi_disable_isr(void) +{ + unsigned long flags; + + ENTER; + if (cabi_ret_with_reschedule_hook == (void *) 0) + return; + + cabi_spin_lock(flags); + { + /* remove hooks */ + cabi_ret_with_reschedule_hook = (void *) 0; + } + cabi_spin_unlock(flags); + EXIT; +} + + +#ifdef CONFIG_PROC_FS + +int +cabi_account_read_block_proc(cabi_account_t cabi, char *buf) +{ + char *p = buf; + + if (cabi->pm.operation == OP_SIGNAL) { + p += sprintf(p, "%d\n%lu\n", cabi->opt_state, + cabi->signal_count); + } else { + p += sprintf(p, "%d\n%lu\n", cabi->opt_state, + cabi->block_count); + } + return (p - buf); +} + +#endif /* CONFIG_PROC_FS */ diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_overload.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_overload.c --- ./linux-2.6.18.1/drivers/cabi/cabi_overload.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_overload.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,432 @@ +/* + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University + * Nakajima Laboratory. MontaVista Software, Inc. + * + * Midori Sugaya + * + */ + +#include +#include +#include +#include + +/* + * -------------------------------------------------------------- + * Policy : Overload Monitoring + * --------------------------------------------------------------- + * Status : RUNNING running when the replenish timer expires. 
+ * : DEPLETED depleted when the replenish timer expires, + * : NULL + * --------------------------------------------------------------- + * Operation : BLOCK change status of the operation + * : SIGNAL change status of the operation + * --------------------------------------------------------------- + * Priority : Not controlled, use the user-defined priority + * --------------------------------------------------------------- + */ + +extern int overload_cabi; +extern cabi_account_t cabi_current_account; +extern cabi_account_t cabi_current_overload_account; +int is_ovl_running; + +void ovl_replenish (cabi_account_t); +void ovl_replenish_capacity(cabi_account_t); +void ovl_account_status_in(cabi_account_t); +void ovl_account_status_out(cabi_account_t); +void ovl_statistic_calculation(cabi_account_t); +void ovl_replenish_operation (cabi_account_t); +void ovl_isr (cabi_account_t); +void ovl_isr_operation (cabi_account_t); + + +struct cabi_account_operations ovl_ops = { + .replenish = ovl_replenish, + .isr = ovl_isr, + .isr_operation = ovl_isr_operation, +}; + +void +ovl_replenish (cabi_account_t cabi) +{ + + ENTER; + ovl_account_status_in(cabi); + ovl_statistic_calculation (cabi); + ovl_replenish_capacity(cabi); + ovl_account_status_out (cabi); + EXIT; + +} + +void +ovl_replenish_capacity (cabi_account_t cabi) +{ + + ENTER; + /* debug */ + cabi_debug_ticks(cabi); + cabi_debug_info(cabi); + + /* reset the used ticks */ + cabi->cpu_period_used_ticks = 0; + EXIT; +} + +void +ovl_stop_account (cabi_account_t cabi) +{ + cpu_tick_data_t now; + + ENTER; + cabi_rdticks(&now); + cabi_stop_account(cabi, &now); + is_ovl_running++; + EXIT; +} +void +ovl_account_status_in (cabi_account_t cabi) +{ + + ENTER; + switch (cabi->cpu_state) { + case CABI_IS_NULL: + /* 00 : no bound process */ + CABI_DBG; + cabi_debug_state(cabi); + break; + case CABI_IS_RUNNING: + /* 01 : passed the period */ + CABI_DBG; + cabi_debug_state(cabi); + ovl_stop_account(cabi); + break; + case CABI_IS_DEPLETED: + /* 10 : correctly accounted */ + CABI_DBG; + cabi_debug_state(cabi); + break; + case CABI_IS_RUNNING|CABI_IS_DEPLETED: + /* 11 : first time to account */ + CABI_DBG; + cabi_debug_state(cabi); + ovl_stop_account(cabi); + break; + default: + /* it should not happen */ + CABI_DBG; + cabi_debug_state(cabi); + break; + } + + /* it should be changed to the initial status, CABI_IS_NULL.
*/ + cabi->cpu_state &= ~CABI_IS_DEPLETED; + + /* check overload condition */ + if (overload_cabi) + cabi_account_check_overload(); + + EXIT; +} + +void +ovl_replenish_operation (cabi_account_t cabi) +{ + ENTER; + + switch (cabi->pm.operation) { + case OP_BLOCK: + cabi_debug("[op_block] op: %d\n", + cabi->pm.operation); + cabi->opt_state = CABI_UNBLOCK; + wake_up(&cabi->depleted_wait); + break; + case OP_SIGNAL: + cabi_debug("[op_signal] op: %d\n", + cabi->pm.operation); + break; + default: + break; + } + EXIT; +} + +int ovl_bonus; + +void +ovl_account_status_out (cabi_account_t cabi) +{ + + unsigned long long avail = cabi->cpu_period_available_ticks; + unsigned long long used = cabi->cpu_period_prev_used_ticks; + unsigned long long cpu_time = cabi->cpu_time_ticks; + + ENTER; + if (avail > 0) { + switch (cabi->pm.rep_policy) { + case REP_SOFT: + CABI_DBG; + if (avail != cabi->cpu_time_ticks) { + cabi_debug_ticks (cabi); + cpu_time_replenish_init(cabi); + } + break; + case REP_HARD: + CABI_DBG; + if (ovl_bonus) { + ovl_bonus = 0; /* reset the toggle */ + cpu_time_replenish_init(cabi); + break; + } else { + ovl_bonus++; + if (cpu_time > used) { + CABI_DBG; + /* used less than the capacity */ + cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + += (cpu_time - used); + cabi_debug_avail_ticks (cabi); + } else if (cpu_time < used) { + CABI_DBG; + /* used more than the capacity */ + cpu_time_replenish_init(cabi); + cabi->cpu_period_available_ticks + -= (used - cpu_time); + cabi_debug_avail_ticks (cabi); + } else { + CABI_DBG; + /* avail and used are equal */ + cabi_debug_avail_ticks (cabi); + } + } + break; + default: + /* if nothing is set, this path is taken. */ + break; + } + ovl_replenish_operation(cabi); + + } else { + CABI_DBG; + cabi_debug_avail_ticks(cabi); + cpu_time_replenish_init(cabi); + + if (is_ovl_running) + cabi_start_account(cabi); + + } + EXIT; +} + + +void +ovl_statistic_calculation(cabi_account_t cabi) +{ + + cpu_capacity_t c; + + ENTER; + /* update statistics */ + cabi->cpu_period_prev_used_ticks = + cabi->cpu_period_used_ticks; + cabi->cpu_total_used_ticks += + cabi->cpu_period_used_ticks; + + /* check capacity */ + c = capacity_of(cabi->cpu_period_used_ticks, + cabi->cpu_period_ticks); + + /* debug */ + cabi_debug_capacity(c); + + if (cabi->cpu_max_utilization < c) { + cabi->cpu_max_utilization = c; + + /* capacity exceeds the max utilization */ + cabi_debug_capacity(c); + + } else if (cabi->cpu_min_utilization > c + || cabi->cpu_min_utilization == 0) { + + cabi->cpu_min_utilization = c; + + /* capacity: less than the min util */ + cabi_debug_capacity(c); + + } + + /* summation for average */ + cabi->cpu_average.total_utils += c; + cabi->cpu_average.total_count++; + + EXIT; +} + + +void +ovl_isr (cabi_account_t cabi) +{ + + ENTER; + if (cabi_account_overload(cabi)) + ovl_isr_operation(cabi); + + EXIT; +} + +void +ovl_isr_operation (cabi_account_t cabi) +{ + + ENTER; + switch (cabi->pm.operation) { + case OP_SIGNAL: + CABI_DBG; + if (cabi->opt_state != CABI_SIGNAL) + cabi->opt_state = CABI_SIGNAL; + if (!cabi->signal_block) { + cabi_send_signal( + cabi, CABI_SIGPID(cabi), + CABI_SIGNUM(cabi)); + } + break; + default: +
cabi_current_account = NULL_ACCOUNT; + CABI_DBG; + cabi_debug ("cabi_ret_with_reschedule:" + "cabi(0x%x) operation(%d)\n", + (int) cabi, cabi->pm.operation); + + break; + } + EXIT; +} + + + + +/* + * Name: capacity_overload + * + * This function returns the new_qc value as a cpu time within + * a period. + */ +void +capacity_overload(struct timespec *c, struct timespec * t, + cpu_capacity_quad_t * overload_qc) +{ + + cpu_capacity_quad_t qc, qt, new_nano_qc; + cpu_capacity_t capacity; + + /* recalculate requested capacity */ + qc = (c->tv_sec * NANOSEC) + c->tv_nsec; + qt = (t->tv_sec * NANOSEC) + t->tv_nsec; + + /* current requested capacity */ + capacity = capacity_of(qc, qt); + cabi_debug("[over] capacity(%lu.%04lu)\n", + CAPACITY_INT(capacity), CAPACITY_FRAC(capacity)); + + new_nano_qc = qt - qc; + cabi_debug("[over] new nano_qc %llu\n", + new_nano_qc); + *overload_qc = new_nano_qc; + cabi_debug("[over] overload_qc (%p),\n", + (cpu_capacity_quad_t *)overload_qc); + + +} + +int setup_overload_id (cabi_account_t cabi) +{ + + + if (!cabi_current_overload_account) { + /* + * set reserved id for the overload cabi + */ + cabi->cabi_id = OVERLOAD_CABI_ID; + cabi_current_overload_account = cabi; + + cabi_debug("[overload] overload id set %ld %x\n", + cabi->cabi_id, (int) cabi_current_overload_account); + + } else { + /* + * if an overload cabi already exists, + * return with an error. + */ + cabi_debug("[ovl_set:02] overload already exists %x. error return.\n", + (int) cabi_current_overload_account); + + free (cabi); + return CABI_ERROR; + } + + return CABI_SUCCESS; +} + +int +cabi_account_check_overload(void) +{ + cabi_account_t cabi = NULL_ACCOUNT; + + /* search overload cabi */ + if (!cabi_current_overload_account) { + return CABI_ENOEXIST; + } else { + cabi = cabi_current_overload_account; + } + + if (cabi != cabi_current_account) + return CABI_ENOAVLE; + + /* compare the total used ticks with available ticks. */ + if (cabi->cpu_period_used_ticks < cabi->cpu_period_available_ticks) { + + cabi_debug("cabi_account_check_overload:" + " cabi(0x%x) used(%lu) < available(%lu)\n", + (int) cabi, + (unsigned long) cabi->cpu_period_used_ticks, + (unsigned long) cabi->cpu_period_available_ticks); + + if (cabi->cabi_id == OVERLOAD_CABI_ID) { + cabi->overload |= CABI_IS_OVERLOAD; + + cabi_debug("cabi state overload(0x%x), signal sent.\n", + (int) cabi->overload); + + cabi_send_signal(cabi, CABI_SIGPID(cabi), + CABI_SIGNUM(cabi)); + } + } else { + if (cabi->cabi_id == OVERLOAD_CABI_ID) { + + cabi_debug("overload(0x%x), no signal sent.\n", + (int) cabi->overload); + + cabi->overload = CABI_IS_NULL; + } + } + return CABI_SUCCESS; +} +
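/*
 * Editorial sketch (not part of the patch): capacity_overload() above must
 * write through its output pointer; reassigning the pointer parameter
 * itself (as an earlier draft of such code might) only changes a local
 * copy, so the caller would never see new_nano_qc. The general pattern:
 */
static void ex_out_param(unsigned long long *out)
{
        unsigned long long local = 42;

        *out = local;           /* visible to the caller */
        /* out = &local;        -- would be lost on return (and dangle) */
}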
diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_procfs.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_procfs.c --- ./linux-2.6.18.1/drivers/cabi/cabi_procfs.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_procfs.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,209 @@ + /* + * linux/drivers/cabi/cabi_procfs.c - implements the interfaces for procfs. + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by the Waseda University and Montavista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima. + * + * Authors: Midori Sugaya, Hirotaka Ishikawa + * + * Please send bug-reports/suggestions/comments to doly@dcl.info.waseda.ac.jp + * + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes.
+ */ +#include +#include +#include + +#include + +#define MAX_LEN 10 +#define NR_CABI 1000 +#define ENTRIES 4 + + +#ifdef CONFIG_PROC_FS + +/* proc root directory */ +static struct proc_dir_entry *proc_cabi_dir; +/* proc object_id directories */ +static struct proc_dir_entry *cabi_id_dir [NR_CABI]; + +static int +proc_cabi_status_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + cabi_account_t cabi = (cabi_account_t) data; + return cabi_account_read_proc(cabi, page); +} + +static int +proc_cabi_base_status_read (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char *p = page; + extern int cabi_account_base_status_proc(char *); + + p += cabi_account_base_status_proc(p); + + return (p - page); +} + +static int +proc_cabi_block_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + cabi_account_t cabi = (cabi_account_t) data; + return cabi_account_read_block_proc(cabi, page); +} + +static int +proc_cabi_bindpid_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + cabi_account_t cabi = (cabi_account_t) data; + return cabi_account_read_bindpid_proc (cabi, page); +} + +static int +proc_cabi_time_read (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + cabi_account_t cabi = (cabi_account_t) data; + return cabi_account_read_time_proc(cabi, page); +} + +void __init +cabi_proc_init(void) +{ + struct proc_dir_entry *proc_cabi_status; + + /* proc directory entry */ + proc_cabi_dir = create_proc_entry("cabi", S_IFDIR, 0); + + if (!proc_cabi_dir) { + printk("Cannot create /proc/cabi\n"); + return; + } + + proc_cabi_status = create_proc_entry("cabi_status", S_IFREG | S_IRUGO, + proc_cabi_dir); + if (proc_cabi_status) + proc_cabi_status->read_proc = proc_cabi_base_status_read; +} + + +struct cabi_entry_list { + const char *name; + int (*f)(char *, char **, off_t, int, int *, void *); +} CABI_PROC_ENTRY[] = { + { "status", proc_cabi_status_read}, + { "term_act", proc_cabi_block_read}, + { "bind_pid", proc_cabi_bindpid_read}, + { "time_set", proc_cabi_time_read}, +}; + +void +cabi_register_proc_account(cabi_account_t cabi) +{ + int i; + struct proc_dir_entry *entry; + char string[MAX_LEN]; + + sprintf(string, "%d", (int) cabi->cabi_id); + cabi_id_dir[cabi->cabi_id] = proc_mkdir (string, proc_cabi_dir); + + if (!cabi_id_dir[cabi->cabi_id]) { + printk("Cannot create /proc/cabi/%d\n", (int)cabi->cabi_id); + return; + } + + for (i = 0; i < ENTRIES; i++) { + entry = create_proc_entry (CABI_PROC_ENTRY[i].name, + S_IFREG|S_IRUGO, cabi_id_dir[cabi->cabi_id]); + + if (!entry) { + printk("Cannot create /proc/cabi/%d/%s\n", + (int) cabi->cabi_id, CABI_PROC_ENTRY[i].name); + continue; + } + + entry->nlink = 1; + entry->data = cabi; + entry->read_proc = CABI_PROC_ENTRY[i].f; + } +} + + +void +cabi_proc_account_create(cabi_account_t cabi) +{ + + cabi_debug ("cabi_proc_account_create: cabi(0x%x) cabi_id (%d)\n", + (int) cabi, (int) cabi->cabi_id); + cabi_register_proc_account(cabi); + + +} + +void +cabi_proc_account_destroy(cabi_account_t cabi) +{ + char buf[16]; + + cabi_debug ("cabi_proc_account_destroy: cabi(%d)\n", + (int) cabi->cabi_id); + sprintf(buf, "%d", (int) cabi->cabi_id); + remove_proc_entry(buf, proc_cabi_dir); +} + +#endif /* CONFIG_PROC_FS */
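/*
 * Editorial sketch (not part of the patch): the read_proc handlers above
 * return their whole output on every call and ignore off/count/eof, which
 * only works while the output fits in a single page. A handler that also
 * terminates the read explicitly under the 2.6 read_proc contract would
 * set *eof, e.g.:
 */
static int
ex_proc_read_once(char *page, char **start, off_t off,
                  int count, int *eof, void *data)
{
        int len;

        if (off > 0) {          /* everything went out on the first call */
                *eof = 1;
                return 0;
        }
        len = sprintf(page, "example\n");
        *eof = 1;               /* nothing more to read */
        return len;
}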
diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_sched.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_sched.c --- ./linux-2.6.18.1/drivers/cabi/cabi_sched.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_sched.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,158 @@ + /* + * linux/drivers/cabi/cabi_sched.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by the Waseda University and Montavista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima. + * Authors: Midori Sugaya, Hirotaka Ishikawa + * Please send bug-reports/suggestions/comments to qos@dcl.info.waseda.ac.jp + * + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include + + /**/ +/* #undef DEBUG_CABI_SCHED */ + /**/ +extern cabi_account_t cabi_current_account; + +void (*cabi_schedule_hook) (struct task_struct *, struct task_struct *); + +void +cabi_schedule_cpu(struct task_struct *prev, struct task_struct *next) +{ + cpu_tick_data_t now; + cabi_account_t prev_cabi = TASK_ACCOUNT(prev); + cabi_account_t next_cabi = TASK_ACCOUNT(next); + + + if (prev_cabi || next_cabi) { + cabi_debug + ("pid(%d) cabi(0x%x)" + " -> pid(%d) cabi(0x%x)\n", + prev->pid, (int) prev_cabi, + next->pid, (int) next_cabi); + } + + /* + * At this point, if the process belongs to a different + * accounting object (cabi), the enforce timer should be + * stopped.
+ */ + + if (prev_cabi != next_cabi) { + //cabi_enforce_timer_cancel(); + /* if there is a current cabi, stop it */ + + /* debug */ + cabi_debug_entities(cabi_current_account, prev_cabi); + + /* + * if the previously scheduled process belongs to + * prev_cabi, the accounting object (cabi) must be + * checked for depletion through the function + * cabi_stop_account (cabi_account_check_enforce) + */ + if (prev_cabi == cabi_current_account) { + + /* debug */ + cabi_debug_entities (cabi_current_account, + prev_cabi); + + if (cabi_current_account) { + + /* get time and stop accounting */ + cabi_rdticks(&now); + cabi_stop_account(cabi_current_account, &now); + } + } + + /* if there is a cabi for next, start it */ + if ((cabi_current_account = next_cabi)) { + + + /* debug */ + cabi_debug_entities (cabi_current_account, + next_cabi); + /* start accounting */ + cabi_start_account(next_cabi); + } + } + //cabi_current_account = next_cabi; +} + +void +cabi_enable_schedule_cpu(void) +{ + if (!cabi_schedule_hook) { + cabi_schedule_hook = cabi_schedule_cpu; + cabi_debug + ("cabi_enable_schedule_cpu: cabi_schedule_hook enabled\n"); + } +} + +void +cabi_disable_schedule_cpu(void) +{ + if (cabi_schedule_hook) { + cabi_schedule_hook = (void *) 0; + + cabi_debug + ("cabi_disable_schedule_cpu: cabi_schedule_hook disabled\n"); + } +} diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_signal.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_signal.c --- ./linux-2.6.18.1/drivers/cabi/cabi_signal.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_signal.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,118 @@ + /* + * linux/drivers/cabi/cabi_signal.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by Waseda University and MontaVista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima. + * + * Authors: Midori Sugaya, Hirotaka Ishikawa + * + * Please send bug-reports/suggestions/comments to qos@dcl.info.waseda.ac.jp + * + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved.
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include +#include + +int +cabi_send_signal(cabi_account_t cabi, int pid, int sig) +{ + + int error; + struct siginfo info; + struct task_struct *tsk; + + /* Set the siginfo */ + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_KERNEL; + info.si_pid = 0; + info.si_uid = 0; + + if (sig <= 0) + return -EINVAL; + + /* This is only valid for a normal process (not the idle process). */ + if (pid <= 0) + return -EINVAL; + + /* search process by pid */ + if (!(tsk = __cabi_find_process_by_pid(pid))) { + + /* debug */ + cabi_debug_signal_pid (pid); + + /* return */ + error = -ESRCH; + return error; + } else { + + /* debug */ + cabi_debug_signal_attrs (pid, sig, tsk); + + /* send signal */ + error = send_sig_info(sig, &info, tsk); + + if (error > 0) { + cabi->signal_block = SIGNAL_ON; + cabi->signal_count++; + } + + } + return error; +} diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_syscalls.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_syscalls.c --- ./linux-2.6.18.1/drivers/cabi/cabi_syscalls.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_syscalls.c 2007-06-17 00:34:48.000000000 +0900 @@ -0,0 +1,597 @@ +/* + * linux/drivers/cabi/cabi_syscalls.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * This software was developed by Waseda University and MontaVista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * 2007 Modified by Midori Sugaya to change interfaces. + * 2006-9-12. Modified by Midori Sugaya to fix bugs in tick calculation + * and make adjustments for 2.6. + * 2006-2. Ported to kernel 2.6 by Takeharu KATO (ARM, SH) + * 2005-2. New release based on LinuxRK, by Midori SUGAYA, Hirotaka + * ISHIKAWA. + * + * Further details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version.
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * This file is derived from software distributed under the following terms: + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +/* + * - struct rglist - is used to manipulate a group of + * processes, in particular during the procedure that + * binds a process group. + */ +struct rglist { + int count; + pid_t pid; + struct task_struct *tsk; + struct list_head rg_link; +}; +typedef struct rglist *rglist_t; +struct list_head rglist_head; + +/* + * Linux proc file system interface + */ +#include +#include + +extern int cabi_sanity_check_euid(struct task_struct *tsk); + +/* + * Name: sys_cabi_account_create + * + * Create an accounting object (cabi). Mainly sanity checks + * of the parameters. + * + */ +asmlinkage int +sys_cabi_account_create(struct cabi_uaccount *ucabi) +{ + struct cabi_uaccount user_cabi; + cabi_account_t cabi; + unsigned long ret; + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + goto error_return; + } + + CABI_DBG; + /* parameter copy */ + if (copy_from_user(&user_cabi, ucabi, + sizeof (struct cabi_uaccount))) + return CABI_EINVAL; + + /* parameter check */ + if ((ret = cabi_sanity_check_ucabi (&user_cabi)) > 0) { + goto error_return; + } + + /* cpu_time and cpu_period should be checked. */ + if ((ret = cabi_sanity_check_timespec_c_and_t + (&user_cabi.cpu_time, + &user_cabi.cpu_period)) > 0) { + goto error_return; + } + + /* check policy integrity */ + cabi_sanity_check_operation (&user_cabi); + + /* + * for overload cabi, check parameters and set a new cpu_time. + */ + if (user_cabi.pm.bind_proc_type == BIND_IDLE_PROC) { + cabi_sanity_check_overload_id (); + cabi_sanity_check_sigoperation (&user_cabi); + + } + + /* parameter check */ + cabi_sanity_check_parameter_to_copy + (&user_cabi.cpu_time, + &user_cabi.cpu_period, + &user_cabi.pm); + + /* call create API */ + if (!(cabi = cabi_account_create(&user_cabi.cpu_time, + &user_cabi.cpu_period, + &user_cabi.pm))) + goto error_return; + + /* copy the resulting value back to the caller */ + ret = copy_to_user(&ucabi->cabi_id, &cabi->cabi_id, + sizeof (cabi_object_t)); + + if (ret) + goto error_return; + + return CABI_SUCCESS; + +error_return: + EXIT; + + + return CABI_CREATE_ERR; + +} + + +/* + * Name: sys_cabi_account_set + * + * Set the parameters of a specified accounting object (cabi). + */ +asmlinkage int +sys_cabi_account_set(unsigned long cabi_id, struct cabi_uaccount *ucabi) +{ + struct cabi_uaccount user_cabi; + cabi_account_t cabi; + int ret; + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + return CABI_EACCESS; + } + + /* null pointer check */ + if (!ucabi) + return CABI_EINVAL; + + /* parameter copy */ + if (copy_from_user(&user_cabi, ucabi, + sizeof (struct cabi_uaccount))) + return CABI_EINVAL; + + /* parameter check */ + if ((ret = cabi_sanity_check_ucabi (&user_cabi)) > 0) { + goto error_return; + } + + /* cpu_time and cpu_period should be checked.
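+ * (This is the same check performed in sys_cabi_account_create + * above: both values must be positive and cpu_time must not exceed + * cpu_period.)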
*/ + if ((ret = cabi_sanity_check_timespec_c_and_t + (&user_cabi.cpu_time, + &user_cabi.cpu_period)) > 0) { + goto error_return; + } + + if ((cabi = search_cabi(cabi_id)) == NULL) { + return CABI_ENOEXIST; + } else { + if (user_cabi.pm.bind_proc_type != + cabi->pm.bind_proc_type) + return CABI_EINVAL; + } + + /* + * for overload cabi, check parameters and set a new cpu_time. + */ + if (user_cabi.pm.bind_proc_type == BIND_IDLE_PROC) { + if ((cabi = search_cabi(OVERLOAD_CABI_ID)) == NULL) { + return CABI_EINVAL; + } + if (user_cabi.pm.operation != OP_SIGNAL) { + return CABI_EINVAL; + } + } + + /* pass the new parameters to the cabi */ + ret = cabi_account_set(cabi_id, + &user_cabi.cpu_time, + &user_cabi.cpu_period, + &user_cabi.pm); + + if (ret < 0) { + return CABI_ERROR; + } else + return CABI_SUCCESS; +error_return: + EXIT; + + TEXIT; + return CABI_ERROR; +} + + +/* + * Name: sys_cabi_account_get + * + * Get the parameters from a specified accounting object (cabi). + */ + +asmlinkage int +sys_cabi_account_get(unsigned long cabi_id, struct cabi_uaccount *ucabi) +{ + struct list_head *proc_list; + struct rs_proc_list *rs_proc = NULL; + unsigned long ret; + cabi_account_t cabi = NULL_ACCOUNT; + cpu_capacity_quad_t rev_qc; + + /* null pointer check */ + if (!ucabi) + return CABI_EINVAL; + + /* find cabi address */ + if (!(cabi = search_cabi(cabi_id))) + return CABI_ENOEXIST; + + proc_list = cabi->cpu_proc_list.next; + while (proc_list != &cabi->cpu_proc_list) { + /* get a rs_proc */ + rs_proc = + list_entry(proc_list, + struct rs_proc_list, rs_proc_list); + + /* next element */ + proc_list = proc_list->next; + } + + /* if this is the overload cabi, re-calculate the parameters. */ + if (cabi->pm.bind_proc_type == BIND_IDLE_PROC){ + capacity_overload(&cabi->cpu_time, + &cabi->cpu_period, &rev_qc); + ret = copy_to_user(&ucabi->cpu_time, &rev_qc, + sizeof (ucabi->cpu_time)); + } else { + ret = copy_to_user(&ucabi->cpu_time, &cabi->cpu_time, + sizeof(ucabi->cpu_time)); + } + + ret = copy_to_user(&ucabi->cpu_period, &cabi->cpu_period, + sizeof (ucabi->cpu_period)); + ret = copy_to_user(&ucabi->pm, &cabi->pm, + sizeof (ucabi->pm)); + ret = copy_to_user(&ucabi->cabi_id, &cabi->cabi_id, + sizeof (ucabi->cabi_id)); + + if (ret) + return CABI_ERROR; + + return CABI_SUCCESS; +} + + +/* + * Name: sys_cabi_account_destroy + * + * Destroy an accounting object. + * + */ +asmlinkage int +sys_cabi_account_destroy(unsigned long cabi_id) +{ + int ret; + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + return CABI_EACCESS; + } + if (cabi_id <= 0) + return CABI_EINVAL; /* invalid argument */ + + ret = cabi_account_destroy(cabi_id); + return ret; +} + + +/* + * Name: sys_cabi_account_bind_pid + * + * Bind a process to an accounting object. + * + */ +asmlinkage int +sys_cabi_account_bind_pid(unsigned long cabi_id, pid_t pid) +{ + + int ret; + struct task_struct *tsk; + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + return CABI_EACCESS; + } + + /* parameter check */ + if (pid_check(pid)) { + return CABI_EINVAL; /* invalid argument */ + } + + /* find overload cabi address. */ + if (cabi_id == OVERLOAD_CABI_ID) { + if (pid != IDLE_PROCESS) { + bind_pid_error(pid); + return CABI_EINVAL; + } + if (!(tsk = __cabi_find_idle_process(pid))) { + bind_pid_error(pid); + return CABI_EPNOEXIST; /* PID(0) does not exist. */ + } + /* idle process attached.
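+ * This branch is taken only for cabi_id == OVERLOAD_CABI_ID with + * pid == IDLE_PROCESS; any other pid is looked up with + * __cabi_find_process_by_pid() below.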
*/ + } else { + if (!(tsk = __cabi_find_process_by_pid(pid))) { + bind_pid_error(pid); + return CABI_EPNOEXIST; /* PID does not exist. */ + } + } + + /* if the task is already registered, return an error. */ + if (TASK_ACCOUNT(tsk)) { + cabi_account_attached (); + if (cabi_id == TASK_ACCOUNT(tsk)->cabi_id) { + /* PID is already registered in this AO. */ + return CABI_EREGIST; + } else { + /* PID is registered in another AO. */ + return CABI_ENOAVLE; + } + } + + /* call a function to attach */ + ret = cabi_account_attach(cabi_id, tsk); + return ret; + +} + + +/* + * Name: sys_cabi_account_bind_pgid + * + * Bind a group of processes to an accounting object. + * + */ +asmlinkage int +sys_cabi_account_bind_pgid(unsigned long cabi_id, pid_t pgid) +{ + + cabi_account_t cabi; + struct rglist *rg, *tmp; + struct list_head *reg_list; + struct task_struct *tsk; + int i, ret; + int already_registered, shouldbe_registered, + group_member, rglist_count; + + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + return CABI_EACCESS; + } + + /* if pgid is 0 or < 0, error return */ + if (pgid_check (pgid)) { + return CABI_EINVAL; + } + + already_registered = shouldbe_registered = + group_member = rglist_count = 0; + + INIT_LIST_HEAD(&rglist_head); + + /* check cabi_id */ + if (cabi_id_check(cabi_id)) { + return CABI_EINVAL; /* invalid argument */ + } + + /* current(caller) check */ + if (TASK_ACCOUNT(current)) { + cabi = TASK_ACCOUNT(current); + if (cabi->cabi_id == cabi_id) { + already_registered++; + } else { + /* If the caller is already bound to another AO, + * return an error. */ + return CABI_ENOAVLE; + } + } else { + /* + * If the caller process is not bound to any AO, it + * should be bound to the requested AO. We do nothing + * here, because the for_each_process() loop below picks + * it up as a member of the target process group. + */ + } + + /* + * Every process belonging to the group should be bound to + * the requested AO. + */ + read_lock(&tasklist_lock); + for_each_process(tsk) { + if (process_group(tsk) == pgid) { + group_member++; + printk("bind_pgid: tsk->pgid %d group_member %d\n", + process_group(tsk), group_member); + /* + * A process with the right pgid was found. + * Next, check whether this process is already + * bound to another cabi; if so, return an error. + */ + if (TASK_ACCOUNT(tsk)) { + printk("bind_pgid: this account already " + "attached to this process. " + " pid(%d) cabi(0x%x)\n", + tsk->pid, (int) TASK_ACCOUNT(tsk)); + /* + * If the process is already registered + * with the requested cabi, count it; + * otherwise return an error. + */ + cabi = TASK_ACCOUNT(tsk); + if (cabi->cabi_id == cabi_id) { + already_registered++; + } else { + read_unlock(&tasklist_lock); + return CABI_ENOAVLE; + } + } else { + /* If the task has not been registered, + * we should bind it. Here we only + * queue it on a list.
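+ * The queued entries are handed to cabi_account_attach() + * further below, once tasklist_lock has been dropped.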
+ */ + if((rg = malloc(sizeof + (struct rglist))) == NULL) { + read_unlock(&tasklist_lock); + return CABI_ENOMEM; + } + bzero(rg, sizeof (struct rglist)); + INIT_LIST_HEAD(&rg->rg_link); + + rg->pid = tsk->pid; + rg->tsk = tsk; + + list_add(&rg->rg_link, &rglist_head); + rglist_count++; + shouldbe_registered++; + } + } + } + read_unlock(&tasklist_lock); + + if (group_member == 0) { + return CABI_EPGNOEXIST; + } else if (group_member == already_registered) { + return CABI_EREGIST; + } else { + /* register the queued processes */ + reg_list = &rglist_head; + for (i = 0; i < shouldbe_registered + 1; i++) { + if (reg_list == &rglist_head) { + reg_list = reg_list->next; + } else { + tmp = list_entry(reg_list, + struct rglist, rg_link); + if ((ret = + cabi_account_attach(cabi_id, + tmp->tsk))!= CABI_SUCCESS) { + return ret; + } + reg_list = reg_list->next; + } + } + } + + TEXIT; + + return CABI_SUCCESS; +} + + +/* + * Name: sys_cabi_account_unbind + * + * Unbind a process which has been attached to a specified + * accounting object. + * + */ +asmlinkage int +sys_cabi_account_unbind(pid_t pid) +{ + + struct task_struct *tsk; + int ret; + + /* permission check */ + if (current->euid) { + cabi_sanity_check_euid (current); + return CABI_EACCESS; + } + + /* pid check */ + if (pid_check(pid)) { + return CABI_EINVAL; /* Invalid argument */ + } + + /* find overload cabi address. */ + if (pid == IDLE_PROCESS) { + if (!(tsk = __cabi_find_idle_process(pid))) { + printk("account_unbind: idle (%d) failed.\n", pid); + return CABI_EPNOEXIST; /* PID(0) does not exist. */ + } + } else { + if (!(tsk = __cabi_find_process_by_pid(pid))) { + printk("account_unbind: invalid pid(%d)\n", pid); + return CABI_EPNOEXIST; /* PID does not exist. */ + } + } + + if (!TASK_ACCOUNT(tsk)) { + printk("account_unbind: no account attached to " + "this process. pid(%d)\n", tsk->pid); + return CABI_ENOBIND; + } + + ret = cabi_account_detach(tsk); + return ret; +} + +asmlinkage int +sys_cabi_account_eval (int eventtype) +{ + + int flag; + + switch (eventtype) { + case 0: + cabi_dump_init(); + break; + case 1: /* this is the first time */ + flag = 1; + cabi_dump_read(flag); + break; + case 2: + flag = 0; + cabi_dump_read(flag); + break; + case 3: + cabi_dump_end(); + break; + default: + break; + + } + return 0; +} + diff -urN ./linux-2.6.18.1/drivers/cabi/cabi_timer.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_timer.c --- ./linux-2.6.18.1/drivers/cabi/cabi_timer.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/cabi_timer.c 2007-06-17 00:37:01.000000000 +0900 @@ -0,0 +1,255 @@ + /* + * linux/drivers/cabi/cabi_timer.c + * + * CABI -- CPU Accounting and Blocking Interfaces. + * + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * + * This software was developed by Waseda University and MontaVista Software, + * Inc. Funding for this project was provided by IPA (Information-technology + * Promotion Agency, Japan). This software may be used and distributed + * according to the terms of the GNU Public License, incorporated herein by + * reference. + * + * This project was developed under the direction of Dr. Tatsuo Nakajima.
+ * + * Authors: Midori Sugaya, Hirotaka Ishikawa + * + * Please send bug-reports/suggestions/comments to qos@dcl.info.waseda.ac.jp + * + * Futher details about this project can be obtained at + * http://dcl.info.waseda.ac.jp/osrg/ + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This file is derived from software distributed under the following terms: + */ + /* + * Real-time and Multimedia Systems Laboratory + * Copyright (c) 1999 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Real-Time and Multimedia Systems Laboratory + * Attn: Prof. Raj Rajkumar + * Electrical and Computer Engineering, and Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * or via email to raj@ece.cmu.edu + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +__inline__ void +cabi_tstojiffie(struct timespec *tp, unsigned long *jiff) +{ + *jiff = HZ * tp->tv_sec; + *jiff += (tp->tv_nsec + (NANOSEC / HZ)) / (NANOSEC / HZ); +} +extern cabi_account_t cabi_current_account; + +/* + * cabi_replenish_timer_process(struct hrtimer *tmr) + */ +enum hrtimer_restart +cabi_replenish_timer_process(struct hrtimer *tmr) +{ + int ret; + struct timespec res; + cabi_account_t cabi; + + ENTER; + cabi = (cabi_account_t ) tmr->data; + + /* replenish it, and set timer expiration `ticks' later */ + cabi_account_replenish(cabi); + + /* debug parameters */ + cabi_debug_info (cabi); + hrtimer_get_res (CLOCK_REALTIME, &res); + cabi_debug("[rep_process] hrtimer_get_res %llu %llu \n", + (unsigned long long) res.tv_sec, + (unsigned long long) res.tv_nsec); + + tmr->expires.tv64 = (cabi->cpu_period.tv_sec * NANOSEC) + + cabi->cpu_period.tv_nsec; + cabi_debug("[rep_process] expire %llu\n", + tmr->expires.tv64); + + /* if the signal_block bit is set, clear it */ + if (cabi->pm.operation == OP_SIGNAL && cabi->signal_block) + cabi->signal_block = SIGNAL_OFF; + + //ret = hrtimer_forward (tmr, tmr->get_time(), tmr->expires); + ret = hrtimer_start (tmr, tmr->expires, HRTIMER_REL); + cabi_debug ("[rep_pros_tmr] hrtimer_start %d %d.\n", + tmr->state, ret); + + EXIT; + return tmr->state; +} + +/* + * cabi_replenish_timer_init(): + * Create a timer to replenish an account and add it to the queue. + * Set a Linux timer if necessary. + */ +#include +#define INIT_TIMEOUT 10000000 +void +cabi_replenish_timer_init(cabi_account_t cabi, cpu_tick_t ticks) +{ + + //struct timer_list *tmr = &cabi->cpu_replenish_tmr; + struct hrtimer *tmr = &cabi->cpu_replenish_hrtmr; + struct timespec res; + int ret; + + ENTER; + hrtimer_init(tmr, CLOCK_REALTIME, HRTIMER_REL); + hrtimer_get_res (CLOCK_REALTIME, &res); + tmr->expires.tv64 = INIT_TIMEOUT; + tmr->function = cabi_replenish_timer_process; + tmr->data = (char *)cabi; + + cabi_debug_info (cabi); + + /* Enqueue the timer */ + ret = hrtimer_start(tmr, tmr->expires, HRTIMER_REL); + EXIT; +} + +void +cabi_replenish_timer_cancel(cabi_account_t cabi) +{ + struct hrtimer *tmr = &cabi->cpu_replenish_hrtmr; + hrtimer_cancel(tmr); +} + +/* + * Enforcement timer management + */ +extern void cabi_enforce_timer_process (struct hrtimer *tmr); +static struct hrtimer cabi_enforce_hrtmr; + +/* called from "cabi->cpu_ops->start_account()" */ +void +cabi_enforce_timer_start(cabi_account_t cabi, cpu_tick_t next_available_ticks) +{ + struct hrtimer *tmr = &cabi_enforce_hrtmr; + struct timespec ts; + unsigned long flags; + int ret; + + ENTER; + + /* debug */ + cabi_debug_tick(*next_available_ticks); + + /* convert tick to timespec */ + tick2ts(next_available_ticks, &ts); + + /* set the parameters to tmr */ + cabi_spin_lock(flags); + tmr->expires.tv64 = (ts.tv_sec*NANOSEC + ts.tv_nsec); + tmr->data = (char *)cabi; + + if (tmr->expires.tv64 <= 0) { + if (cabi->cpu_state != CABI_IS_DEPLETED) + cabi->cpu_state &= CABI_IS_DEPLETED; + } + ret = hrtimer_start (tmr, tmr->expires, HRTIMER_REL); + cabi_spin_unlock (flags); + + EXIT; +} + +/* + * Cancel the enforce timer and set the timer to the next jiffy + */ +void +cabi_enforce_timer_cancel(void) +{ + struct hrtimer *tmr = &cabi_enforce_hrtmr; + int ret; + + ENTER; + ret = hrtimer_try_to_cancel(&cabi_enforce_hrtmr); + EXIT; +} + +/* + * Process enforce timer expiration.
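+ * Runs in hrtimer context: if the expired timer still belongs to + * cabi_current_account, the account is enforced (depleted) via + * cabi_account_enforce(); a stale expiry is ignored.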
+ */ +void +cabi_enforce_timer_process(struct hrtimer *tmr) +{ + + cabi_account_t cabi = (cabi_account_t) tmr->data; + + ENTER; + /* make sure if cabi corresponds to the current cabi */ + if (cabi_current_account == cabi) { + + /* debug */ + cabi_debug_entities (cabi_current_account, cabi); + + /* enforce the account */ + cabi_account_enforce(cabi_current_account); + } + tmr->state = HRTIMER_NORESTART; + EXIT; +} + +void +cabi_timer_start(void) +{ + ENTER; + hrtimer_init(&cabi_enforce_hrtmr, CLOCK_REALTIME, HRTIMER_REL); + cabi_enforce_hrtmr.function = cabi_enforce_timer_process; + EXIT; +} + +int +cabi_account_read_time_proc(cabi_account_t cabi, char *buf) +{ + char *p = buf; + p += sprintf(p, "timespec \n"); + return (p - buf); +} + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/Makefile --- ./linux-2.6.18.1/drivers/cabi/examples/Makefile 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/Makefile 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,69 @@ +CABIDIR = ../.. +CC = $(CROSS_COMPILE)gcc -g +CFLAGS = -O2 -Wall +LIBS = -L../lib -lcabi +INCLUDES = -I../../../include -I/usr/include + +OBJS = cabi_create cabi_destroy cabi_bind cabi_unbind cabi_set cabi_get \ + cabi_overload_create cabi_overload_destroy \ + cabi_create_bind cabi_exec_bind cabi_fifo_crebid \ + cabi_defsrv_crebid cabi_ts_crebid cabi_fifo_crebid_soft \ + cabi_fifo_crebid_hard cabi_ctimer cabi_create_loop\ + +all: cabi_create cabi_destroy cabi_bind cabi_unbind cabi_set cabi_get cabi_overload_create cabi_overload_destroy cabi_create_bind cabi_exec_bind cabi_fifo_crebid cabi_defsrv_crebid cabi_ts_crebid cabi_fifo_crebid_soft cabi_fifo_crebid_hard cabi_ctimer cabi_create_loop \ + +cabi_create: cb_create.c + $(CC) -o cabi_create cb_create.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_destroy: cb_destroy.c + $(CC) -o cabi_destroy cb_destroy.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_bind: cb_bind.c + $(CC) -o cabi_bind cb_bind.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_unbind: cb_unbind.c + $(CC) -o cabi_unbind cb_unbind.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_set: cb_set.c + $(CC) -o cabi_set cb_set.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_get: cb_get.c + $(CC) -o cabi_get cb_get.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_overload_create: cb_overload_create.c + $(CC) -o cabi_overload_create cb_overload_create.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_overload_destroy: cb_overload_destroy.c + $(CC) -o cabi_overload_destroy cb_overload_destroy.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_create_bind: cb_create_bind.c + $(CC) -o cabi_create_bind cb_create_bind.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_exec_bind: cb_exec_bind.c + $(CC) -o cabi_exec_bind cb_exec_bind.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_fifo_crebid_soft: cb_fifo_crebid_soft.c + $(CC) -o cabi_fifo_crebid_soft cb_fifo_crebid_soft.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_fifo_crebid_hard: cb_fifo_crebid_hard.c + $(CC) -o cabi_fifo_crebid_hard cb_fifo_crebid_hard.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_fifo_crebid: cb_fifo_crebid.c + $(CC) -o cabi_fifo_crebid cb_fifo_crebid.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_defsrv_crebid: cb_defsrv_crebid.c + $(CC) -o cabi_defsrv_crebid cb_defsrv_crebid.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_ts_crebid: cb_ts_crebid.c + $(CC) -o cabi_ts_crebid cb_ts_crebid.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_ctimer: cb_timer.c + $(CC) -o cabi_timer cb_timer.c $(CFLAGS) $(INCLUDES) $(LIBS) + +cabi_create_loop: cb_create_loop.c + $(CC) -o cabi_create_loop 
cb_create_loop.c $(CFLAGS) $(INCLUDES) $(LIBS) + +clean: + rm -rf $(OBJS) *~ + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_bind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_bind.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_bind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_bind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void usage(void) { + printf ("Usage: cabi_bind [object_id] [pid]\n"); + printf ("--------------------------------------------------\n"); + printf (" [object_id] : cabi object id\n"); + printf (" [pid] : bind process id\n"); +} + +int main (int argc, char *argv[]) +{ + int ret; + pid_t pid; + + unsigned long cabi_id; + + if (argc != 3) { + usage(); + return 0; + } + + /* set accounting object id */ + cabi_id = atol (argv[1]); + pid = (pid_t) atoi (argv[2]); + + switch (cabi_id) { + case 0: + printf ("object id is 0.\n"); + return 1; + case 1: + printf ("[object_id]=1 is only for overload."); + return 1; + default: + printf ("cabi bind pid\n"); + break; + } + + // Attach this process to the resource set + if ((ret = cabi_account_bind_pid(cabi_id, pid)) != CABI_SUCCESS) { + printf ("cabi_account_bind_pid() faild.(%d)\n", ret); + } else { + printf ("cabi_account_bind_pid: object_id(%d) pid[%d]\n", + (int)cabi_id, pid); + } + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_create.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_create.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_create.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_create.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,119 @@ +/* + * This is sample program for CABI system. + * create accounting object. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE_SEC 1000000 + +void usage(void) { + printf ("cabi_create:\n"); + printf (" cabi terminate action [block]:\n"); + printf (" Usage: cabi_create [cpu_time(us)] [cpu_period(us)]\n"); + printf (" cabi terminate action [signal]:\n"); + printf (" Usage: cabi_create [cpu_time(us)] [cpu_period(us)] [pid] [sig] [flag]\n"); + printf ("--------------------------------------------------\n"); + printf (" [cpu_time(us)] : cpu performance time (usec)\n"); + printf (" [cpu_period(us)] : cpu cycle time (usec)\n"); + printf (" [pid] : signal receive pid\n"); + printf (" [sig] : send signal number\n"); + printf (" [flag] : default(current) or else\n"); +} + +int main (int argc, char *argv[]) +{ + int ret; + struct cabi_uaccount *ucabi; + long long cpu_time, cpu_period; + int operation, sig, flag; + pid_t pid; + + /* set accounting object id */ + switch (argc) { + case 3: + operation = OP_BLOCK; + cpu_time = atoll (argv[1]); + cpu_period = atoll (argv[2]); + pid = 0; + sig = 0; + flag = 0; + break; + case 6: + operation = OP_SIGNAL; + cpu_time = atoll (argv[1]); + cpu_period = atoll (argv[2]); + pid = atoi (argv[3]); + sig = atoi (argv[4]); + flag = atoi (argv[5]); + break; + default: + usage(); + return 1; + } + printf ("Create CABI object...\n"); + if (cpu_time <= 0) { + printf ("Invalid parameter. cpu_time = %lld nsec\n", cpu_time); + return 1; + } + if (cpu_period <= 0) { + printf ("Invalid parameter. cpu_period = %lld nsec\n", cpu_period); + return 1; + } + if (cpu_time > cpu_period) { + printf ("Invalid parameter. 
cpu_time > cpu_period\n"); + return 1; + } + + /* create a user cabi */ + if(!(ucabi = (cabi_account_t) malloc (sizeof(struct cabi_uaccount)))) { + printf ("cabi_create: Memory allocation error.\n"); + return 1; + } + + ucabi->pm.policy = PO_CYCLIC; + ucabi->pm.operation = operation; + ucabi->pm.bind_proc_type = BIND_NORMAL_PROC; + + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 0; + + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 0; + + ucabi->cpu_time.tv_sec = (cpu_time / ONE_SEC); + ucabi->cpu_time.tv_nsec = (cpu_time % ONE_SEC) * 1000; + + ucabi->cpu_period.tv_sec = (cpu_period / ONE_SEC); + ucabi->cpu_period.tv_nsec = (cpu_period % ONE_SEC) * 1000; + + ucabi->pm.cabi_signal.pid = pid; + ucabi->pm.cabi_signal.sig = sig; + ucabi->pm.cabi_signal.flag = flag; + + if ((ret = cabi_account_create (ucabi)) == CABI_SUCCESS) { + printf ("account set create. operation (%d) object_id [%d]\n", + (int)ucabi->pm.operation, + (int)ucabi->cabi_id); + } else { + printf ("cabi_account_create failed.(%d)\n", ret); + } + + if (ucabi->cabi_id == 0) { + printf ("cabi_account_create faild on cpu %x\n", (int)ucabi); + free(ucabi); + return 1; + } + + free(ucabi); + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_create_bind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_create_bind.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_create_bind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_create_bind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,50 @@ +/* + * This is sample program for CABI system. + * create accounting object. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +int main (int argc, char *argv[]) +{ + int ret; + pid_t pid; + + struct cabi_uaccount *ucabi; + + pid = getpid(); + ucabi = (cabi_account_t) malloc (sizeof(struct cabi_uaccount)); + + memset (ucabi, 0x00, sizeof(struct cabi_uaccount)); + /* set scheduling and enforce mode */ + ucabi->pm.operation = OP_BLOCK; + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 1000 * 1000 * 20; + + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 1000 * 1000 * 100; + + if (!(ret = cabi_create_bind (ucabi, pid))) { + printf("account set create. 
object_id [%d]\n", + (int)ucabi->cabi_id); + } else { + printf("cabi_account_create failed.\n"); + } + + if (ucabi->cabi_id == 0) { + printf("cabi_account_create faild on cpu %x\n", (int)ucabi); + return 1; + } + + free(ucabi); + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_defsrv_crebid.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_defsrv_crebid.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_defsrv_crebid.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_defsrv_crebid.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +int main ( + int argc, + char *argv[]) +{ + int i, ret; + pid_t pid; + int execution, period; + + //struct sched_param *sched_param; + struct cabi_uaccount *ucabi; + + if (argc < 3) { + printf("Usage:./program [execution] [period]\n"); + return 0; + } + execution = atoi(argv[1]); + period = atoi(argv[2]); + + /* get selef identity */ + pid = getpid(); + ucabi = malloc (sizeof(struct cabi_uaccount)); + + memset(ucabi, 0x00, sizeof(struct cabi_uaccount)); + + //ucabi->pm.term_act = CABI_TERM_BLOCK; + ucabi->pm.policy = PO_DEFSRV; + //ucabi->pm.operation = OP_BOOST; + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 1000 * 1000 * execution; + + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 1000 * 1000 * period; + + printf("make [DSV] process.\n"); + + /* set accounting object id */ + if (!(ret = cabi_create_bind (ucabi, pid))) { + printf("account set create. object_id [%d]\n", + (int)ucabi->cabi_id); + } else { + printf("cabi_account_create failed.\n"); + } + + if (ucabi->cabi_id == 0) { + printf("cabi_account_create failed on cpu %x\n", (int) ucabi); + return 1; + } + // do some work + for (; ;) + { + for (i = 1; i < 100000000; i++) { + i =+ i; + //printf("count %d\n", i); + } + } + +} + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_destroy.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_destroy.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_destroy.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_destroy.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void usage(void) { + printf ("Usage: cabi_destroy [object_id]\n"); + printf ("--------------------------------------------------\n"); + printf (" [object_id] : cabi object id\n"); +} + +int main (int argc, char *argv[]) +{ + int ret; + unsigned long cabi_id; + + if (argc != 2) { + usage(); + return 0; + } + + /* set the object id */ + cabi_id = atol (argv[1]); + + if ((ret = cabi_account_destroy(cabi_id)) != CABI_SUCCESS) { + printf ("destroy faild.(%d)\n", ret); + } else { + printf ("destroy succeed.\n"); + } + + return 1; + + +} + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_exec_bind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_exec_bind.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_exec_bind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_exec_bind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int errno; + +void usage(void) { + printf ("Usage: cabi_exec_bind [object_id] [program...]\n"); + printf 
("--------------------------------------------------\n"); + printf (" [object_id] : cabi object id\n"); + printf (" [program...] : execute program\n"); +} + +int main (int argc, char *argv[]) +{ + unsigned long cabi_id; + pid_t pid; + int status; + int ret; + + if (argc < 3) { + usage(); + return 0; + } + + cabi_id = atoi (argv[1]); + + if ((pid = fork()) == -1) { + perror("fork()"); + return 1; + } else if (pid > 0) { + if ((ret = cabi_account_bind_pid(cabi_id, pid)) != CABI_SUCCESS) { + printf("exec_bind : cabi_account_bind_pid: faild. (%d)\n", ret); + return 1; + } else { + printf("exec_bind : cabi_account_bind_pid: Account ID(%d)[%d]\n", + (int)cabi_id, pid); + } + waitpid(-1, &status, WNOHANG); + } else { + if (execvp(argv[2], &argv[2]) == -1) { + perror("execv"); + return 1; + } + } + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_fifo_crebid.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_fifo_crebid.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_fifo_crebid.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_fifo_crebid.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RT_PRIORITY 99 + +int main ( + int argc, + char *argv[]) +{ + int i, j, ret; + pid_t pid; + int execution, period; + struct timeval tv1, tv2; + double sec1, sec2, sec; + + struct sched_param *sched_param; + struct cabi_uaccount *ucabi; + + if (argc < 3) { + printf("Usage:./program [execution] [period]\n"); + return 0; + } + execution = atoi(argv[1]); + period = atoi(argv[2]); + + /* get selef identity */ + pid = getpid(); + ucabi = malloc (sizeof(struct cabi_uaccount)); + + memset(ucabi, 0x00, sizeof(struct cabi_uaccount)); + + ucabi->pm.operation = OP_BLOCK; + ucabi->pm.policy = PO_CYCLIC; + ucabi->cpu_time.tv_sec = (execution/1000); + ucabi->cpu_time.tv_nsec = 1000 * 1000 * (execution % 1000); + + ucabi->cpu_period.tv_sec = (period/1000); + ucabi->cpu_period.tv_nsec = 1000 * 1000 * (period % 1000); + + printf("[cpu_time] sec %ld nsec %ld\n", + ucabi->cpu_time.tv_sec, + ucabi->cpu_time.tv_nsec); + printf("[cpu_period] sec %ld nsec %ld\n", + ucabi->cpu_period.tv_sec, + ucabi->cpu_period.tv_nsec); + + /* make this process real-time one */ + sched_param = malloc(sizeof(struct sched_param)); + sched_param->sched_priority = RT_PRIORITY; + sched_setscheduler(pid, SCHED_FIFO, sched_param); + printf("make RT[FIFO] process.\n"); + + cabi_account_eval (0); + /* set accounting object id */ + if (!(ret = cabi_create_bind (ucabi, pid))) { + printf("account set create. 
object_id [%d]\n", + (int)ucabi->cabi_id); + } else { + printf("cabi_account_create failed.\n"); + } + + if (ucabi->cabi_id == 0) { + printf("cabi_account_create failed on cpu %x\n", (int) ucabi); + return 1; + } + + gettimeofday (&tv1, NULL); + for (j = 0;j < 100000 ; j++) + { + for (i = 1; i < 100000; i++) { + i =+ i; + //printf("count %d\n", i); + } + } + gettimeofday (&tv2, NULL); + /* + * Get evaluation result + */ + cabi_account_eval(1); + sleep (2); + for (i = 0; i < 8; i++) { + cabi_account_eval(2); + sleep (2); + } + cabi_account_eval(3); + + sec1 = tv1.tv_sec + tv1.tv_usec*1000; + sec2 = tv2.tv_sec + tv2.tv_usec*1000; + sec = sec2 - sec1; + printf ("time = %10.3f\n", sec); + + + +} + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_get.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_get.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_get.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_get.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,79 @@ +/* + * This is sample program for CABI system. + * create accounting object. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void usage(void) { + printf ("Usage: cabi_get [object_id]\n"); + printf ("--------------------------------------------------\n"); + printf (" [object_id] : cabi object id\n"); +} + +int main (int argc, char *argv[]) +{ + struct cabi_uaccount *ucabi; + unsigned long cabi_id; + unsigned long long cpu_time, cpu_period; + float cpu_ratio; + int ret; + + if (argc != 2) { + usage(); + return 0; + } + + /* set accounting object id */ + cabi_id = atoi (argv[1]); + + if (cabi_id == 0) { + printf("object id is 0\n"); + return 1; + } + + /* create a user cabi */ + ucabi = (struct cabi_uaccount *) malloc (sizeof(struct cabi_uaccount)); + + if ((ret = cabi_account_get (cabi_id, ucabi)) == CABI_SUCCESS) { + printf("object_id (%d)\n", + (int)ucabi->cabi_id); + } else { + if (ret == CABI_ENOEXIST) { + printf("object id(%ld) not found.\n", cabi_id); + } else { + printf("cabi_account_get failed.(%d)\n", ret); + } + return 1; + } + if (!ucabi) { + printf("cabi_account_get faild on cpu %p\n", ucabi); + return 1; + } + + cpu_time = ucabi->cpu_time.tv_sec * 1000 + ucabi->cpu_time.tv_nsec / 1000000; + cpu_period = ucabi->cpu_period.tv_sec * 1000 + ucabi->cpu_period.tv_nsec / 1000000; + cpu_ratio = (float)cpu_time / (float)cpu_period * 100.0; + + printf ("OPERATION (%x)\n", ucabi->pm.operation); + printf ("CPU_TIME (%lu sec %02lu nsec) /CPU_PERIOD (%lu sec %02lu nsec)\n", + ucabi->cpu_time.tv_sec, + ucabi->cpu_time.tv_nsec, + ucabi->cpu_period.tv_sec, + ucabi->cpu_period.tv_nsec); + printf ("CPU_RATIO (%5.2f %%)\n", cpu_ratio); + + free(ucabi); + + return 0; +} + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_loop.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_loop.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_loop.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_loop.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define LOOP 10 +#define DELAY_SEC 1 +#define DELAY_USEC 1 + + +int main (int argc, char **argv) +{ + + struct sigaction act, oldact; + struct itimerval value, ovalue; + int i, ret, end; + + /* waiting */ + while (count < LOOP); + + /* release the interrupt */ + setitimer (ITIMER_REAL, &ovalue, &value); + + ret = cabi_account_eval(end); + 
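+ /* + * cabi_account_eval() selects a trace-dump phase in the kernel: + * 0 = init, 1 = first read, 2 = subsequent reads, 3 = end (see + * sys_cabi_account_eval() in drivers/cabi/cabi_syscalls.c). + */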
+ /* clear the action */ + sigaction (SIGALRM, &oldact, NULL); + + /* print the result */ + for (i = 0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE_SEC 1000000 + +int count = 0; + +void usage(void) { + printf ("Usage: cabi_overload_create [overload_time(us)] [ovlerload_period(us)]\n"); + printf ("--------------------------------------------------\n"); + printf (" [overload_time(us)] : overload cpu performance time (usec)\n"); + printf (" [overload_period(us)] : overload cycle time (usec)\n"); + printf (" [overload_time] / [overload_period] * 100 : overload limit persents.\n"); +} + +void sig(int sig) +{ + printf ("signal %d received. count(%d)\n", sig, count); + count++; +} + + +void loop(void) +{ + int i; + signal(SIGCONT, sig); + for (;; i++) { + /* printf("%02d: prog running...\n", i); */ + sleep (2); + } +} + +int main (int argc, char *argv[]) +{ + int ret; + pid_t admin; + long long cpu_time, cpu_period; + + struct cabi_uaccount *ucabi; + cabi_account_t cabi = NULL_ACCOUNT; + int status; + + if (argc != 3) { + usage(); + return 0; + } + cpu_time = atoll (argv[1]); + cpu_period = atoll (argv[2]); + + if (cpu_time <= 0) { + printf ("Invalid parameter. cpu_time = %lld nsec\n", cpu_time); + return 1; + } + if (cpu_period <= 0) { + printf ("Invalid parameter. cpu_period = %lld nsec\n", cpu_period); + return 1; + } + if (cpu_time > cpu_period) { + printf ("Invalid parameter. cpu_time > cpu_period\n"); + return 1; + } + + /* create a resource set */ + ucabi = malloc (sizeof(struct cabi_uaccount)); + + /* make admin process */ + if ((admin = fork()) == -1) { + perror("fork()"); + return 1; + } + else if (admin > IDLE_PROCESS) { + /* this is parent */ + printf ("AO PID = %d\n", getpid()); + printf ("Admin PID = %d\n", admin); + waitpid(-1, &status, WNOHANG); + } else { + printf ("Admin start..pid %d\n", getpid()); + loop(); + } + + printf ("Overload Create!\n"); + + /* set scheduling and enforce mode */ + ucabi->pm.operation = OP_SIGNAL; + ucabi->pm.cabi_signal.pid = admin; + ucabi->pm.cabi_signal.sig = SIGCONT; + + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 0; + + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 0; + + ucabi->cpu_time.tv_sec = (cpu_time / ONE_SEC); + ucabi->cpu_time.tv_nsec = (cpu_time % ONE_SEC) * 1000; + + ucabi->cpu_period.tv_sec = (cpu_period / ONE_SEC); + ucabi->cpu_period.tv_nsec = (cpu_period % ONE_SEC) * 1000; + + if ((ret = cabi_overload_create (ucabi)) == CABI_SUCCESS) { + printf ("account set create[SIGNAL]. 
object_id (%d)" + "cabi object address (0x%x) , user cabi address(0x%x)\n", + (int)ucabi->cabi_id, (int)cabi, (int)ucabi); + } else { + kill(admin, SIGKILL); + printf ("Kill admin process (pid = %d)\n", admin); + printf ("cabi_account_create failed.(%d)\n", ret); + return 1; + } + + if (ucabi->cabi_id != OVERLOAD_CABI_ID) { + printf ("cabi_account_create faild on cpu %p\n", cabi); + return 1; + } + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_overload_destroy.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_overload_destroy.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_overload_destroy.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_overload_destroy.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,18 @@ +#include +#include +#include +#include + +extern int cabi_overload_destroy(void); + +int main (void) +{ + int ret; + + if ((ret = cabi_overload_destroy()) == CABI_SUCCESS) { + printf("overload destroy success.\n"); + } else { + printf("overload destroy faild. (%d)\n", ret); + } + return ret; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_set.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_set.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_set.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_set.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,145 @@ +/* + * This is sample program for CABI system. + * create accounting object. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE_SEC 1000000 + +void usage(void) { + printf ("cabi_set:\n"); + printf (" only cpu ratio change:\n"); + printf (" Usage: cabi_set [object_id] [cpu_time(us)] [cpu_period(us)]\n"); + printf (" cabi terminate action [signel] -> [block]:\n"); + printf (" Usage: cabi_set [object_id] [cpu_time(us)] [cpu_period(us)] [term_act]\n"); + printf (" cabi terminate action [block] -> [signal]:\n"); + printf (" Usage: cabi_set [object_id] [cpu_time(us)] [cpu_period(us)] [term_act] [pid] [sig] [flag]\n"); + printf ("--------------------------------------------------\n"); + printf (" [object_id] : cabi object id\n"); + printf (" [cpu_time(us)] : cpu performance time (usec)\n"); + printf (" [cpu_period(us)] : cpu cycle time (usec)\n"); + printf (" [term_act] : terminate action (block=1, signal=2)\n"); + printf (" [pid] : signal receive process id\n"); + printf (" [sig] : send signal number\n"); + printf (" [flag] : default(current) or else\n"); +} + +int main (int argc, char *argv[]) +{ + int ret; + struct cabi_uaccount *ucabi; + unsigned long cabi_id; + long long cpu_time, cpu_period; + int operation, sig, flag; + pid_t pid; + + sig = flag = 0; + pid = 0; + /* set accounting object id */ + switch (argc) { + case 4: + operation = OP_BLOCK; + break; + case 5: + operation = atoi(argv[4]); + if(operation != OP_BLOCK) { + usage(); + return 1; + } + break; + case 8: + operation = atoi(argv[4]); + break; + default: + usage(); + return 1; + } + cabi_id = atoi (argv[1]); + cpu_time = atoll (argv[2]); + cpu_period = atoll (argv[3]); + + switch (operation) { + case OP_BLOCK: + break; + case OP_SIGNAL: + pid = atoi (argv[5]); + sig = atoi (argv[6]); + flag = atoi (argv[7]); + break; + default: + printf ("Invalid parameter.operation = %d\n", operation); + return 1; + } + printf ("Setting...\n"); + if (cabi_id == 0) { + return 1; + } + if (cpu_time <= 0) { + printf ("Invalid parameter. 
cpu_time = %lld nsec\n", cpu_time); + return 1; + } + if (cpu_period <= 0) { + printf ("Invalid parameter. cpu_period = %lld nsec\n", cpu_period); + return 1; + } + if (cpu_time > cpu_period) { + printf ("Invalid parameter. cpu_time > cpu_period\n"); + return 1; + } + + /* create a user cabi */ + ucabi = (cabi_account_t) malloc (sizeof(struct cabi_uaccount)); + + if ((ret = cabi_account_get (cabi_id, ucabi)) != CABI_SUCCESS) { + printf ("cabi_account_set : cabi_id(%ld) no exist. (%d)\n", cabi_id, ret); + return 1; + } + + /* set time to the timespec */ + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 0; + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 0; + + ucabi->cpu_time.tv_sec = (cpu_time / ONE_SEC); + ucabi->cpu_time.tv_nsec = (cpu_time % ONE_SEC) * 1000; + + ucabi->cpu_period.tv_sec = (cpu_period / ONE_SEC); + ucabi->cpu_period.tv_nsec = (cpu_period % ONE_SEC) * 1000; + + if (argc != 4) { + ucabi->pm.operation = operation; + } + if (operation == OP_SIGNAL) { + ucabi->pm.cabi_signal.pid = pid; + ucabi->pm.cabi_signal.sig = sig; + ucabi->pm.cabi_signal.flag = flag; + } + + if ((ret = cabi_account_set (cabi_id, ucabi)) == CABI_SUCCESS) { + printf ("account set. object_id (%d)", (int)cabi_id); + } else { + printf ("cabi_id = %ld : cabi_account_set failed. (%d)\n", cabi_id, ret); + return 1; + } + + printf ("TERM_ACT (%d)\n", ucabi->pm.operation); + printf ("CPU_TIME (%lu sec %02lu nsec) /CPU_PERIOD (%lu sec %02lu nsec)\n", + ucabi->cpu_time.tv_sec, + ucabi->cpu_time.tv_nsec, + ucabi->cpu_period.tv_sec, + ucabi->cpu_period.tv_nsec); + + free(ucabi); + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_timer.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_timer.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_timer.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_timer.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define LOOP 10 +#define DELAY_SEC 1 +#define DELAY_USEC 1 + + +volatile int count = 0; +struct timeval tv[LOOP]; +struct timezone tz; + + +void sig_action_handler () +{ + + //system("./ctcreate"); + gettimeofday (tv+count, &tz); + count++; +} + +int main () +{ + + struct sigaction act, oldact; + struct itimerval value, ovalue; + int i, ret, end; + end = 1; + + /* set interrupt handler */ + act.sa_handler = sig_action_handler; + act.sa_flags = 0; + sigaction (SIGALRM, &act, &oldact); + + /* set interrupt */ + /* the first interrupt ->it_value */ + /* after the second -> it_interval */ + value.it_value.tv_sec = DELAY_SEC; + value.it_value.tv_usec = DELAY_USEC; + value.it_interval.tv_sec = DELAY_SEC; + value.it_interval.tv_usec = DELAY_USEC; + setitimer (ITIMER_REAL, &value, &ovalue); + + /* waiting */ + while (count < LOOP); + + /* release the interrupt */ + setitimer (ITIMER_REAL, &ovalue, &value); + + ret = cabi_account_eval(end); + + /* clear the action */ + sigaction (SIGALRM, &oldact, NULL); + + /* print the result */ + for (i = 0; i +#include +#include +#include +#include +#include +#include +#include +#include + +#define RT_PRIORITY 50 + +int main ( + int argc, + char *argv[]) +{ + int i, ret; + pid_t pid; + int execution, period; + + struct cabi_uaccount *ucabi; + + if (argc < 3) { + printf("Usage:./program [execution] [period]\n"); + return 0; + } + execution = atoi(argv[1]); + period = atoi(argv[2]); + + /* get selef identity */ + pid = getpid(); + ucabi = malloc 
(sizeof(struct cabi_uaccount)); + + memset(ucabi, 0x00, sizeof(struct cabi_uaccount)); + + ucabi->pm.operation = OP_BLOCK; + ucabi->pm.policy = PO_CYCLIC; + ucabi->cpu_time.tv_sec = 0; + ucabi->cpu_time.tv_nsec = 1000 * 1000 * execution; + + ucabi->cpu_period.tv_sec = 0; + ucabi->cpu_period.tv_nsec = 1000 * 1000 * period; + + /* leave this process as a normal time-sharing one */ + printf("make TS[NORMAL] process.\n"); + + /* set accounting object id */ + if (!(ret = cabi_create_bind (ucabi, pid))) { + printf("account set create. object_id [%d]\n", + (int)ucabi->cabi_id); + } else { + printf("cabi_account_create failed.\n"); + } + + if (ucabi->cabi_id == 0) { + printf("cabi_account_create failed on cpu %p\n", (void *) ucabi); + return 1; + } + // do some work + for (; ;) + { + for (i = 1; i < 100000000; i++) { + i += 1; + //printf("count %d\n", i); + } + } + +} + + diff -urN ./linux-2.6.18.1/drivers/cabi/examples/cb_unbind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_unbind.c --- ./linux-2.6.18.1/drivers/cabi/examples/cb_unbind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/examples/cb_unbind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void usage(void) { + printf ("Usage: cabi_unbind [pid]\n"); + printf ("--------------------------------------------------\n"); + printf (" [pid] : unbind process id\n"); +} + +int main (int argc, char *argv[]) +{ + int ret; + pid_t pid; + + if (argc != 2) { + usage(); + return 0; + } + + pid = (pid_t) atoi (argv[1]); + printf ("cabi unbind pid: %d\n", pid); + + // Detach this process from the accounting object + if ((ret = cabi_account_unbind(pid)) != CABI_SUCCESS) { + printf ("unbind failed.(%d)\n", ret); + } else { + printf ("unbind succeeded.\n"); + } + + return 0; +} diff -urN ./linux-2.6.18.1/drivers/cabi/lib/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/Makefile --- ./linux-2.6.18.1/drivers/cabi/lib/Makefile 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/Makefile 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,29 @@ +INSTALL = /usr/bin/install -c -m 644 +DESTDIR = +KERNELDIR = ../../.. +#KERNELDIR = /usr/src/debug +INCLUDEDIR = /usr +CC = $(CROSS_COMPILE)gcc -g +AR = $(CROSS_COMPILE)ar +RANLIB = $(CROSS_COMPILE)ranlib +SOURCES = $(wildcard *.c) +OBJS = $(SOURCES:.c=.o) +CFLAGS = -DCONFIG_CABI -O3 -Wall -I$(KERNELDIR)/include -I$(INCLUDEDIR)/include + +all: libcabi.so libcabi.a + +libcabi.a: $(OBJS) + $(AR) -r libcabi.a $(OBJS) + $(RANLIB) libcabi.a + +libcabi.so: $(OBJS) + $(CC) -fPIC $(OBJS) -shared -o libcabi.so + +clean: + rm -rf $(OBJS) libcabi.a libcabi.so + +install: libcabi.a libcabi.so + $(INSTALL) libcabi.a $(DESTDIR)/lib/libcabi.a + $(INSTALL) libcabi.so $(DESTDIR)/lib/libcabi.so + $(INSTALL) $(KERNELDIR)/include/cabi/cabi.h $(DESTDIR)/usr/include/cabi/cabi.h + $(INSTALL) $(KERNELDIR)/include/cabi/cabi_error.h $(DESTDIR)/usr/include/cabi/cabi_error.h diff -urN ./linux-2.6.18.1/drivers/cabi/lib/README.cabi linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/README.cabi --- ./linux-2.6.18.1/drivers/cabi/lib/README.cabi 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/README.cabi 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,11 @@ +README file for the cabi library. +=================================== +This file gives brief instructions for building and installing the +cabi library.
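+ +The library wraps the CABI system calls (cabi_account_create, +cabi_account_bind_pid, and so on); the sample programs under +../examples link against it with -lcabi. A typical compile line +(paths here are illustrative) looks like: + +#gcc -o cb_create cb_create.c -I../../../include -L../lib -lcabi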
+ +Before running make, please add links to the kernel header +directories as follows. + +#ln -sf /usr/src/linux/include/linux /usr/include/linux +#ln -sf /usr/src/linux/include/asm-i386 /usr/include/asm + diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_bind_pgid.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_bind_pgid.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_bind_pgid.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_bind_pgid.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include +/* + * int sys_cabi_account_bind_pgid(unsigned long cabi_id, pid_t pgid) + */ + +_syscall2(int, cabi_account_bind_pgid, unsigned long, cabi_id, pid_t, pgid) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_bind_pid.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_bind_pid.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_bind_pid.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_bind_pid.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include +/* + * int sys_cabi_account_bind_pid(unsigned long cabi_id, pid_t pid) + * + */ +_syscall2(int, cabi_account_bind_pid, unsigned long, cabi_id, pid_t, pid) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_create.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_create.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_create.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_create.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,19 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include + +/* + * int sys_cabi_account_create(cabi_object_t objectid, cabi_account_t cpu) + */ +_syscall1(int, cabi_account_create, struct cabi_uaccount *, ucabi) + + diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_destroy.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_destroy.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_destroy.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_destroy.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes.
+ * + */ + +#define __LIBRARY__ +#include +#include + +/* + * int sys_cabi_account_destroy (unsigned long cabi_id) + */ +_syscall1(int, cabi_account_destroy, unsigned long, cabi_id) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_eval.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_eval.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_eval.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_eval.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,19 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include + +/* + * int sys_cabi_account_create(cabi_object_t objectid, cabi_account_t cpu) + */ +_syscall1(int, cabi_account_eval, int, eventtype) + + diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_get.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_get.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_get.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_get.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include + +/* + * cabi_uaccount_t sys_cabi_account_get (unsigned long cabi_id, cabi_uaccount_t, ucabi) + */ +_syscall2(int, cabi_account_get, unsigned long, cabi_id, cabi_uaccount_t, ucabi) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_set.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_set.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_set.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_set.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include + +/* + * int sys_cabi_account_set (unsigned long cabi_id, struct cabi_uaccount_t *cpu) + */ +_syscall2(int, cabi_account_set, unsigned long, cabi_id, struct cabi_uaccount *, ucabi) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_unbind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_unbind.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_account_unbind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_account_unbind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,17 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. 
+ * + */ + +#define __LIBRARY__ +#include +#include + +/* + * int sys_cabi_account_unbind (pid_t pid) + */ +_syscall1(int, cabi_account_unbind, pid_t, pid) diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_create_bind.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_create_bind.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_create_bind.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_create_bind.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,72 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include +#include +/* + * int cabi_overload_create(cabi_uaccount_t ucabi) + */ + +extern int cabi_account_create (struct cabi_uaccount *); +extern int cabi_account_bind_pid(unsigned long cabi_id, pid_t pid); +extern int cabi_account_destroy(unsigned long cabi_id); + +int +cabi_create_bind (struct cabi_uaccount *ucabi, pid_t pid) +{ + + int retval = -EPERM; + + /* If ucabi has not valid address. */ + if (!ucabi) { + return CABI_EINVAL; + } + + /* call cabi_account_create() */ + if (!(retval = cabi_account_create (ucabi))) { +#ifdef DEBUG_CABILIB + fprintf(stderr, "create_bind: account set create. object_id (%d)" + "user cabi address (0x%x)\n", + (int) ucabi->cabi_id, (int)ucabi); +#endif + } else { +#ifdef DEBUG_CABILIB + fprintf(stderr, "create_bind failed. (%d)\n", retval); +#endif + goto error; + } + + if (pid < 0) { +#ifdef DEBUG_CABILIB + fprintf(stderr, "create_bind: invalid pid(%d)\n", + (int)pid); +#endif + retval = CABI_EINVAL; /* Invalid argument */ + goto error; + } + + /* call cabi_account_bind_pid */ + if (!(retval = cabi_account_bind_pid(ucabi->cabi_id, pid))) { +#ifdef DEBUG_CABILIB + fprintf(stderr, "cabi_account_bind_pid: Account ID(%d)[%d]\n", + (int) ucabi->cabi_id, pid); +#endif + return CABI_SUCCESS; + } else { +#ifdef DEBUG_CABILIB + fprintf(stderr, "cabi_account_bind_pid() failed.\n"); +#endif + cabi_account_destroy(ucabi->cabi_id); + } + +error: + return retval; +} diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_overload_create.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_overload_create.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_overload_create.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_overload_create.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,84 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include +#include + +#define DEBUG_CABILIB 1 + +/* + * int cabi_overload_create(cabi_uaccount_t ucabi) + */ + +extern int cabi_account_create (struct cabi_uaccount *); +extern int cabi_account_bind_pid (unsigned long cabi_id, pid_t pid); + +int +cabi_overload_create (struct cabi_uaccount *ucabi) +{ + + pid_t pid; + int ret; + + /* If ucabi has not valid address. 
*/ + if (!ucabi) { + return CABI_EINVAL; + } + + ucabi->pm.bind_proc_type = BIND_IDLE_PROC; + +#ifdef DEBUG_CABILIB + printf ("pm.bind_proc_type (%x)\n", + ucabi->pm.bind_proc_type); +#endif + + if ((ret = cabi_account_create (ucabi)) == CABI_SUCCESS) { +#ifdef DEBUG_CABILIB + fprintf(stderr, "overload account set create. object_id (%d) " + "user cabi address (0x%x)\n", + (int) ucabi->cabi_id, (int)ucabi); +#endif + } else { +#ifdef DEBUG_CABILIB + fprintf(stderr, "overload cabi_account_create failed. (%d)\n", + ret); +#endif + return ret; + } + + /* if the system call succeeded, bind the idle process. */ + if (ucabi->cabi_id != OVERLOAD_CABI_ID) { +#ifdef DEBUG_CABILIB + fprintf (stderr, "cabi object_id(%d) was not set. error return.\n", (int) ucabi->cabi_id); +#endif + return CABI_EINVAL; + } + + /* set the idle process */ + pid = IDLE_PROCESS; + + if ((ret = cabi_account_bind_pid(ucabi->cabi_id, pid)) + == CABI_SUCCESS) { +#ifdef DEBUG_CABILIB + fprintf(stderr, "cabi_account_bind_pid: Account ID(%d)[%d]\n", + (int) ucabi->cabi_id, pid); +#endif + return CABI_SUCCESS; + } else { +#ifdef DEBUG_CABILIB + fprintf(stderr, "cabi_account_bind_pid() [idle] failed.\n"); +#endif + return ret; + } +} + diff -urN ./linux-2.6.18.1/drivers/cabi/lib/cabi_overload_destroy.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_overload_destroy.c --- ./linux-2.6.18.1/drivers/cabi/lib/cabi_overload_destroy.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/lib/cabi_overload_destroy.c 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,42 @@ +/* + * Copyright (C) OS Research Group, Waseda University Nakajima Laboratory. + * MontaVista Software, Inc. + * All Rights Reserved. + * any improvements or extensions that they make and grant Waseda University + * the rights to redistribute these changes. + * + */ + +#define __LIBRARY__ +#include +#include +#include + +/* + * int cabi_overload_destroy(void) + */ + +extern int cabi_account_unbind(pid_t pid); +extern int cabi_account_destroy(unsigned long cabi_id); + +int +cabi_overload_destroy(void) +{ + pid_t pid = IDLE_PROCESS; + int ret; + + if ((ret = cabi_account_unbind(pid)) != CABI_SUCCESS) { +#ifdef DEBUG_CABILIB + fprintf (stderr, "cabi_overload_destroy : unbind error. (%d)\n", ret); +#endif + return ret; + } + if ((ret = cabi_account_destroy(OVERLOAD_CABI_ID)) != CABI_SUCCESS) { +#ifdef DEBUG_CABILIB + fprintf (stderr, "cabi_overload_destroy : destroy error.
(%d)\n", ret); +#endif + return ret; + } + + return ret; +} diff -urN ./linux-2.6.18.1/drivers/cabi/udivdi3.S linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/udivdi3.S --- ./linux-2.6.18.1/drivers/cabi/udivdi3.S 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/cabi/udivdi3.S 2007-05-20 14:14:28.000000000 +0900 @@ -0,0 +1,396 @@ + .file "libgcc2.c" + .version "01.01" +gcc2_compiled.: +.section .rodata + .type __clz_tab,@object +__clz_tab: +.byte 0 +.byte 1 +.byte 2 +.byte 2 +.byte 3 +.byte 3 +.byte 3 +.byte 3 +.byte 4 +.byte 4 +.byte 4 +.byte 4 +.byte 4 +.byte 4 +.byte 4 +.byte 4 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 5 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 6 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 7 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 +.byte 8 + .size __clz_tab,256 +.text + .align 4 +.globl __udivdi3 + .type __udivdi3,@function +__udivdi3: + subl $28,%esp + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + movl 56(%esp),%ebx + movl 60(%esp),%edi + movl 48(%esp),%eax + movl %eax,40(%esp) + movl 52(%esp),%edx + movl %edx,16(%esp) + testl %edi,%edi + jne .L25 + cmpl %edx,%ebx + jbe .L26 +#APP + divl %ebx +#NO_APP + movl %eax,%esi + jmp .L30 + .align 4 +.L26: + cmpl $0,56(%esp) + jne .L28 + movl $1,%eax + xorl %edx,%edx + divl %ebx + movl %eax,%ebx +.L28: + movl 16(%esp),%eax + xorl %edx,%edx +#APP + divl %ebx +#NO_APP + movl %eax,%edi + movl %edx,16(%esp) + movl 40(%esp),%eax +#APP + divl %ebx +#NO_APP + movl %eax,%esi + jmp .L30 + .align 4 +.L25: + cmpl %edi,16(%esp) + jae .L31 + xorl %edi,%edi + movl 
%edi,%esi + jmp .L30 + .align 4 +.L31: +#APP + bsrl %edi,%eax +#NO_APP + movl %eax,%esi + xorl $31,%esi + jne .L38 + cmpl %edi,16(%esp) + ja .L39 + cmpl %ebx,40(%esp) + jb .L45 +.L39: + movl $1,%esi + jmp .L45 + .align 4 +.L38: + movl $32,%ebp + subl %esi,%ebp + movl %esi,%ecx + sall %cl,%edi + movl %edi,20(%esp) + movl %ebx,%eax + movl %ebp,%ecx + shrl %cl,%eax + orl %eax,%edi + movl %esi,%ecx + sall %cl,%ebx + movl 16(%esp),%eax + movl %ebp,%ecx + shrl %cl,%eax + movl %eax,36(%esp) + movl 16(%esp),%eax + movl %esi,%ecx + sall %cl,%eax + movl %eax,20(%esp) + movl 40(%esp),%eax + movl %ebp,%ecx + shrl %cl,%eax + movl 20(%esp),%edx + orl %eax,%edx + movl %edx,16(%esp) + movl %esi,%ecx + sall %cl,40(%esp) + movl %edx,%eax + movl 36(%esp),%edx +#APP + divl %edi +#NO_APP + movl %eax,%esi + movl %edx,16(%esp) +#APP + mull %ebx +#NO_APP + movl %eax,%ebx + movl %edx,%edi + cmpl %edi,16(%esp) + jb .L44 + jne .L45 + cmpl %ebx,40(%esp) + jae .L45 +.L44: + decl %esi +.L45: + xorl %edi,%edi +.L30: + movl %esi,28(%esp) + movl %edi,32(%esp) + movl 28(%esp),%eax + movl 32(%esp),%edx + popl %ebx + popl %esi + popl %edi + popl %ebp + addl $28,%esp + ret +.Lfe1: + .size __udivdi3,.Lfe1-__udivdi3 + .ident "GCC: (GNU) 2.8.1" diff -urN ./linux-2.6.18.1/drivers/char/Kconfig linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/Kconfig --- ./linux-2.6.18.1/drivers/char/Kconfig 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/Kconfig 2007-05-19 23:58:35.000000000 +0900 @@ -741,6 +741,46 @@ To compile this driver as a module, choose M here: the module will be called rtc. +config RTC_HISTOGRAM + bool "Real Time Clock Histogram Support" + default n + depends on RTC + ---help--- + If you say Y here then the kernel will track the delivery and + wakeup latency of /dev/rtc using tasks and will report a + histogram to the kernel log when the application closes /dev/rtc. + +config BLOCKER + tristate "Priority Inheritance Debugging (Blocker) Device Support" + depends on X86 + default y + ---help--- + If you say Y here then a device will be created that the userspace + pi_test suite uses to test and measure kernel locking primitives. + +config LPPTEST + tristate "Parallel Port Based Latency Measurement Device" + depends on !PARPORT && X86 + default y + ---help--- + If you say Y here then a device will be created that the userspace + testlpp utility uses to measure IRQ latencies of a target system + from an independent measurement system. + + NOTE: this code assumes x86 PCs and that the parallel port is + bidirectional and is on IRQ 7. + + to use the device, both the target and the source system needs to + run a kernel with CONFIG_LPPTEST enabled. To measure latencies, + use the scripts/testlpp utility in your kernel source directory, + and run it (as root) on the source system - it will start printing + out the latencies it took to get a response from the target system: + + Latency of response: 12.2 usecs (121265 cycles) + + then generate various workloads on the target system to see how + (worst-case-) latencies are impacted. 
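+ + For orientation, the measurement side boils down to a single + ioctl against the lpptest character device. A minimal sketch of + the userspace caller is shown below; the device node is assumed + to be created by hand (mknod /dev/lpptest c 245 0), and the + request code mirrors the LPPTEST_TEST definition in + drivers/char/lpptest.c: + + #include <stdio.h> + #include <fcntl.h> + #include <unistd.h> + #include <sys/ioctl.h> + + #define LPPTEST_TEST _IOR(245, 1, unsigned long long) + + int main(void) + { + unsigned long long cycles = 0; + int fd = open("/dev/lpptest", O_RDWR); + + if (fd < 0) + return 1; + /* one request/response round trip; 0 cycles means timeout */ + if (ioctl(fd, LPPTEST_TEST, &cycles) == 0) + printf("response took %llu cycles\n", cycles); + close(fd); + return 0; + }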
+ config SGI_DS1286 tristate "SGI DS1286 RTC support" depends on SGI_IP22 diff -urN ./linux-2.6.18.1/drivers/char/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/Makefile --- ./linux-2.6.18.1/drivers/char/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -89,6 +89,9 @@ obj-$(CONFIG_TANBAC_TB0219) += tb0219.o obj-$(CONFIG_TELCLOCK) += tlclk.o +obj-$(CONFIG_BLOCKER) += blocker.o +obj-$(CONFIG_LPPTEST) += lpptest.o + obj-$(CONFIG_WATCHDOG) += watchdog/ obj-$(CONFIG_MWAVE) += mwave/ obj-$(CONFIG_AGP) += agp/ diff -urN ./linux-2.6.18.1/drivers/char/blocker.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/blocker.c --- ./linux-2.6.18.1/drivers/char/blocker.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/blocker.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,107 @@ +/* + * priority inheritance testing device + */ + +#include +#include + +#define BLOCKER_MINOR 221 + +#define BLOCK_IOCTL 4245 +#define BLOCK_SET_DEPTH 4246 + +#define BLOCKER_MAX_LOCK_DEPTH 10 + +void loop(int loops) +{ + int i; + + for (i = 0; i < loops; i++) + get_cycles(); +} + +static spinlock_t blocker_lock[BLOCKER_MAX_LOCK_DEPTH]; + +static unsigned int lock_depth = 1; + +void do_the_lock_and_loop(unsigned int args) +{ + int i, max; + + if (rt_task(current)) + max = lock_depth; + else if (lock_depth > 1) + max = (current->pid % lock_depth) + 1; + else + max = 1; + + /* Always lock from the top down */ + for (i = max-1; i >= 0; i--) + spin_lock(&blocker_lock[i]); + loop(args); + for (i = 0; i < max; i++) + spin_unlock(&blocker_lock[i]); +} + +static int blocker_open(struct inode *in, struct file *file) +{ + printk(KERN_INFO "blocker_open called\n"); + + return 0; +} + +static long blocker_ioctl(struct file *file, + unsigned int cmd, unsigned long args) +{ + switch(cmd) { + case BLOCK_IOCTL: + do_the_lock_and_loop(args); + return 0; + case BLOCK_SET_DEPTH: + if (args >= BLOCKER_MAX_LOCK_DEPTH) + return -EINVAL; + lock_depth = args; + return 0; + default: + return -EINVAL; + } +} + +static struct file_operations blocker_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = blocker_ioctl, + .open = blocker_open, +}; + +static struct miscdevice blocker_dev = +{ + BLOCKER_MINOR, + "blocker", + &blocker_fops +}; + +static int __init blocker_init(void) +{ + int i; + + if (misc_register(&blocker_dev)) + return -ENODEV; + + for (i = 0; i < BLOCKER_MAX_LOCK_DEPTH; i++) + spin_lock_init(blocker_lock + i); + + return 0; +} + +void __exit blocker_exit(void) +{ + printk(KERN_INFO "blocker device uninstalled\n"); + misc_deregister(&blocker_dev); +} + +module_init(blocker_init); +module_exit(blocker_exit); + +MODULE_LICENSE("GPL"); + diff -urN ./linux-2.6.18.1/drivers/char/hangcheck-timer.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/hangcheck-timer.c --- ./linux-2.6.18.1/drivers/char/hangcheck-timer.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/hangcheck-timer.c 2007-05-19 23:58:35.000000000 +0900 @@ -117,7 +117,7 @@ __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); #endif /* not MODULE */ -#if defined(CONFIG_X86_64) || defined(CONFIG_S390) +#ifdef CONFIG_S390 # define HAVE_MONOTONIC # define TIMER_FREQ 1000000000ULL #elif defined(CONFIG_IA64) diff -urN ./linux-2.6.18.1/drivers/char/hpet.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/hpet.c --- ./linux-2.6.18.1/drivers/char/hpet.c 2006-10-14 12:34:03.000000000 
+0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/hpet.c 2007-05-19 23:58:35.000000000 +0900 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -50,8 +51,34 @@ #define HPET_RANGE_SIZE 1024 /* from HPET spec */ +#if BITS_PER_LONG == 64 +#define write_counter(V, MC) writeq(V, MC) +#define read_counter(MC) readq(MC) +#else +#define write_counter(V, MC) writel(V, MC) +#define read_counter(MC) readl(MC) +#endif + static u32 hpet_nhpet, hpet_max_freq = HPET_USER_FREQ; +static void __iomem *hpet_mc_ptr; + +static cycle_t read_hpet(void) +{ + return (cycle_t)read_counter((void __iomem *)hpet_mc_ptr); +} + +static struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 300, + .read = read_hpet, + .mask = 0xffffffffffffffffLL, + .mult = 0, /*to be caluclated*/ + .shift = 10, + .is_continuous = 1, +}; +static struct clocksource *hpet_clocksource_p; + /* A lock for concurrent access by app and isr hpet activity. */ static DEFINE_SPINLOCK(hpet_lock); /* A lock for concurrent intermodule access to hpet and isr hpet activity. */ @@ -78,7 +105,7 @@ struct hpets *hp_next; struct hpet __iomem *hp_hpet; unsigned long hp_hpet_phys; - struct time_interpolator *hp_interpolator; + struct clocksource *hp_clocksource; unsigned long long hp_tick_freq; unsigned long hp_delta; unsigned int hp_ntimer; @@ -93,13 +120,6 @@ #define HPET_PERIODIC 0x0004 #define HPET_SHARED_IRQ 0x0008 -#if BITS_PER_LONG == 64 -#define write_counter(V, MC) writeq(V, MC) -#define read_counter(MC) readq(MC) -#else -#define write_counter(V, MC) writel(V, MC) -#define read_counter(MC) readl(MC) -#endif #ifndef readq static inline unsigned long long readq(void __iomem *addr) @@ -736,27 +756,6 @@ static struct ctl_table_header *sysctl_header; -static void hpet_register_interpolator(struct hpets *hpetp) -{ -#ifdef CONFIG_TIME_INTERPOLATION - struct time_interpolator *ti; - - ti = kzalloc(sizeof(*ti), GFP_KERNEL); - if (!ti) - return; - - ti->source = TIME_SOURCE_MMIO64; - ti->shift = 10; - ti->addr = &hpetp->hp_hpet->hpet_mc; - ti->frequency = hpetp->hp_tick_freq; - ti->drift = HPET_DRIFT; - ti->mask = -1; - - hpetp->hp_interpolator = ti; - register_time_interpolator(ti); -#endif -} - /* * Adjustment for when arming the timer with * initial conditions. 
That is, main counter @@ -908,7 +907,16 @@ } hpetp->hp_delta = hpet_calibrate(hpetp); - hpet_register_interpolator(hpetp); + + if (!hpet_clocksource_p) { +#ifdef CONFIG_IA64 + clocksource_hpet.fsys_mmio_ptr = hpet_mc_ptr = &hpetp->hp_hpet->hpet_mc; +#endif + clocksource_hpet.mult = clocksource_hz2mult(hpetp->hp_tick_freq, + clocksource_hpet.shift); + clocksource_register(&clocksource_hpet); + hpet_clocksource_p = hpetp->hp_clocksource = &clocksource_hpet; + } return 0; } @@ -994,7 +1002,7 @@ static int hpet_acpi_remove(struct acpi_device *device, int type) { - /* XXX need to unregister interpolator, dealloc mem, etc */ + /* XXX need to unregister clocksource, dealloc mem, etc */ return -EINVAL; } diff -urN ./linux-2.6.18.1/drivers/char/lpptest.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/lpptest.c --- ./linux-2.6.18.1/drivers/char/lpptest.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/lpptest.c 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,179 @@ +/* + * /dev/lpptest device: test IRQ handling latencies over parallel port + * + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar + * + * licensed under the GPL + * + * You need to have CONFIG_PARPORT disabled for this device, it is a + * completely self-contained device that assumes sole ownership of the + * parallel port. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * API wrappers so that the code can be shared with the -rt tree: + */ +#ifndef local_irq_disable +# define local_irq_disable local_irq_disable +# define local_irq_enable local_irq_enable +#endif + +#ifndef IRQ_NODELAY +# define IRQ_NODELAY 0 +# define IRQF_NODELAY 0 +#endif + +/* + * Driver: + */ +#define LPPTEST_CHAR_MAJOR 245 +#define LPPTEST_DEVICE_NAME "lpptest" + +#define LPPTEST_IRQ 7 + +#define LPPTEST_TEST _IOR (LPPTEST_CHAR_MAJOR, 1, unsigned long long) +#define LPPTEST_DISABLE _IOR (LPPTEST_CHAR_MAJOR, 2, unsigned long long) +#define LPPTEST_ENABLE _IOR (LPPTEST_CHAR_MAJOR, 3, unsigned long long) + +static char dev_id[] = "lpptest"; + +#define INIT_PORT() outb(0x04, 0x37a) +#define ENABLE_IRQ() outb(0x10, 0x37a) +#define DISABLE_IRQ() outb(0, 0x37a) + +static unsigned char out = 0x5a; + +/** + * Interrupt handler. Flip a bit in the reply. 
+ */ +static int lpptest_irq (int irq, void *dev_id, struct pt_regs *regs) +{ + out ^= 0xff; + outb(out, 0x378); + + return IRQ_HANDLED; +} + +static cycles_t test_response(void) +{ + cycles_t now, end; + unsigned char in; + int timeout = 0; + + local_irq_disable(); + in = inb(0x379); + inb(0x378); + outb(0x08, 0x378); + now = get_cycles(); + while(1) { + if (inb(0x379) != in) + break; + if (timeout++ > 1000000) { + outb(0x00, 0x378); + local_irq_enable(); + + return 0; + } + } + end = get_cycles(); + outb(0x00, 0x378); + local_irq_enable(); + + return end - now; +} + +static int lpptest_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int lpptest_close(struct inode *inode, struct file *file) +{ + return 0; +} + +int lpptest_ioctl(struct inode *inode, struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) +{ + int retval = 0; + + switch (ioctl_num) { + + case LPPTEST_DISABLE: + DISABLE_IRQ(); + break; + + case LPPTEST_ENABLE: + ENABLE_IRQ(); + break; + + case LPPTEST_TEST: { + + cycles_t diff = test_response(); + if (copy_to_user((void *)ioctl_param, (void*) &diff, sizeof(diff))) + goto errcpy; + break; + } + default: retval = -EINVAL; + } + + return retval; + + errcpy: + return -EFAULT; +} + +static struct file_operations lpptest_dev_fops = { + .ioctl = lpptest_ioctl, + .open = lpptest_open, + .release = lpptest_close, +}; + +static int __init lpptest_init (void) +{ + if (register_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME, &lpptest_dev_fops)) + { + printk(KERN_NOTICE "Can't allocate major number %d for lpptest.\n", + LPPTEST_CHAR_MAJOR); + return -EAGAIN; + } + + if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) { + printk (KERN_WARNING "lpptest: irq %d in use. Unload parport module!\n", LPPTEST_IRQ); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); + return -EAGAIN; + } + irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY; + irq_desc[LPPTEST_IRQ].action->flags |= IRQF_NODELAY | IRQF_DISABLED; + + INIT_PORT(); + ENABLE_IRQ(); + + return 0; +} +module_init (lpptest_init); + +static void __exit lpptest_exit (void) +{ + DISABLE_IRQ(); + + free_irq(LPPTEST_IRQ, dev_id); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); +} +module_exit (lpptest_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("lpp test module"); + diff -urN ./linux-2.6.18.1/drivers/char/random.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/random.c --- ./linux-2.6.18.1/drivers/char/random.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/random.c 2007-05-19 23:58:35.000000000 +0900 @@ -580,8 +580,11 @@ preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -626,9 +629,6 @@ if(input_pool.entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); - -out: - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, diff -urN ./linux-2.6.18.1/drivers/char/rtc.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/rtc.c --- ./linux-2.6.18.1/drivers/char/rtc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/rtc.c 2007-05-20 00:11:12.000000000 +0900 @@ -82,10 +82,36 @@ #include #include +#ifdef CONFIG_MIPS +# include +#endif + #if 
defined(__i386__) #include #endif +#ifdef CONFIG_RTC_HISTOGRAM + +static cycles_t last_interrupt_time; + +#include + +#define CPU_MHZ (cpu_khz / 1000) + +#define HISTSIZE 10000 +static int histogram[HISTSIZE]; + +static int rtc_state; + +enum rtc_states { + S_STARTUP, /* First round - let the application start */ + S_IDLE, /* Waiting for an interrupt */ + S_WAITING_FOR_READ, /* Signal delivered. waiting for rtc_read() */ + S_READ_MISSED, /* Signal delivered, read() deadline missed */ +}; + +#endif + #ifdef __sparc__ #include #include @@ -218,7 +244,146 @@ return uip; } +#ifndef RTC_IRQ +# undef CONFIG_RTC_HISTOGRAM +#endif + +static inline void rtc_open_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i; + + last_interrupt_time = 0; + rtc_state = S_STARTUP; + rtc_irq_data = 0; + + for (i = 0; i < HISTSIZE; i++) + histogram[i] = 0; +#endif +} + +static inline void rtc_wake_event(void) +{ +#ifndef CONFIG_RTC_HISTOGRAM + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +#else + if (!(rtc_status & RTC_IS_OPEN)) + return; + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + break; + /* Waiting for an interrupt */ + case S_IDLE: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + last_interrupt_time = get_cycles(); + rtc_state = S_WAITING_FOR_READ; + break; + + /* Signal has been delivered. waiting for rtc_read() */ + case S_WAITING_FOR_READ: + /* + * Well foo. The usermode application didn't + * schedule and read in time. + */ + last_interrupt_time = get_cycles(); + rtc_state = S_READ_MISSED; + printk("Read missed before next interrupt\n"); + break; + /* Signal has been delivered, read() deadline was missed */ + case S_READ_MISSED: + /* + * Not much we can do here. We're waiting for the usermode + * application to read the rtc + */ + last_interrupt_time = get_cycles(); + break; + } +#endif +} + +static inline void rtc_read_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + cycles_t now = get_cycles(); + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + rtc_state = S_IDLE; + break; + + /* Waiting for an interrupt */ + case S_IDLE: + printk("bug in rtc_read(): called in state S_IDLE!\n"); + break; + case S_WAITING_FOR_READ: /* + * Signal has been delivered. + * waiting for rtc_read() + */ + /* + * Well done + */ + case S_READ_MISSED: /* + * Signal has been delivered, read() + * deadline was missed + */ + /* + * So, you finally got here. + */ + if (!last_interrupt_time) + printk("bug in rtc_read(): last_interrupt_time = 0\n"); + rtc_state = S_IDLE; + { + cycles_t latency = now - last_interrupt_time; + unsigned long delta; /* Microseconds */ + + delta = latency; + delta /= CPU_MHZ; + + if (delta > 1000 * 1000) { + printk("rtc: eek\n"); + } else { + unsigned long slot = delta; + if (slot >= HISTSIZE) + slot = HISTSIZE - 1; + histogram[slot]++; + if (delta > 2000) + printk("wow! That was a " + "%ld millisec bump\n", + delta / 1000); + } + } + rtc_state = S_IDLE; + break; + } +#endif +} + +static inline void rtc_close_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i = 0; + unsigned long total = 0; + + for (i = 0; i < HISTSIZE; i++) + total += histogram[i]; + if (!total) + return; + + printk("\nrtc latency histogram of {%s/%d, %lu samples}:\n", + current->comm, current->pid, total); + for (i = 0; i < HISTSIZE; i++) { + if (histogram[i]) + printk("%d %d\n", i, histogram[i]); + } +#endif +} + #ifdef RTC_IRQ + /* * A very tiny interrupt handler. 
It runs with IRQF_DISABLED set, * but there is possibility of conflicting with the set_rtc_mmss() @@ -262,7 +427,7 @@ if (rtc_callback) rtc_callback->func(rtc_callback->private_data); spin_unlock(&rtc_task_lock); - wake_up_interruptible(&rtc_wait); + wake_up_interruptible(&rtc_wait); kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); @@ -376,6 +541,8 @@ schedule(); } while (1); + rtc_read_event(); + if (count == sizeof(unsigned int)) retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); else @@ -608,6 +775,11 @@ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + /* + * Make CMOS date writes nonpreemptible even on PREEMPT_RT. + * There's a limit to everything! =B-) + */ + preempt_disable(); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); #endif @@ -617,6 +789,7 @@ CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); + preempt_enable(); CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); @@ -715,6 +888,7 @@ if(rtc_status & RTC_IS_OPEN) goto out_busy; + rtc_open_event(); rtc_status |= RTC_IS_OPEN; rtc_irq_data = 0; @@ -770,6 +944,7 @@ rtc_irq_data = 0; rtc_status &= ~RTC_IS_OPEN; spin_unlock_irq (&rtc_lock); + rtc_close_event(); return 0; } @@ -1153,6 +1328,7 @@ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); /* Now we have new data */ + rtc_wake_event(); wake_up_interruptible(&rtc_wait); kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); diff -urN ./linux-2.6.18.1/drivers/char/rtc.c.orig linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/rtc.c.orig --- ./linux-2.6.18.1/drivers/char/rtc.c.orig 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/rtc.c.orig 2007-05-19 23:58:35.000000000 +0900 @@ -0,0 +1,1557 @@ +/* + * Real Time Clock interface for Linux + * + * Copyright (C) 1996 Paul Gortmaker + * + * This driver allows use of the real time clock (built into + * nearly all computers) from user space. It exports the /dev/rtc + * interface supporting various ioctl() and also the + * /proc/driver/rtc pseudo-file for status information. + * + * The ioctls can be used to set the interrupt behaviour and + * generation rate from the RTC via IRQ 8. Then the /dev/rtc + * interface can be used to make use of these timer interrupts, + * be they interval or alarm based. + * + * The /dev/rtc interface will block on reads until an interrupt + * has been received. If a RTC interrupt has already happened, + * it will output an unsigned long and then block. The output value + * contains the interrupt status in the low byte and the number of + * interrupts since the last read in the remaining high bytes. The + * /dev/rtc interface can also be used with the select(2) call. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on other minimal char device drivers, like Alan's + * watchdog, Ted's random, etc. etc. + * + * 1.07 Paul Gortmaker. + * 1.08 Miquel van Smoorenburg: disallow certain things on the + * DEC Alpha as the CMOS clock is also used for other things. + * 1.09 Nikita Schmidt: epoch support and some Alpha cleanup. 
+ * 1.09a Pete Zaitcev: Sun SPARC + * 1.09b Jeff Garzik: Modularize, init cleanup + * 1.09c Jeff Garzik: SMP cleanup + * 1.10 Paul Barton-Davis: add support for async I/O + * 1.10a Andrea Arcangeli: Alpha updates + * 1.10b Andrew Morton: SMP lock fix + * 1.10c Cesar Barros: SMP locking fixes and cleanup + * 1.10d Paul Gortmaker: delete paranoia check in rtc_exit + * 1.10e Maciej W. Rozycki: Handle DECstation's year weirdness. + * 1.11 Takashi Iwai: Kernel access functions + * rtc_register/rtc_unregister/rtc_control + * 1.11a Daniele Bellucci: Audit create_proc_read_entry in rtc_init + * 1.12 Venkatesh Pallipadi: Hooks for emulating rtc on HPET base-timer + * CONFIG_HPET_EMULATE_RTC + * 1.12a Maciej W. Rozycki: Handle memory-mapped chips properly. + * 1.12ac Alan Cox: Allow read access to the day of week register + */ + +#define RTC_VERSION "1.12ac" + +/* + * Note that *all* calls to CMOS_READ and CMOS_WRITE are done with + * interrupts disabled. Due to the index-port/data-port (0x70/0x71) + * design of the RTC, we don't want two different things trying to + * get to it at once. (e.g. the periodic 11 min sync from time.c vs. + * this driver.) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_MIPS +# include +#endif + +#if defined(__i386__) +#include +#endif + +#ifdef CONFIG_RTC_HISTOGRAM + +static cycles_t last_interrupt_time; + +#include + +#define CPU_MHZ (cpu_khz / 1000) + +#define HISTSIZE 10000 +static int histogram[HISTSIZE]; + +static int rtc_state; + +enum rtc_states { + S_STARTUP, /* First round - let the application start */ + S_IDLE, /* Waiting for an interrupt */ + S_WAITING_FOR_READ, /* Signal delivered. waiting for rtc_read() */ + S_READ_MISSED, /* Signal delivered, read() deadline missed */ +}; + +#endif + +#ifdef __sparc__ +#include +#include +#ifdef __sparc_v9__ +#include +#endif + +static unsigned long rtc_port; +static int rtc_irq = PCI_IRQ_NONE; +#endif + +#ifdef CONFIG_HPET_RTC_IRQ +#undef RTC_IRQ +#endif + +#ifdef RTC_IRQ +static int rtc_has_irq = 1; +#endif + +#ifndef CONFIG_HPET_EMULATE_RTC +#define is_hpet_enabled() 0 +#define hpet_set_alarm_time(hrs, min, sec) 0 +#define hpet_set_periodic_freq(arg) 0 +#define hpet_mask_rtc_irq_bit(arg) 0 +#define hpet_set_rtc_irq_bit(arg) 0 +#define hpet_rtc_timer_init() do { } while (0) +#define hpet_rtc_dropped_irq() 0 +static inline irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) {return 0;} +#else +extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); +#endif + +/* + * We sponge a minor off of the misc major. No need slurping + * up another valuable major dev number for this. If you add + * an ioctl, make sure you don't conflict with SPARC's RTC + * ioctls. 
+ */ + +static struct fasync_struct *rtc_async_queue; + +static DECLARE_WAIT_QUEUE_HEAD(rtc_wait); + +#ifdef RTC_IRQ +static struct timer_list rtc_irq_timer; +#endif + +static ssize_t rtc_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos); + +static int rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +#ifdef RTC_IRQ +static unsigned int rtc_poll(struct file *file, poll_table *wait); +#endif + +static void get_rtc_alm_time (struct rtc_time *alm_tm); +#ifdef RTC_IRQ +static void rtc_dropped_irq(unsigned long data); + +static void set_rtc_irq_bit_locked(unsigned char bit); +static void mask_rtc_irq_bit_locked(unsigned char bit); + +static inline void set_rtc_irq_bit(unsigned char bit) +{ + spin_lock_irq(&rtc_lock); + set_rtc_irq_bit_locked(bit); + spin_unlock_irq(&rtc_lock); +} + +static void mask_rtc_irq_bit(unsigned char bit) +{ + spin_lock_irq(&rtc_lock); + mask_rtc_irq_bit_locked(bit); + spin_unlock_irq(&rtc_lock); +} +#endif + +static int rtc_proc_open(struct inode *inode, struct file *file); + +/* + * Bits in rtc_status. (6 bits of room for future expansion) + */ + +#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ +#define RTC_TIMER_ON 0x02 /* missed irq timer active */ + +/* + * rtc_status is never changed by rtc_interrupt, and ioctl/open/close is + * protected by the big kernel lock. However, ioctl can still disable the timer + * in rtc_status and then with del_timer after the interrupt has read + * rtc_status but before mod_timer is called, which would then reenable the + * timer (but you would need to have an awful timing before you'd trip on it) + */ +static unsigned long rtc_status = 0; /* bitmapped status byte. */ +static unsigned long rtc_freq = 0; /* Current periodic IRQ rate */ +static unsigned long rtc_irq_data = 0; /* our output to the world */ +static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */ + +#ifdef RTC_IRQ +/* + * rtc_task_lock nests inside rtc_lock. + */ +static DEFINE_SPINLOCK(rtc_task_lock); +static rtc_task_t *rtc_callback = NULL; +#endif + +/* + * If this driver ever becomes modularised, it will be really nice + * to make the epoch retain its value across module reload... + */ + +static unsigned long epoch = 1900; /* year corresponding to 0x00 */ + +static const unsigned char days_in_mo[] = +{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +/* + * Returns true if a clock update is in progress + */ +static inline unsigned char rtc_is_updating(void) +{ + unsigned char uip; + + spin_lock_irq(&rtc_lock); + uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + spin_unlock_irq(&rtc_lock); + return uip; +} + +#ifndef RTC_IRQ +# undef CONFIG_RTC_HISTOGRAM +#endif + +static inline void rtc_open_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i; + + last_interrupt_time = 0; + rtc_state = S_STARTUP; + rtc_irq_data = 0; + + for (i = 0; i < HISTSIZE; i++) + histogram[i] = 0; +#endif +} + +static inline void rtc_wake_event(void) +{ +#ifndef CONFIG_RTC_HISTOGRAM + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +#else + if (!(rtc_status & RTC_IS_OPEN)) + return; + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + break; + /* Waiting for an interrupt */ + case S_IDLE: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + last_interrupt_time = get_cycles(); + rtc_state = S_WAITING_FOR_READ; + break; + + /* Signal has been delivered. waiting for rtc_read() */ + case S_WAITING_FOR_READ: + /* + * Well foo. 
The usermode application didn't + * schedule and read in time. + */ + last_interrupt_time = get_cycles(); + rtc_state = S_READ_MISSED; + printk("Read missed before next interrupt\n"); + break; + /* Signal has been delivered, read() deadline was missed */ + case S_READ_MISSED: + /* + * Not much we can do here. We're waiting for the usermode + * application to read the rtc + */ + last_interrupt_time = get_cycles(); + break; + } +#endif +} + +static inline void rtc_read_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + cycles_t now = get_cycles(); + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + rtc_state = S_IDLE; + break; + + /* Waiting for an interrupt */ + case S_IDLE: + printk("bug in rtc_read(): called in state S_IDLE!\n"); + break; + case S_WAITING_FOR_READ: /* + * Signal has been delivered. + * waiting for rtc_read() + */ + /* + * Well done + */ + case S_READ_MISSED: /* + * Signal has been delivered, read() + * deadline was missed + */ + /* + * So, you finally got here. + */ + if (!last_interrupt_time) + printk("bug in rtc_read(): last_interrupt_time = 0\n"); + rtc_state = S_IDLE; + { + cycles_t latency = now - last_interrupt_time; + unsigned long delta; /* Microseconds */ + + delta = latency; + delta /= CPU_MHZ; + + if (delta > 1000 * 1000) { + printk("rtc: eek\n"); + } else { + unsigned long slot = delta; + if (slot >= HISTSIZE) + slot = HISTSIZE - 1; + histogram[slot]++; + if (delta > 2000) + printk("wow! That was a " + "%ld millisec bump\n", + delta / 1000); + } + } + rtc_state = S_IDLE; + break; + } +#endif +} + +static inline void rtc_close_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i = 0; + unsigned long total = 0; + + for (i = 0; i < HISTSIZE; i++) + total += histogram[i]; + if (!total) + return; + + printk("\nrtc latency histogram of {%s/%d, %lu samples}:\n", + current->comm, current->pid, total); + for (i = 0; i < HISTSIZE; i++) { + if (histogram[i]) + printk("%d %d\n", i, histogram[i]); + } +#endif +} + +#ifdef RTC_IRQ + +/* + * A very tiny interrupt handler. It runs with IRQF_DISABLED set, + * but there is possibility of conflicting with the set_rtc_mmss() + * call (the rtc irq and the timer irq can easily run at the same + * time in two different CPUs). So we need to serialize + * accesses to the chip with the rtc_lock spinlock that each + * architecture should implement in the timer code. + * (See ./arch/XXXX/kernel/time.c for the set_rtc_mmss() function.) + */ + +irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + /* + * Can be an alarm interrupt, update complete interrupt, + * or a periodic interrupt. We store the status in the + * low byte and the number of interrupts received since + * the last read in the remainder of rtc_irq_data. + */ + + spin_lock (&rtc_lock); + rtc_irq_data += 0x100; + rtc_irq_data &= ~0xff; + if (is_hpet_enabled()) { + /* + * In this case it is HPET RTC interrupt handler + * calling us, with the interrupt information + * passed as arg1, instead of irq. 
+ */ + rtc_irq_data |= (unsigned long)irq & 0xF0; + } else { + rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); + } + + if (rtc_status & RTC_TIMER_ON) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + + spin_unlock (&rtc_lock); + + /* Now do the rest of the actions */ + spin_lock(&rtc_task_lock); + if (rtc_callback) + rtc_callback->func(rtc_callback->private_data); + spin_unlock(&rtc_task_lock); + wake_up_interruptible(&rtc_wait); + + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + + return IRQ_HANDLED; +} +#endif + +/* + * sysctl-tuning infrastructure. + */ +static ctl_table rtc_table[] = { + { + .ctl_name = 1, + .procname = "max-user-freq", + .data = &rtc_max_user_freq, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table rtc_root[] = { + { + .ctl_name = 1, + .procname = "rtc", + .maxlen = 0, + .mode = 0555, + .child = rtc_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table dev_root[] = { + { + .ctl_name = CTL_DEV, + .procname = "dev", + .maxlen = 0, + .mode = 0555, + .child = rtc_root, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *sysctl_header; + +static int __init init_sysctl(void) +{ + sysctl_header = register_sysctl_table(dev_root, 0); + return 0; +} + +static void __exit cleanup_sysctl(void) +{ + unregister_sysctl_table(sysctl_header); +} + +/* + * Now all the various file operations that we export. + */ + +static ssize_t rtc_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ +#ifndef RTC_IRQ + return -EIO; +#else + DECLARE_WAITQUEUE(wait, current); + unsigned long data; + ssize_t retval; + + if (rtc_has_irq == 0) + return -EIO; + + /* + * Historically this function used to assume that sizeof(unsigned long) + * is the same in userspace and kernelspace. This lead to problems + * for configurations with multiple ABIs such a the MIPS o32 and 64 + * ABIs supported on the same kernel. So now we support read of both + * 4 and 8 bytes and assume that's the sizeof(unsigned long) in the + * userspace ABI. + */ + if (count != sizeof(unsigned int) && count != sizeof(unsigned long)) + return -EINVAL; + + add_wait_queue(&rtc_wait, &wait); + + do { + /* First make it right. Then make it fast. Putting this whole + * block within the parentheses of a while would be too + * confusing. And no, xchg() is not the answer. */ + + __set_current_state(TASK_INTERRUPTIBLE); + + spin_lock_irq (&rtc_lock); + data = rtc_irq_data; + rtc_irq_data = 0; + spin_unlock_irq (&rtc_lock); + + if (data != 0) + break; + + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + schedule(); + } while (1); + + rtc_read_event(); + + if (count == sizeof(unsigned int)) + retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); + else + retval = put_user(data, (unsigned long __user *)buf) ?: sizeof(long); + if (!retval) + retval = count; + out: + current->state = TASK_RUNNING; + remove_wait_queue(&rtc_wait, &wait); + + return retval; +#endif +} + +static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel) +{ + struct rtc_time wtime; + +#ifdef RTC_IRQ + if (rtc_has_irq == 0) { + switch (cmd) { + case RTC_AIE_OFF: + case RTC_AIE_ON: + case RTC_PIE_OFF: + case RTC_PIE_ON: + case RTC_UIE_OFF: + case RTC_UIE_ON: + case RTC_IRQP_READ: + case RTC_IRQP_SET: + return -EINVAL; + }; + } +#endif + + switch (cmd) { +#ifdef RTC_IRQ + case RTC_AIE_OFF: /* Mask alarm int. enab. 
bit */ + { + mask_rtc_irq_bit(RTC_AIE); + return 0; + } + case RTC_AIE_ON: /* Allow alarm interrupts. */ + { + set_rtc_irq_bit(RTC_AIE); + return 0; + } + case RTC_PIE_OFF: /* Mask periodic int. enab. bit */ + { + unsigned long flags; /* can be called from isr via rtc_control() */ + spin_lock_irqsave (&rtc_lock, flags); + mask_rtc_irq_bit_locked(RTC_PIE); + if (rtc_status & RTC_TIMER_ON) { + rtc_status &= ~RTC_TIMER_ON; + del_timer(&rtc_irq_timer); + } + spin_unlock_irqrestore (&rtc_lock, flags); + return 0; + } + case RTC_PIE_ON: /* Allow periodic ints */ + { + unsigned long flags; /* can be called from isr via rtc_control() */ + /* + * We don't really want Joe User enabling more + * than 64Hz of interrupts on a multi-user machine. + */ + if (!kernel && (rtc_freq > rtc_max_user_freq) && + (!capable(CAP_SYS_RESOURCE))) + return -EACCES; + + spin_lock_irqsave (&rtc_lock, flags); + if (!(rtc_status & RTC_TIMER_ON)) { + rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100; + add_timer(&rtc_irq_timer); + rtc_status |= RTC_TIMER_ON; + } + set_rtc_irq_bit_locked(RTC_PIE); + spin_unlock_irqrestore (&rtc_lock, flags); + return 0; + } + case RTC_UIE_OFF: /* Mask ints from RTC updates. */ + { + mask_rtc_irq_bit(RTC_UIE); + return 0; + } + case RTC_UIE_ON: /* Allow ints for RTC updates. */ + { + set_rtc_irq_bit(RTC_UIE); + return 0; + } +#endif + case RTC_ALM_READ: /* Read the present alarm time */ + { + /* + * This returns a struct rtc_time. Reading >= 0xc0 + * means "don't care" or "match all". Only the tm_hour, + * tm_min, and tm_sec values are filled in. + */ + memset(&wtime, 0, sizeof(struct rtc_time)); + get_rtc_alm_time(&wtime); + break; + } + case RTC_ALM_SET: /* Store a time into the alarm */ + { + /* + * This expects a struct rtc_time. Writing 0xff means + * "don't care" or "match all". Only the tm_hour, + * tm_min and tm_sec are used. 
+ */ + unsigned char hrs, min, sec; + struct rtc_time alm_tm; + + if (copy_from_user(&alm_tm, (struct rtc_time __user *)arg, + sizeof(struct rtc_time))) + return -EFAULT; + + hrs = alm_tm.tm_hour; + min = alm_tm.tm_min; + sec = alm_tm.tm_sec; + + spin_lock_irq(&rtc_lock); + if (hpet_set_alarm_time(hrs, min, sec)) { + /* + * Fallthru and set alarm time in CMOS too, + * so that we will get proper value in RTC_ALM_READ + */ + } + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || + RTC_ALWAYS_BCD) + { + if (sec < 60) BIN_TO_BCD(sec); + else sec = 0xff; + + if (min < 60) BIN_TO_BCD(min); + else min = 0xff; + + if (hrs < 24) BIN_TO_BCD(hrs); + else hrs = 0xff; + } + CMOS_WRITE(hrs, RTC_HOURS_ALARM); + CMOS_WRITE(min, RTC_MINUTES_ALARM); + CMOS_WRITE(sec, RTC_SECONDS_ALARM); + spin_unlock_irq(&rtc_lock); + + return 0; + } + case RTC_RD_TIME: /* Read the time/date from RTC */ + { + memset(&wtime, 0, sizeof(struct rtc_time)); + rtc_get_rtc_time(&wtime); + break; + } + case RTC_SET_TIME: /* Set the RTC */ + { + struct rtc_time rtc_tm; + unsigned char mon, day, hrs, min, sec, leap_yr; + unsigned char save_control, save_freq_select; + unsigned int yrs; +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_yrs; +#endif + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&rtc_tm, (struct rtc_time __user *)arg, + sizeof(struct rtc_time))) + return -EFAULT; + + yrs = rtc_tm.tm_year + 1900; + mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ + day = rtc_tm.tm_mday; + hrs = rtc_tm.tm_hour; + min = rtc_tm.tm_min; + sec = rtc_tm.tm_sec; + + if (yrs < 1970) + return -EINVAL; + + leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); + + if ((mon > 12) || (day == 0)) + return -EINVAL; + + if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) + return -EINVAL; + + if ((hrs >= 24) || (min >= 60) || (sec >= 60)) + return -EINVAL; + + if ((yrs -= epoch) > 255) /* They are unsigned */ + return -EINVAL; + + spin_lock_irq(&rtc_lock); +#ifdef CONFIG_MACH_DECSTATION + real_yrs = yrs; + yrs = 72; + + /* + * We want to keep the year set to 73 until March + * for non-leap years, so that Feb, 29th is handled + * correctly. + */ + if (!leap_yr && mon < 3) { + real_yrs--; + yrs = 73; + } +#endif + /* These limits and adjustments are independent of + * whether the chip is in binary mode or not. + */ + if (yrs > 169) { + spin_unlock_irq(&rtc_lock); + return -EINVAL; + } + if (yrs >= 100) + yrs -= 100; + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hrs); + BIN_TO_BCD(day); + BIN_TO_BCD(mon); + BIN_TO_BCD(yrs); + } + + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + /* + * Make CMOS date writes nonpreemptible even on PREEMPT_RT. + * There's a limit to everything! =B-) + */ + preempt_disable(); +#ifdef CONFIG_MACH_DECSTATION + CMOS_WRITE(real_yrs, RTC_DEC_YEAR); +#endif + CMOS_WRITE(yrs, RTC_YEAR); + CMOS_WRITE(mon, RTC_MONTH); + CMOS_WRITE(day, RTC_DAY_OF_MONTH); + CMOS_WRITE(hrs, RTC_HOURS); + CMOS_WRITE(min, RTC_MINUTES); + CMOS_WRITE(sec, RTC_SECONDS); + preempt_enable(); + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock_irq(&rtc_lock); + return 0; + } +#ifdef RTC_IRQ + case RTC_IRQP_READ: /* Read the periodic IRQ rate. 
*/ + { + return put_user(rtc_freq, (unsigned long __user *)arg); + } + case RTC_IRQP_SET: /* Set periodic IRQ rate. */ + { + int tmp = 0; + unsigned char val; + unsigned long flags; /* can be called from isr via rtc_control() */ + + /* + * The max we can do is 8192Hz. + */ + if ((arg < 2) || (arg > 8192)) + return -EINVAL; + /* + * We don't really want Joe User generating more + * than 64Hz of interrupts on a multi-user machine. + */ + if (!kernel && (arg > rtc_max_user_freq) && (!capable(CAP_SYS_RESOURCE))) + return -EACCES; + + while (arg > (1<f_flags & FASYNC) { + rtc_fasync (-1, file, 0); + } +no_irq: +#endif + + spin_lock_irq (&rtc_lock); + rtc_irq_data = 0; + rtc_status &= ~RTC_IS_OPEN; + spin_unlock_irq (&rtc_lock); + rtc_close_event(); + return 0; +} + +#ifdef RTC_IRQ +/* Called without the kernel lock - fine */ +static unsigned int rtc_poll(struct file *file, poll_table *wait) +{ + unsigned long l; + + if (rtc_has_irq == 0) + return 0; + + poll_wait(file, &rtc_wait, wait); + + spin_lock_irq (&rtc_lock); + l = rtc_irq_data; + spin_unlock_irq (&rtc_lock); + + if (l != 0) + return POLLIN | POLLRDNORM; + return 0; +} +#endif + +/* + * exported stuffs + */ + +EXPORT_SYMBOL(rtc_register); +EXPORT_SYMBOL(rtc_unregister); +EXPORT_SYMBOL(rtc_control); + +int rtc_register(rtc_task_t *task) +{ +#ifndef RTC_IRQ + return -EIO; +#else + if (task == NULL || task->func == NULL) + return -EINVAL; + spin_lock_irq(&rtc_lock); + if (rtc_status & RTC_IS_OPEN) { + spin_unlock_irq(&rtc_lock); + return -EBUSY; + } + spin_lock(&rtc_task_lock); + if (rtc_callback) { + spin_unlock(&rtc_task_lock); + spin_unlock_irq(&rtc_lock); + return -EBUSY; + } + rtc_status |= RTC_IS_OPEN; + rtc_callback = task; + spin_unlock(&rtc_task_lock); + spin_unlock_irq(&rtc_lock); + return 0; +#endif +} + +int rtc_unregister(rtc_task_t *task) +{ +#ifndef RTC_IRQ + return -EIO; +#else + unsigned char tmp; + + spin_lock_irq(&rtc_lock); + spin_lock(&rtc_task_lock); + if (rtc_callback != task) { + spin_unlock(&rtc_task_lock); + spin_unlock_irq(&rtc_lock); + return -ENXIO; + } + rtc_callback = NULL; + + /* disable controls */ + if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) { + tmp = CMOS_READ(RTC_CONTROL); + tmp &= ~RTC_PIE; + tmp &= ~RTC_AIE; + tmp &= ~RTC_UIE; + CMOS_WRITE(tmp, RTC_CONTROL); + CMOS_READ(RTC_INTR_FLAGS); + } + if (rtc_status & RTC_TIMER_ON) { + rtc_status &= ~RTC_TIMER_ON; + del_timer(&rtc_irq_timer); + } + rtc_status &= ~RTC_IS_OPEN; + spin_unlock(&rtc_task_lock); + spin_unlock_irq(&rtc_lock); + return 0; +#endif +} + +int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg) +{ +#ifndef RTC_IRQ + return -EIO; +#else + unsigned long flags; + if (cmd != RTC_PIE_ON && cmd != RTC_PIE_OFF && cmd != RTC_IRQP_SET) + return -EINVAL; + spin_lock_irqsave(&rtc_task_lock, flags); + if (rtc_callback != task) { + spin_unlock_irqrestore(&rtc_task_lock, flags); + return -ENXIO; + } + spin_unlock_irqrestore(&rtc_task_lock, flags); + return rtc_do_ioctl(cmd, arg, 1); +#endif +} + + +/* + * The various file operations we support. 
+ */ + +static const struct file_operations rtc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = rtc_read, +#ifdef RTC_IRQ + .poll = rtc_poll, +#endif + .ioctl = rtc_ioctl, + .open = rtc_open, + .release = rtc_release, + .fasync = rtc_fasync, +}; + +static struct miscdevice rtc_dev = { + .minor = RTC_MINOR, + .name = "rtc", + .fops = &rtc_fops, +}; + +static const struct file_operations rtc_proc_fops = { + .owner = THIS_MODULE, + .open = rtc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#if defined(RTC_IRQ) && !defined(__sparc__) +static irqreturn_t (*rtc_int_handler_ptr)(int irq, void *dev_id, struct pt_regs *regs); +#endif + +static int __init rtc_init(void) +{ + struct proc_dir_entry *ent; +#if defined(__alpha__) || defined(__mips__) + unsigned int year, ctrl; + char *guess = NULL; +#endif +#ifdef __sparc__ + struct linux_ebus *ebus; + struct linux_ebus_device *edev; +#ifdef __sparc_v9__ + struct sparc_isa_bridge *isa_br; + struct sparc_isa_device *isa_dev; +#endif +#endif +#ifndef __sparc__ + void *r; +#endif + +#ifdef __sparc__ + for_each_ebus(ebus) { + for_each_ebusdev(edev, ebus) { + if(strcmp(edev->prom_node->name, "rtc") == 0) { + rtc_port = edev->resource[0].start; + rtc_irq = edev->irqs[0]; + goto found; + } + } + } +#ifdef __sparc_v9__ + for_each_isa(isa_br) { + for_each_isadev(isa_dev, isa_br) { + if (strcmp(isa_dev->prom_node->name, "rtc") == 0) { + rtc_port = isa_dev->resource.start; + rtc_irq = isa_dev->irq; + goto found; + } + } + } +#endif + printk(KERN_ERR "rtc_init: no PC rtc found\n"); + return -EIO; + +found: + if (rtc_irq == PCI_IRQ_NONE) { + rtc_has_irq = 0; + goto no_irq; + } + + /* + * XXX Interrupt pin #7 in Espresso is shared between RTC and + * PCI Slot 2 INTA# (and some INTx# in Slot 1). + */ + if (request_irq(rtc_irq, rtc_interrupt, IRQF_SHARED, "rtc", (void *)&rtc_port)) { + printk(KERN_ERR "rtc: cannot register IRQ %d\n", rtc_irq); + return -EIO; + } +no_irq: +#else + if (RTC_IOMAPPED) + r = request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); + else + r = request_mem_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); + if (!r) { + printk(KERN_ERR "rtc: I/O resource %lx is not free.\n", + (long)(RTC_PORT(0))); + return -EIO; + } + +#ifdef RTC_IRQ + if (is_hpet_enabled()) { + rtc_int_handler_ptr = hpet_rtc_interrupt; + } else { + rtc_int_handler_ptr = rtc_interrupt; + } + + if(request_irq(RTC_IRQ, rtc_int_handler_ptr, IRQF_DISABLED, "rtc", NULL)) { + /* Yeah right, seeing as irq 8 doesn't even hit the bus. */ + printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); + if (RTC_IOMAPPED) + release_region(RTC_PORT(0), RTC_IO_EXTENT); + else + release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); + return -EIO; + } + hpet_rtc_timer_init(); + +#endif + +#endif /* __sparc__ vs. others */ + + if (misc_register(&rtc_dev)) { +#ifdef RTC_IRQ + free_irq(RTC_IRQ, NULL); +#endif + release_region(RTC_PORT(0), RTC_IO_EXTENT); + return -ENODEV; + } + + ent = create_proc_entry("driver/rtc", 0, NULL); + if (!ent) { +#ifdef RTC_IRQ + free_irq(RTC_IRQ, NULL); +#endif + release_region(RTC_PORT(0), RTC_IO_EXTENT); + misc_deregister(&rtc_dev); + return -ENOMEM; + } + ent->proc_fops = &rtc_proc_fops; + +#if defined(__alpha__) || defined(__mips__) + rtc_freq = HZ; + + /* Each operating system on an Alpha uses its own epoch. + Let's try to guess which one we are using now. 
*/ + + if (rtc_is_updating() != 0) + msleep(20); + + spin_lock_irq(&rtc_lock); + year = CMOS_READ(RTC_YEAR); + ctrl = CMOS_READ(RTC_CONTROL); + spin_unlock_irq(&rtc_lock); + + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(year); /* This should never happen... */ + + if (year < 20) { + epoch = 2000; + guess = "SRM (post-2000)"; + } else if (year >= 20 && year < 48) { + epoch = 1980; + guess = "ARC console"; + } else if (year >= 48 && year < 72) { + epoch = 1952; + guess = "Digital UNIX"; +#if defined(__mips__) + } else if (year >= 72 && year < 74) { + epoch = 2000; + guess = "Digital DECstation"; +#else + } else if (year >= 70) { + epoch = 1900; + guess = "Standard PC (1900)"; +#endif + } + if (guess) + printk(KERN_INFO "rtc: %s epoch (%lu) detected\n", guess, epoch); +#endif +#ifdef RTC_IRQ + if (rtc_has_irq == 0) + goto no_irq2; + + init_timer(&rtc_irq_timer); + rtc_irq_timer.function = rtc_dropped_irq; + spin_lock_irq(&rtc_lock); + rtc_freq = 1024; + if (!hpet_set_periodic_freq(rtc_freq)) { + /* Initialize periodic freq. to CMOS reset default, which is 1024Hz */ + CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT); + } + spin_unlock_irq(&rtc_lock); +no_irq2: +#endif + + (void) init_sysctl(); + + printk(KERN_INFO "Real Time Clock Driver v" RTC_VERSION "\n"); + + return 0; +} + +static void __exit rtc_exit (void) +{ + cleanup_sysctl(); + remove_proc_entry ("driver/rtc", NULL); + misc_deregister(&rtc_dev); + +#ifdef __sparc__ + if (rtc_has_irq) + free_irq (rtc_irq, &rtc_port); +#else + if (RTC_IOMAPPED) + release_region(RTC_PORT(0), RTC_IO_EXTENT); + else + release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); +#ifdef RTC_IRQ + if (rtc_has_irq) + free_irq (RTC_IRQ, NULL); +#endif +#endif /* __sparc__ */ +} + +module_init(rtc_init); +module_exit(rtc_exit); + +#ifdef RTC_IRQ +/* + * At IRQ rates >= 4096Hz, an interrupt may get lost altogether. + * (usually during an IDE disk interrupt, with IRQ unmasking off) + * Since the interrupt handler doesn't get called, the IRQ status + * byte doesn't get read, and the RTC stops generating interrupts. + * A timer is set, and will call this function if/when that happens. + * To get it out of this stalled state, we just read the status. + * At least a jiffy of interrupts (rtc_freq/HZ) will have been lost. + * (You *really* shouldn't be trying to use a non-realtime system + * for something that requires a steady > 1KHz signal anyways.) + */ + +static void rtc_dropped_irq(unsigned long data) +{ + unsigned long freq; + + spin_lock_irq (&rtc_lock); + + if (hpet_rtc_dropped_irq()) { + spin_unlock_irq(&rtc_lock); + return; + } + + /* Just in case someone disabled the timer from behind our back... */ + if (rtc_status & RTC_TIMER_ON) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + + rtc_irq_data += ((rtc_freq/HZ)<<8); + rtc_irq_data &= ~0xff; + rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */ + + freq = rtc_freq; + + spin_unlock_irq(&rtc_lock); + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); + + /* Now we have new data */ + rtc_wake_event(); + wake_up_interruptible(&rtc_wait); + + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +} +#endif + +/* + * Info exported via "/proc/driver/rtc". + */ + +static int rtc_proc_show(struct seq_file *seq, void *v) +{ +#define YN(bit) ((ctrl & bit) ? "yes" : "no") +#define NY(bit) ((ctrl & bit) ? 
"no" : "yes") + struct rtc_time tm; + unsigned char batt, ctrl; + unsigned long freq; + + spin_lock_irq(&rtc_lock); + batt = CMOS_READ(RTC_VALID) & RTC_VRT; + ctrl = CMOS_READ(RTC_CONTROL); + freq = rtc_freq; + spin_unlock_irq(&rtc_lock); + + + rtc_get_rtc_time(&tm); + + /* + * There is no way to tell if the luser has the RTC set for local + * time or for Universal Standard Time (GMT). Probably local though. + */ + seq_printf(seq, + "rtc_time\t: %02d:%02d:%02d\n" + "rtc_date\t: %04d-%02d-%02d\n" + "rtc_epoch\t: %04lu\n", + tm.tm_hour, tm.tm_min, tm.tm_sec, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); + + get_rtc_alm_time(&tm); + + /* + * We implicitly assume 24hr mode here. Alarm values >= 0xc0 will + * match any value for that particular field. Values that are + * greater than a valid time, but less than 0xc0 shouldn't appear. + */ + seq_puts(seq, "alarm\t\t: "); + if (tm.tm_hour <= 24) + seq_printf(seq, "%02d:", tm.tm_hour); + else + seq_puts(seq, "**:"); + + if (tm.tm_min <= 59) + seq_printf(seq, "%02d:", tm.tm_min); + else + seq_puts(seq, "**:"); + + if (tm.tm_sec <= 59) + seq_printf(seq, "%02d\n", tm.tm_sec); + else + seq_puts(seq, "**\n"); + + seq_printf(seq, + "DST_enable\t: %s\n" + "BCD\t\t: %s\n" + "24hr\t\t: %s\n" + "square_wave\t: %s\n" + "alarm_IRQ\t: %s\n" + "update_IRQ\t: %s\n" + "periodic_IRQ\t: %s\n" + "periodic_freq\t: %ld\n" + "batt_status\t: %s\n", + YN(RTC_DST_EN), + NY(RTC_DM_BINARY), + YN(RTC_24H), + YN(RTC_SQWE), + YN(RTC_AIE), + YN(RTC_UIE), + YN(RTC_PIE), + freq, + batt ? "okay" : "dead"); + + return 0; +#undef YN +#undef NY +} + +static int rtc_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rtc_proc_show, NULL); +} + +void rtc_get_rtc_time(struct rtc_time *rtc_tm) +{ + unsigned long uip_watchdog = jiffies, flags; + unsigned char ctrl; +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_year; +#endif + + /* + * read RTC once any update in progress is done. The update + * can take just over 2ms. We wait 20ms. There is no need to + * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. + * If you need to know *exactly* when a second has started, enable + * periodic update complete interrupts, (via ioctl) and then + * immediately read /dev/rtc which will block until you get the IRQ. + * Once the read clears, read the RTC time (again via ioctl). Easy. + */ + + while (rtc_is_updating() != 0 && jiffies - uip_watchdog < 2*HZ/100) { + barrier(); + cpu_relax(); + } + + /* + * Only the values that we read from the RTC are set. We leave + * tm_wday, tm_yday and tm_isdst untouched. Note that while the + * RTC has RTC_DAY_OF_WEEK, we should usually ignore it, as it is + * only updated by the RTC when initially set to a non-zero value. 
+ */ + spin_lock_irqsave(&rtc_lock, flags); + rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS); + rtc_tm->tm_min = CMOS_READ(RTC_MINUTES); + rtc_tm->tm_hour = CMOS_READ(RTC_HOURS); + rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); + rtc_tm->tm_mon = CMOS_READ(RTC_MONTH); + rtc_tm->tm_year = CMOS_READ(RTC_YEAR); + /* Only set from 2.6.16 onwards */ + rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK); + +#ifdef CONFIG_MACH_DECSTATION + real_year = CMOS_READ(RTC_DEC_YEAR); +#endif + ctrl = CMOS_READ(RTC_CONTROL); + spin_unlock_irqrestore(&rtc_lock, flags); + + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BCD_TO_BIN(rtc_tm->tm_sec); + BCD_TO_BIN(rtc_tm->tm_min); + BCD_TO_BIN(rtc_tm->tm_hour); + BCD_TO_BIN(rtc_tm->tm_mday); + BCD_TO_BIN(rtc_tm->tm_mon); + BCD_TO_BIN(rtc_tm->tm_year); + BCD_TO_BIN(rtc_tm->tm_wday); + } + +#ifdef CONFIG_MACH_DECSTATION + rtc_tm->tm_year += real_year - 72; +#endif + + /* + * Account for differences between how the RTC uses the values + * and how they are defined in a struct rtc_time; + */ + if ((rtc_tm->tm_year += (epoch - 1900)) <= 69) + rtc_tm->tm_year += 100; + + rtc_tm->tm_mon--; +} + +static void get_rtc_alm_time(struct rtc_time *alm_tm) +{ + unsigned char ctrl; + + /* + * Only the values that we read from the RTC are set. That + * means only tm_hour, tm_min, and tm_sec. + */ + spin_lock_irq(&rtc_lock); + alm_tm->tm_sec = CMOS_READ(RTC_SECONDS_ALARM); + alm_tm->tm_min = CMOS_READ(RTC_MINUTES_ALARM); + alm_tm->tm_hour = CMOS_READ(RTC_HOURS_ALARM); + ctrl = CMOS_READ(RTC_CONTROL); + spin_unlock_irq(&rtc_lock); + + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BCD_TO_BIN(alm_tm->tm_sec); + BCD_TO_BIN(alm_tm->tm_min); + BCD_TO_BIN(alm_tm->tm_hour); + } +} + +#ifdef RTC_IRQ +/* + * Used to disable/enable interrupts for any one of UIE, AIE, PIE. + * Rumour has it that if you frob the interrupt enable/disable + * bits in RTC_CONTROL, you should read RTC_INTR_FLAGS, to + * ensure you actually start getting interrupts. Probably for + * compatibility with older/broken chipset RTC implementations. + * We also clear out any old irq data after an ioctl() that + * meddles with the interrupt enable/disable bits. 
+ */ + +static void mask_rtc_irq_bit_locked(unsigned char bit) +{ + unsigned char val; + + if (hpet_mask_rtc_irq_bit(bit)) + return; + val = CMOS_READ(RTC_CONTROL); + val &= ~bit; + CMOS_WRITE(val, RTC_CONTROL); + CMOS_READ(RTC_INTR_FLAGS); + + rtc_irq_data = 0; +} + +static void set_rtc_irq_bit_locked(unsigned char bit) +{ + unsigned char val; + + if (hpet_set_rtc_irq_bit(bit)) + return; + val = CMOS_READ(RTC_CONTROL); + val |= bit; + CMOS_WRITE(val, RTC_CONTROL); + CMOS_READ(RTC_INTR_FLAGS); + + rtc_irq_data = 0; +} +#endif + +MODULE_AUTHOR("Paul Gortmaker"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(RTC_MINOR); diff -urN ./linux-2.6.18.1/drivers/char/sysrq.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/sysrq.c --- ./linux-2.6.18.1/drivers/char/sysrq.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/sysrq.c 2007-05-19 23:58:35.000000000 +0900 @@ -176,6 +176,23 @@ .enable_mask = SYSRQ_ENABLE_DUMP, }; +#if defined(__i386__) + +static void sysrq_handle_showallregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + nmi_show_all_regs(); +} + +static struct sysrq_key_op sysrq_showallregs_op = { + .handler = sysrq_handle_showallregs, + .help_msg = "showalLcpupc", + .action_msg = "Show Regs On All CPUs", +}; +#else +#define sysrq_showallregs_op (*(struct sysrq_key_op *)0) +#endif + static void sysrq_handle_showstate(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { @@ -301,7 +318,7 @@ &sysrq_kill_op, /* i */ NULL, /* j */ &sysrq_SAK_op, /* k */ - NULL, /* l */ + &sysrq_showallregs_op, /* l */ &sysrq_showmem_op, /* m */ &sysrq_unrt_op, /* n */ /* This will often be registered as 'Off' at init time */ diff -urN ./linux-2.6.18.1/drivers/char/tty_io.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/tty_io.c --- ./linux-2.6.18.1/drivers/char/tty_io.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/char/tty_io.c 2007-05-19 23:58:35.000000000 +0900 @@ -254,6 +254,7 @@ printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " "!= #fd's(%d) in %s\n", tty->name, tty->count, count, routine); + dump_stack(); return count; } #endif diff -urN ./linux-2.6.18.1/drivers/ide/ide-floppy.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-floppy.c --- ./linux-2.6.18.1/drivers/ide/ide-floppy.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-floppy.c 2007-05-19 23:58:35.000000000 +0900 @@ -1666,9 +1666,9 @@ atapi_status_t status; unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); status.all = HWIF(drive)->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); progress_indication = !status.b.dsc ? 
0 : 0x10000; } diff -urN ./linux-2.6.18.1/drivers/ide/ide-io.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-io.c --- ./linux-2.6.18.1/drivers/ide/ide-io.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-io.c 2007-05-19 23:58:35.000000000 +0900 @@ -1173,7 +1173,7 @@ ide_get_lock(ide_intr, hwgroup); /* caller must own ide_lock */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); while (!hwgroup->busy) { hwgroup->busy = 1; @@ -1434,7 +1434,7 @@ #endif /* DISABLE_IRQ_NOSYNC */ /* local CPU only, * as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwgroup->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { diff -urN ./linux-2.6.18.1/drivers/ide/ide-iops.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-iops.c --- ./linux-2.6.18.1/drivers/ide/ide-iops.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-iops.c 2007-05-19 23:58:35.000000000 +0900 @@ -244,10 +244,10 @@ if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->INSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->INSL(IDE_DATA_REG, buffer, wcount); } else { @@ -266,10 +266,10 @@ if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->OUTSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->OUTSL(IDE_DATA_REG, buffer, wcount); } else { @@ -564,12 +564,12 @@ if (!(stat & BUSY_STAT)) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *startstop = ide_error(drive, "status timeout", stat); return 1; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. 
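The local_irq_*_nort() calls that the IDE hunks in this region switch to are the RT tree's "no-RT" variants: they behave exactly like the ordinary primitives on !PREEMPT_RT builds and become (near) no-ops on PREEMPT_RT, where these paths run in preemptible process context and hard interrupt disabling would only hurt latency. A sketch of the mapping, assuming the definitions this patch series conventionally ships (the exact header location and spelling may differ):

	#ifdef CONFIG_PREEMPT_RT
	# define local_irq_disable_nort()	do { } while (0)
	# define local_irq_enable_nort()	do { } while (0)
	# define local_irq_save_nort(flags)	local_save_flags(flags)
	# define local_irq_restore_nort(flags)	((void)(flags))
	#else
	# define local_irq_disable_nort()	local_irq_disable()
	# define local_irq_enable_nort()	local_irq_enable()
	# define local_irq_save_nort(flags)	local_irq_save(flags)
	# define local_irq_restore_nort(flags)	local_irq_restore(flags)
	#endif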
@@ -731,17 +731,15 @@ printk("%s: CHECK for good STATUS\n", drive->name); return 0; } - local_irq_save(flags); - SELECT_MASK(drive, 0); id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); - if (!id) { - local_irq_restore(flags); + if (!id) return 0; - } + local_irq_save_nort(flags); + SELECT_MASK(drive, 0); ata_input_data(drive, id, SECTOR_WORDS); (void) hwif->INB(IDE_STATUS_REG); /* clear drive IRQ */ - local_irq_enable(); - local_irq_restore(flags); + local_irq_enable_nort(); + local_irq_restore_nort(flags); ide_fix_driveid(id); if (id) { drive->id->dma_ultra = id->dma_ultra; @@ -821,7 +819,7 @@ if (time_after(jiffies, timeout)) break; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* diff -urN ./linux-2.6.18.1/drivers/ide/ide-lib.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-lib.c --- ./linux-2.6.18.1/drivers/ide/ide-lib.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-lib.c 2007-05-19 23:58:35.000000000 +0900 @@ -445,15 +445,16 @@ static void ide_dump_opcode(ide_drive_t *drive) { + unsigned long flags; struct request *rq; u8 opcode = 0; int found = 0; - spin_lock(&ide_lock); + spin_lock_irqsave(&ide_lock, flags); rq = NULL; if (HWGROUP(drive)) rq = HWGROUP(drive)->rq; - spin_unlock(&ide_lock); + spin_unlock_irqrestore(&ide_lock, flags); if (!rq) return; if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { @@ -481,10 +482,8 @@ static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat) { ide_hwif_t *hwif = HWIF(drive); - unsigned long flags; u8 err = 0; - local_irq_save(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (stat & BUSY_STAT) printk("Busy "); @@ -544,7 +543,7 @@ printk("\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return err; } @@ -559,14 +558,11 @@ static u8 ide_dump_atapi_status(ide_drive_t *drive, const char *msg, u8 stat) { - unsigned long flags; - atapi_status_t status; atapi_error_t error; status.all = stat; error.all = 0; - local_irq_save(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (status.b.bsy) printk("Busy "); @@ -592,7 +588,7 @@ printk("}\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return error.all; } diff -urN ./linux-2.6.18.1/drivers/ide/ide-probe.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-probe.c --- ./linux-2.6.18.1/drivers/ide/ide-probe.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-probe.c 2007-05-19 23:58:35.000000000 +0900 @@ -143,7 +143,7 @@ hwif->ata_input_data(drive, id, SECTOR_WORDS); drive->id_read = 1; - local_irq_enable(); + local_irq_enable_nort(); ide_fix_driveid(id); #if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) @@ -325,14 +325,14 @@ unsigned long flags; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* drive returned ID */ do_identify(drive, cmd); /* drive responded with ID */ rc = 0; /* clear drive IRQ */ (void) hwif->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { /* drive refused ID */ rc = 2; @@ -804,7 +804,7 @@ } while ((stat & BUSY_STAT) && time_after(timeout, jiffies)); } - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * Use cached IRQ number. It might be (and is...) 
changed by probe * code above diff -urN ./linux-2.6.18.1/drivers/ide/ide-taskfile.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-taskfile.c --- ./linux-2.6.18.1/drivers/ide/ide-taskfile.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/ide-taskfile.c 2007-05-19 23:58:35.000000000 +0900 @@ -274,7 +274,7 @@ offset %= PAGE_SIZE; #ifdef CONFIG_HIGHMEM - local_irq_save(flags); + local_irq_save_nort(flags); #endif buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -294,7 +294,7 @@ kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef CONFIG_HIGHMEM - local_irq_restore(flags); + local_irq_restore_nort(flags); #endif } @@ -460,7 +460,7 @@ } if (!drive->unmask) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL); ide_pio_datablock(drive, rq, 1); diff -urN ./linux-2.6.18.1/drivers/ide/pci/alim15x3.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/alim15x3.c --- ./linux-2.6.18.1/drivers/ide/pci/alim15x3.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/alim15x3.c 2007-05-19 23:58:35.000000000 +0900 @@ -322,7 +322,7 @@ if (r_clc >= 16) r_clc = 0; } - local_irq_save(flags); + local_irq_save_nort(flags); /* * PIO mode => ATA FIFO on, ATAPI FIFO off @@ -344,7 +344,7 @@ pci_write_config_byte(dev, port, s_clc); pci_write_config_byte(dev, port+drive->select.b.unit+2, (a_clc << 4) | r_clc); - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * setup active rec @@ -600,7 +600,7 @@ } #endif /* defined(DISPLAY_ALI_TIMINGS) && defined(CONFIG_PROC_FS) */ - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision < 0xC2) { /* @@ -613,7 +613,7 @@ * clear bit 7 */ pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F); - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } @@ -638,7 +638,7 @@ * 0:0.0 so if we didn't find one we know what is cooking. */ if (north && north->vendor != PCI_VENDOR_ID_AL) { - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } @@ -661,7 +661,7 @@ pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02); } } - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } @@ -685,7 +685,7 @@ unsigned long flags; u8 tmpbyte; - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision >= 0xC2) { /* @@ -737,7 +737,7 @@ pci_write_config_byte(dev, 0x53, tmpbyte); - local_irq_restore(flags); + local_irq_restore_nort(flags); return(ata66); } diff -urN ./linux-2.6.18.1/drivers/ide/pci/cs5530.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/cs5530.c --- ./linux-2.6.18.1/drivers/ide/pci/cs5530.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/cs5530.c 2007-05-19 23:58:35.000000000 +0900 @@ -241,8 +241,8 @@ return 0; } - spin_lock_irqsave(&ide_lock, flags); - /* all CPUs (there should only be one CPU with this chipset) */ + /* Local CPU. ide_lock is acquired in do_ide_setup_pci_device. 
*/ + local_irq_save(flags); /* * Enable BusMaster and MemoryWriteAndInvalidate for the cs5530: @@ -294,7 +294,7 @@ pci_write_config_byte(master_0, 0x42, 0x00); pci_write_config_byte(master_0, 0x43, 0xc1); - spin_unlock_irqrestore(&ide_lock, flags); + local_irq_restore(flags); return 0; } diff -urN ./linux-2.6.18.1/drivers/ide/pci/hpt366.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/hpt366.c --- ./linux-2.6.18.1/drivers/ide/pci/hpt366.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ide/pci/hpt366.c 2007-05-19 23:58:35.000000000 +0900 @@ -1496,7 +1496,7 @@ dma_old = hwif->INB(dmabase+2); - local_irq_save(flags); + local_irq_save_nort(flags); dma_new = dma_old; pci_read_config_byte(hwif->pci_dev, primary, &masterdma); @@ -1507,7 +1507,7 @@ if (dma_new != dma_old) hwif->OUTB(dma_new, dmabase+2); - local_irq_restore(flags); + local_irq_restore_nort(flags); ide_setup_dma(hwif, dmabase, 8); } diff -urN ./linux-2.6.18.1/drivers/ieee1394/ieee1394_types.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/ieee1394_types.h --- ./linux-2.6.18.1/drivers/ieee1394/ieee1394_types.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/ieee1394_types.h 2007-05-19 23:58:35.000000000 +0900 @@ -19,7 +19,7 @@ spinlock_t lock; u8 next; u32 allocations; - struct semaphore count; + struct compat_semaphore count; }; #define HPSB_TPOOL_INIT(_tp) \ diff -urN ./linux-2.6.18.1/drivers/ieee1394/nodemgr.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/nodemgr.c --- ./linux-2.6.18.1/drivers/ieee1394/nodemgr.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/nodemgr.c 2007-05-19 23:58:35.000000000 +0900 @@ -167,7 +167,7 @@ struct hpsb_host *host; struct list_head list; struct completion exited; - struct semaphore reset_sem; + struct compat_semaphore reset_sem; int pid; char daemon_name[15]; int kill_me; diff -urN ./linux-2.6.18.1/drivers/ieee1394/raw1394-private.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/raw1394-private.h --- ./linux-2.6.18.1/drivers/ieee1394/raw1394-private.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/ieee1394/raw1394-private.h 2007-05-19 23:58:35.000000000 +0900 @@ -29,7 +29,7 @@ struct list_head req_pending; struct list_head req_complete; - struct semaphore complete_sem; + struct compat_semaphore complete_sem; spinlock_t reqlists_lock; wait_queue_head_t poll_wait_complete; diff -urN ./linux-2.6.18.1/drivers/input/gameport/gameport.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/gameport/gameport.c --- ./linux-2.6.18.1/drivers/input/gameport/gameport.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/gameport/gameport.c 2007-05-19 23:58:35.000000000 +0900 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* HZ */ #include @@ -101,12 +102,12 @@ tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -125,11 +126,11 @@ tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } diff 
-urN ./linux-2.6.18.1/drivers/input/serio/i8042.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/serio/i8042.c --- ./linux-2.6.18.1/drivers/input/serio/i8042.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/serio/i8042.c 2007-05-19 23:58:35.000000000 +0900 @@ -1084,7 +1084,7 @@ goto err_controller_cleanup; } - mod_timer(&i8042_timer, jiffies + I8042_POLL_PERIOD); + mod_timer(&i8042_timer, jiffies + 2); //I8042_POLL_PERIOD); return 0; err_unregister_ports: diff -urN ./linux-2.6.18.1/drivers/input/serio/i8042.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/serio/i8042.h --- ./linux-2.6.18.1/drivers/input/serio/i8042.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/input/serio/i8042.h 2007-05-19 23:58:35.000000000 +0900 @@ -43,7 +43,7 @@ * polling. */ -#define I8042_POLL_PERIOD HZ/20 +#define I8042_POLL_PERIOD (10*HZ) /* * Status register bits. diff -urN ./linux-2.6.18.1/drivers/media/dvb/dvb-core/dvb_frontend.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/media/dvb/dvb-core/dvb_frontend.c --- ./linux-2.6.18.1/drivers/media/dvb/dvb-core/dvb_frontend.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-05-19 23:58:35.000000000 +0900 @@ -97,7 +97,7 @@ struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct compat_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; pid_t thread_pid; diff -urN ./linux-2.6.18.1/drivers/media/dvb/dvb-core/dvb_frontend.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/media/dvb/dvb-core/dvb_frontend.h --- ./linux-2.6.18.1/drivers/media/dvb/dvb-core/dvb_frontend.h 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/media/dvb/dvb-core/dvb_frontend.h 2007-05-19 23:58:35.000000000 +0900 @@ -138,7 +138,7 @@ int eventr; int overflow; wait_queue_head_t wait_queue; - struct semaphore sem; + struct compat_semaphore sem; }; struct dvb_frontend { diff -urN ./linux-2.6.18.1/drivers/net/3c527.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/3c527.c --- ./linux-2.6.18.1/drivers/net/3c527.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/3c527.c 2007-05-19 23:58:35.000000000 +0900 @@ -182,7 +182,7 @@ u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct compat_semaphore cmd_mutex; /* Serialises issuing of execute commands */ struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; diff -urN ./linux-2.6.18.1/drivers/net/3c59x.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/3c59x.c --- ./linux-2.6.18.1/drivers/net/3c59x.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/3c59x.c 2007-05-19 23:58:35.000000000 +0900 @@ -793,9 +793,9 @@ struct vortex_private *vp = netdev_priv(dev); unsigned long flags; local_save_flags(flags); - local_irq_disable(); + local_irq_disable_nort(); (vp->full_bus_master_rx ? 
boomerang_interrupt:vortex_interrupt)(dev->irq,dev,NULL); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1724,6 +1724,7 @@ int next_tick = 60*HZ; int ok = 0; int media_status, old_window; + unsigned long flags; if (vortex_debug > 2) { printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n", @@ -1731,7 +1732,7 @@ printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); } - disable_irq_lockdep(dev->irq); + spin_lock_irqsave(&vp->lock, flags); old_window = ioread16(ioaddr + EL3_CMD) >> 13; EL3WINDOW(4); media_status = ioread16(ioaddr + Wn4_Media); @@ -1754,9 +1755,7 @@ case XCVR_MII: case XCVR_NWAY: { ok = 1; - spin_lock_bh(&vp->lock); vortex_check_media(dev, 0); - spin_unlock_bh(&vp->lock); } break; default: /* Other media types handled by Tx timeouts. */ @@ -1812,7 +1811,7 @@ dev->name, media_tbl[dev->if_port].name); EL3WINDOW(old_window); - enable_irq_lockdep(dev->irq); + spin_unlock_irqrestore(&vp->lock, flags); mod_timer(&vp->timer, RUN_AT(next_tick)); if (vp->deferred) iowrite16(FakeIntr, ioaddr + EL3_CMD); @@ -1845,13 +1844,17 @@ /* * Block interrupts because vortex_interrupt does a bare spin_lock() */ +#ifndef CONFIG_PREEMPT_RT unsigned long flags; local_irq_save(flags); +#endif if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev, NULL); else vortex_interrupt(dev->irq, dev, NULL); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } } diff -urN ./linux-2.6.18.1/drivers/net/e1000/e1000_main.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/e1000/e1000_main.c --- ./linux-2.6.18.1/drivers/net/e1000/e1000_main.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/e1000/e1000_main.c 2007-05-19 23:58:35.000000000 +0900 @@ -2965,10 +2965,8 @@ (adapter->hw.mac_type == e1000_82573)) e1000_transfer_dhcp_info(adapter, skb); - local_irq_save(flags); - if (!spin_trylock(&tx_ring->tx_lock)) { + if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); return NETDEV_TX_LOCKED; } diff -urN ./linux-2.6.18.1/drivers/net/hamradio/6pack.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/hamradio/6pack.c --- ./linux-2.6.18.1/drivers/net/hamradio/6pack.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/hamradio/6pack.c 2007-05-19 23:58:35.000000000 +0900 @@ -123,7 +123,7 @@ struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; spinlock_t lock; }; diff -urN ./linux-2.6.18.1/drivers/net/hamradio/mkiss.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/hamradio/mkiss.c --- ./linux-2.6.18.1/drivers/net/hamradio/mkiss.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/hamradio/mkiss.c 2007-05-19 23:58:35.000000000 +0900 @@ -84,7 +84,7 @@ #define CRC_MODE_SMACK_TEST 4 atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ diff -urN ./linux-2.6.18.1/drivers/net/ibm_emac/ibm_emac_core.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ibm_emac/ibm_emac_core.c --- ./linux-2.6.18.1/drivers/net/ibm_emac/ibm_emac_core.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ibm_emac/ibm_emac_core.c 2007-05-19 23:58:35.000000000 +0900 @@ -1061,6 +1061,8 @@ ++dev->stats.tx_packets; dev->stats.tx_bytes += len; + spin_unlock(&dev->tx_lock); + return 0; } @@ 
-1074,6 +1076,7 @@
 	u16 ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY |
 	    MAL_TX_CTRL_LAST | emac_tx_csum(dev, skb);
+	spin_lock(&dev->tx_lock);
 	slot = dev->tx_slot++;
 	if (dev->tx_slot == NUM_TX_BUFF) {
 		dev->tx_slot = 0;
@@ -1243,6 +1246,7 @@
 	DBG2("%d: poll_tx, %d %d" NL, dev->def->index, dev->tx_cnt,
 	     dev->ack_slot);
+	spin_lock(&dev->tx_lock);
 	if (dev->tx_cnt) {
 		u16 ctrl;
 		int slot = dev->ack_slot, n = 0;
@@ -1252,6 +1256,7 @@
 			struct sk_buff *skb = dev->tx_skb[slot];
 			++n;
+			spin_unlock(&dev->tx_lock);
 			if (skb) {
 				dev_kfree_skb(skb);
 				dev->tx_skb[slot] = NULL;
@@ -1261,6 +1266,7 @@
 			if (unlikely(EMAC_IS_BAD_TX(ctrl)))
 				emac_parse_tx_error(dev, ctrl);
+			spin_lock(&dev->tx_lock);
 			if (--dev->tx_cnt)
 				goto again;
 		}
@@ -1273,6 +1279,7 @@
 			DBG2("%d: tx %d pkts" NL, dev->def->index, n);
 		}
 	}
+	spin_unlock(&dev->tx_lock);
 }
 
 static inline void emac_recycle_rx_skb(struct ocp_enet_private *dev, int slot,
@@ -1966,6 +1973,7 @@
 	dev->ldev = &ocpdev->dev;
 	dev->def = ocpdev->def;
 	SET_MODULE_OWNER(ndev);
+	spin_lock_init(&dev->tx_lock);
 
 	/* Find MAL device we are connected to */
 	maldev =
diff -urN ./linux-2.6.18.1/drivers/net/ibm_emac/ibm_emac_core.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ibm_emac/ibm_emac_core.h
--- ./linux-2.6.18.1/drivers/net/ibm_emac/ibm_emac_core.h	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ibm_emac/ibm_emac_core.h	2007-05-19 23:58:35.000000000 +0900
@@ -193,6 +193,8 @@
 	struct ibm_emac_error_stats estats;
 	struct net_device_stats nstats;
 
+	spinlock_t tx_lock;
+
 	struct device* ldev;
 };
diff -urN ./linux-2.6.18.1/drivers/net/netconsole.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/netconsole.c
--- ./linux-2.6.18.1/drivers/net/netconsole.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/netconsole.c	2007-05-19 23:58:35.000000000 +0900
@@ -74,16 +74,22 @@
 	if (!np.dev)
 		return;
 
-	local_irq_save(flags);
+	/*
+	 * A bit hairy. Netconsole uses mutexes (indirectly) and
+	 * thus must have interrupts enabled:
+	 */
+	local_irq_save_nort(flags);
 
 	for(left = len; left; ) {
 		frag = min(left, MAX_PRINT_CHUNK);
+		WARN_ON_RT(irqs_disabled());
 		netpoll_send_udp(&np, msg, frag);
+		WARN_ON_RT(irqs_disabled());
 		msg += frag;
 		left -= frag;
 	}
 
-	local_irq_restore(flags);
+	local_irq_restore_nort(flags);
 }
 
 static struct console netconsole = {
diff -urN ./linux-2.6.18.1/drivers/net/plip.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/plip.c
--- ./linux-2.6.18.1/drivers/net/plip.c	2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/plip.c	2007-05-19 23:58:35.000000000 +0900
@@ -227,7 +227,10 @@
 					  struct hh_cache *hh);
 	spinlock_t lock;
 	atomic_t kill_timer;
-	struct semaphore killed_timer_sem;
+	/*
+	 * PREEMPT_RT: this isn't a mutex, it should be struct completion.
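The struct semaphore to struct compat_semaphore substitution seen here recurs through the ieee1394, DVB, 3c527, hamradio, PPP and PCI-hotplug hunks. On PREEMPT_RT a plain semaphore acquires mutex semantics (single owner, priority inheritance), so a semaphore that is up()'d from a different context than the one that down()s it, i.e. completion-style signalling, must remain a true counting semaphore. A schematic of the pattern being preserved, with hypothetical names modelled on the mkiss/6pack refcount teardown earlier in this patch:

	struct foo_channel {
		atomic_t		refcnt;
		struct compat_semaphore	dead_sem; /* signalled, never "locked" */
	};

	static void foo_put(struct foo_channel *ch)
	{
		/* whichever context drops the last reference signals... */
		if (atomic_dec_and_test(&ch->refcnt))
			up(&ch->dead_sem);
	}

	static void foo_close(struct foo_channel *ch)
	{
		/* ...and the closing task, if not last, waits here */
		if (!atomic_dec_and_test(&ch->refcnt))
			down(&ch->dead_sem);
	}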
+ */ + struct compat_semaphore killed_timer_sem; }; static inline void enable_parport_interrupts (struct net_device *dev) diff -urN ./linux-2.6.18.1/drivers/net/ppp_async.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ppp_async.c --- ./linux-2.6.18.1/drivers/net/ppp_async.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ppp_async.c 2007-05-19 23:58:35.000000000 +0900 @@ -67,7 +67,7 @@ struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; diff -urN ./linux-2.6.18.1/drivers/net/ppp_synctty.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ppp_synctty.c --- ./linux-2.6.18.1/drivers/net/ppp_synctty.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/ppp_synctty.c 2007-05-19 23:58:35.000000000 +0900 @@ -70,7 +70,7 @@ struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ }; diff -urN ./linux-2.6.18.1/drivers/net/tulip/tulip_core.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/tulip/tulip_core.c --- ./linux-2.6.18.1/drivers/net/tulip/tulip_core.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/tulip/tulip_core.c 2007-05-19 23:58:35.000000000 +0900 @@ -1804,6 +1804,7 @@ pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ diff -urN ./linux-2.6.18.1/drivers/net/wireless/ipw2100.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/wireless/ipw2100.c --- ./linux-2.6.18.1/drivers/net/wireless/ipw2100.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/net/wireless/ipw2100.c 2007-05-19 23:58:35.000000000 +0900 @@ -163,6 +163,7 @@ #include #include #include +#include #include "ipw2100.h" @@ -1697,6 +1698,11 @@ return 0; } + /* the ipw2100 hardware really doesn't want power management delays + * longer than 175usec + */ + modify_acceptable_latency("ipw2100", 175); + /* If the interrupt is enabled, turn it off... 
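The acceptable-latency calls added to ipw2100 in this and the following hunk come from the power-management latency notifier carried by this patch set (the interface that later appeared upstream as include/linux/latency.h). Taken together, the driver's use of it over its lifecycle is, with names exactly as the hunks spell them:

	/* module init: register with no constraint */
	set_acceptable_latency("ipw2100", INFINITE_LATENCY);
	/* adapter up: cap PM wakeup latency at 175 usec */
	modify_acceptable_latency("ipw2100", 175);
	/* adapter down: relax the constraint again */
	modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
	/* module exit: unregister */
	remove_acceptable_latency("ipw2100");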
*/ spin_lock_irqsave(&priv->low_lock, flags); ipw2100_disable_interrupts(priv); @@ -1849,6 +1855,8 @@ ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(&priv->low_lock, flags); + modify_acceptable_latency("ipw2100", INFINITE_LATENCY); + #ifdef ACPI_CSTATE_LIMIT_DEFINED if (priv->config & CFG_C3_DISABLED) { IPW_DEBUG_INFO(": Resetting C3 transitions.\n"); @@ -6533,6 +6541,7 @@ ret = pci_module_init(&ipw2100_pci_driver); + set_acceptable_latency("ipw2100", INFINITE_LATENCY); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; driver_create_file(&ipw2100_pci_driver.driver, @@ -6553,6 +6562,7 @@ &driver_attr_debug_level); #endif pci_unregister_driver(&ipw2100_pci_driver); + remove_acceptable_latency("ipw2100"); } module_init(ipw2100_init); diff -urN ./linux-2.6.18.1/drivers/oprofile/oprofilefs.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/oprofile/oprofilefs.c --- ./linux-2.6.18.1/drivers/oprofile/oprofilefs.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/oprofile/oprofilefs.c 2007-05-19 23:58:35.000000000 +0900 @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) { diff -urN ./linux-2.6.18.1/drivers/pci/Makefile linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/Makefile --- ./linux-2.6.18.1/drivers/pci/Makefile 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/Makefile 2007-05-19 23:58:35.000000000 +0900 @@ -27,7 +27,8 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o -msiobj-y := msi.o msi-apic.o +msiobj-y := msi.o +msiobj-$(CONFIG_IA64) += msi-apic.o msiobj-$(CONFIG_IA64_GENERIC) += msi-altix.o msiobj-$(CONFIG_IA64_SGI_SN2) += msi-altix.o obj-$(CONFIG_PCI_MSI) += $(msiobj-y) diff -urN ./linux-2.6.18.1/drivers/pci/hotplug/cpci_hotplug_core.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/cpci_hotplug_core.c --- ./linux-2.6.18.1/drivers/pci/hotplug/cpci_hotplug_core.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/cpci_hotplug_core.c 2007-05-19 23:58:35.000000000 +0900 @@ -59,8 +59,8 @@ static atomic_t extracting; int cpci_debug; static struct cpci_hp_controller *controller; -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ static int thread_finished = 1; static int enable_slot(struct hotplug_slot *slot); diff -urN ./linux-2.6.18.1/drivers/pci/hotplug/cpqphp_ctrl.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/cpqphp_ctrl.c --- ./linux-2.6.18.1/drivers/pci/hotplug/cpqphp_ctrl.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/cpqphp_ctrl.c 2007-05-19 23:58:35.000000000 +0900 @@ -44,8 +44,8 @@ u8 behind_bridge, struct resource_lists *resources); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* 
mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ diff -urN ./linux-2.6.18.1/drivers/pci/hotplug/ibmphp_hpc.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/ibmphp_hpc.c --- ./linux-2.6.18.1/drivers/pci/hotplug/ibmphp_hpc.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/ibmphp_hpc.c 2007-05-19 23:58:35.000000000 +0900 @@ -106,7 +106,7 @@ static struct mutex sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct compat_semaphore sem_exit; // make sure polling thread goes away //---------------------------------------------------------------------------- // local function prototypes //---------------------------------------------------------------------------- diff -urN ./linux-2.6.18.1/drivers/pci/hotplug/pciehp_ctrl.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/pciehp_ctrl.c --- ./linux-2.6.18.1/drivers/pci/hotplug/pciehp_ctrl.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/hotplug/pciehp_ctrl.c 2007-05-19 23:58:35.000000000 +0900 @@ -37,8 +37,8 @@ static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ static unsigned long surprise_rm_pending; /* = 0 */ diff -urN ./linux-2.6.18.1/drivers/pci/msi-altix.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi-altix.c --- ./linux-2.6.18.1/drivers/pci/msi-altix.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi-altix.c 2007-05-19 23:58:35.000000000 +0900 @@ -26,7 +26,7 @@ static struct sn_msi_info *sn_msi_info; static void -sn_msi_teardown(unsigned int vector) +sn_msi_teardown(unsigned int irq) { nasid_t nasid; int widget; @@ -36,7 +36,7 @@ struct pcibus_bussoft *bussoft; struct sn_pcibus_provider *provider; - sn_irq_info = sn_msi_info[vector].sn_irq_info; + sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -45,9 +45,9 @@ provider = SN_PCIDEV_BUSPROVIDER(pdev); (*provider->dma_unmap)(pdev, - sn_msi_info[vector].pci_addr, + sn_msi_info[irq].pci_addr, PCI_DMA_FROMDEVICE); - sn_msi_info[vector].pci_addr = 0; + sn_msi_info[irq].pci_addr = 0; bussoft = SN_PCIDEV_BUSSOFT(pdev); nasid = NASID_GET(bussoft->bs_base); @@ -56,14 +56,13 @@ SWIN_WIDGETNUM(bussoft->bs_base); sn_intr_free(nasid, widget, sn_irq_info); - sn_msi_info[vector].sn_irq_info = NULL; + sn_msi_info[irq].sn_irq_info = NULL; return; } int -sn_msi_setup(struct pci_dev *pdev, unsigned int vector, - u32 *addr_hi, u32 *addr_lo, u32 *data) +sn_msi_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { int widget; int status; @@ -93,7 +92,7 @@ if (! 
sn_irq_info) return -ENOMEM; - status = sn_intr_alloc(nasid, widget, sn_irq_info, vector, -1, -1); + status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1); if (status) { kfree(sn_irq_info); return -ENOMEM; @@ -119,28 +118,27 @@ return -ENOMEM; } - sn_msi_info[vector].sn_irq_info = sn_irq_info; - sn_msi_info[vector].pci_addr = bus_addr; + sn_msi_info[irq].sn_irq_info = sn_irq_info; + sn_msi_info[irq].pci_addr = bus_addr; - *addr_hi = (u32)(bus_addr >> 32); - *addr_lo = (u32)(bus_addr & 0x00000000ffffffff); + msg->address_hi = (u32)(bus_addr >> 32); + msg->address_lo = (u32)(bus_addr & 0x00000000ffffffff); /* * In the SN platform, bit 16 is a "send vector" bit which * must be present in order to move the vector through the system. */ - *data = 0x100 + (unsigned int)vector; + msg->data = 0x100 + irq; #ifdef CONFIG_SMP - set_irq_affinity_info((vector & 0xff), sn_irq_info->irq_cpuid, 0); + set_irq_affinity_info(irq, sn_irq_info->irq_cpuid, 0); #endif return 0; } static void -sn_msi_target(unsigned int vector, unsigned int cpu, - u32 *addr_hi, u32 *addr_lo) +sn_msi_target(unsigned int irq, cpumask_t cpu_mask, struct msi_msg *msg) { int slice; nasid_t nasid; @@ -150,8 +148,10 @@ struct sn_irq_info *sn_irq_info; struct sn_irq_info *new_irq_info; struct sn_pcibus_provider *provider; + unsigned int cpu; - sn_irq_info = sn_msi_info[vector].sn_irq_info; + cpu = first_cpu(cpu_mask); + sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -163,15 +163,15 @@ pdev = sn_pdev->pdi_linux_pcidev; provider = SN_PCIDEV_BUSPROVIDER(pdev); - bus_addr = (u64)(*addr_hi) << 32 | (u64)(*addr_lo); + bus_addr = (u64)(msg->address_hi) << 32 | (u64)(msg->address_lo); (*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE); - sn_msi_info[vector].pci_addr = 0; + sn_msi_info[irq].pci_addr = 0; nasid = cpuid_to_nasid(cpu); slice = cpuid_to_slice(cpu); new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); - sn_msi_info[vector].sn_irq_info = new_irq_info; + sn_msi_info[irq].sn_irq_info = new_irq_info; if (new_irq_info == NULL) return; @@ -184,12 +184,13 @@ sizeof(new_irq_info->irq_xtalkaddr), SN_DMA_MSI|SN_DMA_ADDR_XIO); - sn_msi_info[vector].pci_addr = bus_addr; - *addr_hi = (u32)(bus_addr >> 32); - *addr_lo = (u32)(bus_addr & 0x00000000ffffffff); + sn_msi_info[irq].pci_addr = bus_addr; + msg->address_hi = (u32)(bus_addr >> 32); + msg->address_lo = (u32)(bus_addr & 0x00000000ffffffff); } struct msi_ops sn_msi_ops = { + .needs_64bit_address = 1, .setup = sn_msi_setup, .teardown = sn_msi_teardown, #ifdef CONFIG_SMP @@ -201,7 +202,7 @@ sn_msi_init(void) { sn_msi_info = - kzalloc(sizeof(struct sn_msi_info) * NR_VECTORS, GFP_KERNEL); + kzalloc(sizeof(struct sn_msi_info) * NR_IRQS, GFP_KERNEL); if (! 
sn_msi_info) return -ENOMEM; diff -urN ./linux-2.6.18.1/drivers/pci/msi-apic.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi-apic.c --- ./linux-2.6.18.1/drivers/pci/msi-apic.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi-apic.c 2007-05-19 23:58:35.000000000 +0900 @@ -46,37 +46,36 @@ static void -msi_target_apic(unsigned int vector, - unsigned int dest_cpu, - u32 *address_hi, /* in/out */ - u32 *address_lo) /* in/out */ +msi_target_apic(unsigned int irq, cpumask_t cpu_mask, struct msi_msg *msg) { - u32 addr = *address_lo; + u32 addr = msg->address_lo; addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(dest_cpu)); + addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(first_cpu(cpu_mask))); - *address_lo = addr; + msg->address_lo = addr; } static int msi_setup_apic(struct pci_dev *pdev, /* unused in generic */ - unsigned int vector, - u32 *address_hi, - u32 *address_lo, - u32 *data) + unsigned int irq, + struct msi_msg *msg) { unsigned long dest_phys_id; + unsigned int vector; dest_phys_id = cpu_physical_id(first_cpu(cpu_online_map)); + vector = irq; - *address_hi = 0; - *address_lo = MSI_ADDR_HEADER | - MSI_ADDR_DESTMODE_PHYS | - MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DESTID_CPU(dest_phys_id); + msg->address_hi = 0; + msg->address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DESTID_CPU(dest_phys_id); - *data = MSI_DATA_TRIGGER_EDGE | + msg->data = + MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(vector); @@ -85,7 +84,7 @@ } static void -msi_teardown_apic(unsigned int vector) +msi_teardown_apic(unsigned int irq) { return; /* no-op */ } @@ -95,6 +94,7 @@ */ struct msi_ops msi_apic_ops = { + .needs_64bit_address = 0, .setup = msi_setup_apic, .teardown = msi_teardown_apic, .target = msi_target_apic, diff -urN ./linux-2.6.18.1/drivers/pci/msi.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi.c --- ./linux-2.6.18.1/drivers/pci/msi.c 2006-10-14 12:34:03.000000000 +0900 +++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi.c 2007-05-19 23:58:35.000000000 +0900 @@ -6,6 +6,7 @@ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) */ +#include #include #include #include @@ -22,19 +23,11 @@ #include "pci.h" #include "msi.h" -static DEFINE_SPINLOCK(msi_lock); +static DEFINE_RAW_SPINLOCK(msi_lock); static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL }; static kmem_cache_t* msi_cachep; static int pci_msi_enable = 1; -static int last_alloc_vector; -static int nr_released_vectors; -static int nr_reserved_vectors = NR_HP_RESERVED_VECTORS; -static int nr_msix_devices; - -#ifndef CONFIG_X86_IO_APIC -int vector_irq[NR_VECTORS] = { [0 ... 
NR_VECTORS - 1] = -1}; -#endif static struct msi_ops *msi_ops; @@ -61,11 +54,11 @@ return 0; } -static void msi_set_mask_bit(unsigned int vector, int flag) +static void msi_set_mask_bit(unsigned int irq, int flag) { struct msi_desc *entry; - entry = (struct msi_desc *)msi_desc[vector]; + entry = msi_desc[irq]; if (!entry || !entry->dev || !entry->mask_base) return; switch (entry->msi_attrib.type) { @@ -93,84 +86,119 @@ } } -#ifdef CONFIG_SMP -static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) +static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry; - u32 address_hi, address_lo; - unsigned int irq = vector; - unsigned int dest_cpu = first_cpu(cpu_mask); + switch(entry->msi_attrib.type) { + case PCI_CAP_ID_MSI: + { + struct pci_dev *dev = entry->dev; + int pos = entry->msi_attrib.pos; + u16 data; + + pci_read_config_dword(dev, msi_lower_address_reg(pos), + &msg->address_lo); + if (entry->msi_attrib.is_64) { + pci_read_config_dword(dev, msi_upper_address_reg(pos), + &msg->address_hi); + pci_read_config_word(dev, msi_data_reg(pos, 1), &data); + } else { + msg->address_hi = 0; + pci_read_config_word(dev, msi_data_reg(pos, 1), &data); + } + msg->data = data; + break; + } + case PCI_CAP_ID_MSIX: + { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - entry = (struct msi_desc *)msi_desc[vector]; - if (!entry || !entry->dev) - return; + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + break; + } + default: + BUG(); + } +} +static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) +{ switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { - int pos = pci_find_capability(entry->dev, PCI_CAP_ID_MSI); + struct pci_dev *dev = entry->dev; + int pos = entry->msi_attrib.pos; - if (!pos) - return; - - pci_read_config_dword(entry->dev, msi_upper_address_reg(pos), - &address_hi); - pci_read_config_dword(entry->dev, msi_lower_address_reg(pos), - &address_lo); - - msi_ops->target(vector, dest_cpu, &address_hi, &address_lo); - - pci_write_config_dword(entry->dev, msi_upper_address_reg(pos), - address_hi); - pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), - address_lo); - set_native_irq_info(irq, cpu_mask); + pci_write_config_dword(dev, msi_lower_address_reg(pos), + msg->address_lo); + if (entry->msi_attrib.is_64) { + pci_write_config_dword(dev, msi_upper_address_reg(pos), + msg->address_hi); + pci_write_config_word(dev, msi_data_reg(pos, 1), + msg->data); + } else { + pci_write_config_word(dev, msi_data_reg(pos, 0), + msg->data); + } break; } case PCI_CAP_ID_MSIX: { - int offset_hi = - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET; - int offset_lo = - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET; - - address_hi = readl(entry->mask_base + offset_hi); - address_lo = readl(entry->mask_base + offset_lo); - - msi_ops->target(vector, dest_cpu, &address_hi, &address_lo); - - writel(address_hi, entry->mask_base + offset_hi); - writel(address_lo, entry->mask_base + offset_lo); - set_native_irq_info(irq, cpu_mask); + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + writel(msg->address_lo, + base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + writel(msg->address_hi, + base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + 
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); break; } default: - break; + BUG(); } } + +#ifdef CONFIG_SMP +static void set_msi_affinity(unsigned int irq, cpumask_t cpu_mask) +{ + struct msi_desc *entry; + struct msi_msg msg; + + entry = msi_desc[irq]; + if (!entry || !entry->dev) + return; + + read_msi_msg(entry, &msg); + msi_ops->target(irq, cpu_mask, &msg); + write_msi_msg(entry, &msg); + set_native_irq_info(irq, cpu_mask); +} #else #define set_msi_affinity NULL #endif /* CONFIG_SMP */ -static void mask_MSI_irq(unsigned int vector) +static void mask_MSI_irq(unsigned int irq) { - msi_set_mask_bit(vector, 1); + msi_set_mask_bit(irq, 1); } -static void unmask_MSI_irq(unsigned int vector) +static void unmask_MSI_irq(unsigned int irq) { - msi_set_mask_bit(vector, 0); + msi_set_mask_bit(irq, 0); } -static unsigned int startup_msi_irq_wo_maskbit(unsigned int vector) +static unsigned int startup_msi_irq_wo_maskbit(unsigned int irq) { struct msi_desc *entry; unsigned long flags; spin_lock_irqsave(&msi_lock, flags); - entry = msi_desc[vector]; + entry = msi_desc[irq]; if (!entry || !entry->dev) { spin_unlock_irqrestore(&msi_lock, flags); return 0; @@ -181,39 +209,39 @@ return 0; /* never anything pending */ } -static unsigned int startup_msi_irq_w_maskbit(unsigned int vector) +static unsigned int startup_msi_irq_w_maskbit(unsigned int irq) { - startup_msi_irq_wo_maskbit(vector); - unmask_MSI_irq(vector); + startup_msi_irq_wo_maskbit(irq); + unmask_MSI_irq(irq); return 0; /* never anything pending */ } -static void shutdown_msi_irq(unsigned int vector) +static void shutdown_msi_irq(unsigned int irq) { struct msi_desc *entry; unsigned long flags; spin_lock_irqsave(&msi_lock, flags); - entry = msi_desc[vector]; + entry = msi_desc[irq]; if (entry && entry->dev) entry->msi_attrib.state = 0; /* Mark it not active */ spin_unlock_irqrestore(&msi_lock, flags); } -static void end_msi_irq_wo_maskbit(unsigned int vector) +static void end_msi_irq_wo_maskbit(unsigned int irq) { - move_native_irq(vector); + move_native_irq(irq); ack_APIC_irq(); } -static void end_msi_irq_w_maskbit(unsigned int vector) +static void end_msi_irq_w_maskbit(unsigned int irq) { - move_native_irq(vector); - unmask_MSI_irq(vector); + move_native_irq(irq); + unmask_MSI_irq(irq); ack_APIC_irq(); } -static void do_nothing(unsigned int vector) +static void do_nothing(unsigned int irq) { } @@ -264,86 +292,7 @@ .set_affinity = set_msi_affinity }; -static int msi_free_vector(struct pci_dev* dev, int vector, int reassign); -static int assign_msi_vector(void) -{ - static int new_vector_avail = 1; - int vector; - unsigned long flags; - - /* - * msi_lock is provided to ensure that successful allocation of MSI - * vector is assigned unique among drivers. - */ - spin_lock_irqsave(&msi_lock, flags); - - if (!new_vector_avail) { - int free_vector = 0; - - /* - * vector_irq[] = -1 indicates that this specific vector is: - * - assigned for MSI (since MSI have no associated IRQ) or - * - assigned for legacy if less than 16, or - * - having no corresponding 1:1 vector-to-IOxAPIC IRQ mapping - * vector_irq[] = 0 indicates that this vector, previously - * assigned for MSI, is freed by hotplug removed operations. - * This vector will be reused for any subsequent hotplug added - * operations. - * vector_irq[] > 0 indicates that this vector is assigned for - * IOxAPIC IRQs. This vector and its value provides a 1-to-1 - * vector-to-IOxAPIC IRQ mapping. 

-static void mask_MSI_irq(unsigned int vector)
+static void mask_MSI_irq(unsigned int irq)
 {
-	msi_set_mask_bit(vector, 1);
+	msi_set_mask_bit(irq, 1);
 }

-static void unmask_MSI_irq(unsigned int vector)
+static void unmask_MSI_irq(unsigned int irq)
 {
-	msi_set_mask_bit(vector, 0);
+	msi_set_mask_bit(irq, 0);
 }

-static unsigned int startup_msi_irq_wo_maskbit(unsigned int vector)
+static unsigned int startup_msi_irq_wo_maskbit(unsigned int irq)
 {
 	struct msi_desc *entry;
 	unsigned long flags;

 	spin_lock_irqsave(&msi_lock, flags);
-	entry = msi_desc[vector];
+	entry = msi_desc[irq];
 	if (!entry || !entry->dev) {
 		spin_unlock_irqrestore(&msi_lock, flags);
 		return 0;
@@ -181,39 +209,39 @@
 	return 0;	/* never anything pending */
 }

-static unsigned int startup_msi_irq_w_maskbit(unsigned int vector)
+static unsigned int startup_msi_irq_w_maskbit(unsigned int irq)
 {
-	startup_msi_irq_wo_maskbit(vector);
-	unmask_MSI_irq(vector);
+	startup_msi_irq_wo_maskbit(irq);
+	unmask_MSI_irq(irq);
 	return 0;	/* never anything pending */
 }

-static void shutdown_msi_irq(unsigned int vector)
+static void shutdown_msi_irq(unsigned int irq)
 {
 	struct msi_desc *entry;
 	unsigned long flags;

 	spin_lock_irqsave(&msi_lock, flags);
-	entry = msi_desc[vector];
+	entry = msi_desc[irq];
 	if (entry && entry->dev)
 		entry->msi_attrib.state = 0;	/* Mark it not active */
 	spin_unlock_irqrestore(&msi_lock, flags);
 }

-static void end_msi_irq_wo_maskbit(unsigned int vector)
+static void end_msi_irq_wo_maskbit(unsigned int irq)
 {
-	move_native_irq(vector);
+	move_native_irq(irq);
 	ack_APIC_irq();
 }

-static void end_msi_irq_w_maskbit(unsigned int vector)
+static void end_msi_irq_w_maskbit(unsigned int irq)
 {
-	move_native_irq(vector);
-	unmask_MSI_irq(vector);
+	move_native_irq(irq);
+	unmask_MSI_irq(irq);
 	ack_APIC_irq();
 }

-static void do_nothing(unsigned int vector)
+static void do_nothing(unsigned int irq)
 {
 }
@@ -264,86 +292,7 @@
 	.set_affinity	= set_msi_affinity
 };

-static int msi_free_vector(struct pci_dev* dev, int vector, int reassign);
-static int assign_msi_vector(void)
-{
-	static int new_vector_avail = 1;
-	int vector;
-	unsigned long flags;
-
-	/*
-	 * msi_lock is provided to ensure that successful allocation of MSI
-	 * vector is assigned unique among drivers.
-	 */
-	spin_lock_irqsave(&msi_lock, flags);
-
-	if (!new_vector_avail) {
-		int free_vector = 0;
-
-		/*
-		 * vector_irq[] = -1 indicates that this specific vector is:
-		 * - assigned for MSI (since MSI have no associated IRQ) or
-		 * - assigned for legacy if less than 16, or
-		 * - having no corresponding 1:1 vector-to-IOxAPIC IRQ mapping
-		 * vector_irq[] = 0 indicates that this vector, previously
-		 * assigned for MSI, is freed by hotplug removed operations.
-		 * This vector will be reused for any subsequent hotplug added
-		 * operations.
-		 * vector_irq[] > 0 indicates that this vector is assigned for
-		 * IOxAPIC IRQs. This vector and its value provides a 1-to-1
-		 * vector-to-IOxAPIC IRQ mapping.
-		 */
-		for (vector = FIRST_DEVICE_VECTOR; vector < NR_IRQS; vector++) {
-			if (vector_irq[vector] != 0)
-				continue;
-			free_vector = vector;
-			if (!msi_desc[vector])
-				break;
-			else
-				continue;
-		}
-		if (!free_vector) {
-			spin_unlock_irqrestore(&msi_lock, flags);
-			return -EBUSY;
-		}
-		vector_irq[free_vector] = -1;
-		nr_released_vectors--;
-		spin_unlock_irqrestore(&msi_lock, flags);
-		if (msi_desc[free_vector] != NULL) {
-			struct pci_dev *dev;
-			int tail;
-
-			/* free all linked vectors before re-assign */
-			do {
-				spin_lock_irqsave(&msi_lock, flags);
-				dev = msi_desc[free_vector]->dev;
-				tail = msi_desc[free_vector]->link.tail;
-				spin_unlock_irqrestore(&msi_lock, flags);
-				msi_free_vector(dev, tail, 1);
-			} while (free_vector != tail);
-		}
-
-		return free_vector;
-	}
-	vector = assign_irq_vector(AUTO_ASSIGN);
-	last_alloc_vector = vector;
-	if (vector == LAST_DEVICE_VECTOR)
-		new_vector_avail = 0;
-
-	spin_unlock_irqrestore(&msi_lock, flags);
-	return vector;
-}
-
-static int get_new_vector(void)
-{
-	int vector = assign_msi_vector();
-
-	if (vector > 0)
-		set_intr_gate(vector, interrupt[vector]);
-
-	return vector;
-}
-
+static int msi_free_irq(struct pci_dev* dev, int irq);

 static int msi_init(void)
 {
 	static int status = -ENOMEM;
@@ -367,13 +316,13 @@
 	}

 	if (! msi_ops) {
+		pci_msi_enable = 0;
 		printk(KERN_WARNING "PCI: MSI ops not registered. MSI disabled.\n");
 		status = -EINVAL;
 		return status;
 	}

-	last_alloc_vector = assign_irq_vector(AUTO_ASSIGN);
 	status = msi_cache_init();
 	if (status < 0) {
 		pci_msi_enable = 0;
@@ -381,23 +330,9 @@
 		return status;
 	}

-	if (last_alloc_vector < 0) {
-		pci_msi_enable = 0;
-		printk(KERN_WARNING "PCI: No interrupt vectors available for MSI\n");
-		status = -EBUSY;
-		return status;
-	}
-	vector_irq[last_alloc_vector] = 0;
-	nr_released_vectors++;
-
 	return status;
 }

-static int get_msi_vector(struct pci_dev *dev)
-{
-	return get_new_vector();
-}
-
 static struct msi_desc* alloc_msi_entry(void)
 {
 	struct msi_desc *entry;
@@ -413,29 +348,45 @@
 	return entry;
 }

-static void attach_msi_entry(struct msi_desc *entry, int vector)
+static void attach_msi_entry(struct msi_desc *entry, int irq)
 {
 	unsigned long flags;

 	spin_lock_irqsave(&msi_lock, flags);
-	msi_desc[vector] = entry;
+	msi_desc[irq] = entry;
 	spin_unlock_irqrestore(&msi_lock, flags);
 }

-static void irq_handler_init(int cap_id, int pos, int mask)
+static int create_msi_irq(struct hw_interrupt_type *handler)
 {
-	unsigned long flags;
+	struct msi_desc *entry;
+	int irq;
+
+	entry = alloc_msi_entry();
+	if (!entry)
+		return -ENOMEM;

-	spin_lock_irqsave(&irq_desc[pos].lock, flags);
-	if (cap_id == PCI_CAP_ID_MSIX)
-		irq_desc[pos].chip = &msix_irq_type;
-	else {
-		if (!mask)
-			irq_desc[pos].chip = &msi_irq_wo_maskbit_type;
-		else
-			irq_desc[pos].chip = &msi_irq_w_maskbit_type;
+	irq = create_irq();
+	if (irq < 0) {
+		kmem_cache_free(msi_cachep, entry);
+		return -EBUSY;
 	}
-	spin_unlock_irqrestore(&irq_desc[pos].lock, flags);
+
+	set_irq_chip(irq, handler);
+	set_irq_data(irq, entry);
+
+	return irq;
+}
+
+static void destroy_msi_irq(unsigned int irq)
+{
+	struct msi_desc *entry;
+
+	entry = get_irq_data(irq);
+	set_irq_chip(irq, NULL);
+	set_irq_data(irq, NULL);
+	destroy_irq(irq);
+	kmem_cache_free(msi_cachep, entry);
 }

 static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
@@ -480,21 +431,21 @@
 	}
 }

-static int msi_lookup_vector(struct pci_dev *dev, int type)
+static int msi_lookup_irq(struct pci_dev *dev, int type)
 {
-	int vector;
+	int irq;
 	unsigned long flags;

 	spin_lock_irqsave(&msi_lock, flags);
-	for (vector = FIRST_DEVICE_VECTOR; vector < NR_IRQS; vector++) {
-		if (!msi_desc[vector] || msi_desc[vector]->dev != dev ||
-			msi_desc[vector]->msi_attrib.type != type ||
-			msi_desc[vector]->msi_attrib.default_vector != dev->irq)
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
+			msi_desc[irq]->msi_attrib.type != type ||
+			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
 			continue;
 		spin_unlock_irqrestore(&msi_lock, flags);
-		/* This pre-assigned MSI vector for this device
-		   already exits. Override dev->irq with this vector */
-		dev->irq = vector;
+		/* This pre-assigned MSI irq for this device
+		   already exists. Override dev->irq with this irq */
+		dev->irq = irq;
 		return 0;
 	}
 	spin_unlock_irqrestore(&msi_lock, flags);
@@ -506,11 +457,6 @@
 {
 	if (!dev)
 		return;
-
-	if (pci_find_capability(dev, PCI_CAP_ID_MSIX) > 0)
-		nr_msix_devices++;
-	else if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0)
-		nr_reserved_vectors++;
 }

 #ifdef CONFIG_PM
@@ -584,7 +530,7 @@
 {
 	int pos;
 	int temp;
-	int vector, head, tail = 0;
+	int irq, head, tail = 0;
 	u16 control;
 	struct pci_cap_saved_state *save_state;
@@ -606,33 +552,20 @@
 	/* save the table */
 	temp = dev->irq;
-	if (msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
+	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
 		kfree(save_state);
 		return -EINVAL;
 	}

-	vector = head = dev->irq;
+	irq = head = dev->irq;
 	while (head != tail) {
-		int j;
-		void __iomem *base;
 		struct msi_desc *entry;

-		entry = msi_desc[vector];
-		base = entry->mask_base;
-		j = entry->msi_attrib.entry_nr;
-
-		entry->address_lo_save =
-			readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		entry->address_hi_save =
-			readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		entry->data_save =
-			readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_DATA_OFFSET);
+		entry = msi_desc[irq];
+		read_msi_msg(entry, &entry->msg_save);

-		tail = msi_desc[vector]->link.tail;
-		vector = tail;
+		tail = msi_desc[irq]->link.tail;
+		irq = tail;
 	}
 	dev->irq = temp;
@@ -645,9 +578,7 @@
 {
 	u16 save;
 	int pos;
-	int vector, head, tail = 0;
-	void __iomem *base;
-	int j;
+	int irq, head, tail = 0;
 	struct msi_desc *entry;
 	int temp;
 	struct pci_cap_saved_state *save_state;
@@ -665,26 +596,15 @@
 	/* route the table */
 	temp = dev->irq;
-	if (msi_lookup_vector(dev, PCI_CAP_ID_MSIX))
+	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
 		return;
-	vector = head = dev->irq;
+	irq = head = dev->irq;
 	while (head != tail) {
-		entry = msi_desc[vector];
-		base = entry->mask_base;
-		j = entry->msi_attrib.entry_nr;
-
-		writel(entry->address_lo_save,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		writel(entry->address_hi_save,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		writel(entry->data_save,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_DATA_OFFSET);
+		entry = msi_desc[irq];
+		write_msi_msg(entry, &entry->msg_save);

-		tail = msi_desc[vector]->link.tail;
-		vector = tail;
+		tail = msi_desc[irq]->link.tail;
+		irq = tail;
 	}
 	dev->irq = temp;
@@ -696,29 +616,19 @@

 static int msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
 {
 	int status;
-	u32 address_hi;
-	u32 address_lo;
-	u32 data;
-	int pos, vector = dev->irq;
+	struct msi_msg msg;
+	int pos;
 	u16 control;

-	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	pos = entry->msi_attrib.pos;
 	pci_read_config_word(dev, msi_control_reg(pos), &control);

 	/* Configure MSI capability structure */
-	status = msi_ops->setup(dev, vector, &address_hi, &address_lo, &data);
+	status = msi_ops->setup(dev, dev->irq, &msg);
 	if (status < 0)
 		return status;

-	pci_write_config_dword(dev, msi_lower_address_reg(pos), address_lo);
-	if (is_64bit_address(control)) {
-		pci_write_config_dword(dev,
-			msi_upper_address_reg(pos), address_hi);
-		pci_write_config_word(dev,
-			msi_data_reg(pos, 1), data);
-	} else
-		pci_write_config_word(dev,
-			msi_data_reg(pos, 0), data);
+	write_msi_msg(entry, &msg);
 	if (entry->msi_attrib.maskbit) {
 		unsigned int maskbits, temp;
 		/* All MSIs are unmasked by default, Mask them all */
@@ -741,53 +651,54 @@
  * @dev: pointer to the pci_dev data structure of MSI device function
  *
  * Setup the MSI capability structure of device function with a single
- * MSI vector, regardless of device function is capable of handling
+ * MSI irq, regardless of whether the device function is capable of handling
  * multiple messages. A return of zero indicates the successful setup
- * of an entry zero with the new MSI vector or non-zero for otherwise.
+ * of an entry zero with the new MSI irq, or non-zero otherwise.
 **/
 static int msi_capability_init(struct pci_dev *dev)
 {
 	int status;
 	struct msi_desc *entry;
-	int pos, vector;
+	int pos, irq;
 	u16 control;
+	struct hw_interrupt_type *handler;

 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 	/* MSI Entry Initialization */
-	entry = alloc_msi_entry();
-	if (!entry)
-		return -ENOMEM;
-
-	vector = get_msi_vector(dev);
-	if (vector < 0) {
-		kmem_cache_free(msi_cachep, entry);
-		return -EBUSY;
-	}
-	entry->link.head = vector;
-	entry->link.tail = vector;
+	handler = &msi_irq_wo_maskbit_type;
+	if (is_mask_bit_support(control))
+		handler = &msi_irq_w_maskbit_type;
+
+	irq = create_msi_irq(handler);
+	if (irq < 0)
+		return irq;
+
+	entry = get_irq_data(irq);
+	entry->link.head = irq;
+	entry->link.tail = irq;
 	entry->msi_attrib.type = PCI_CAP_ID_MSI;
 	entry->msi_attrib.state = 0;	/* Mark it not active */
+	entry->msi_attrib.is_64 = is_64bit_address(control);
 	entry->msi_attrib.entry_nr = 0;
 	entry->msi_attrib.maskbit = is_mask_bit_support(control);
-	entry->msi_attrib.default_vector = dev->irq;	/* Save IOAPIC IRQ */
-	dev->irq = vector;
+	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
+	entry->msi_attrib.pos = pos;
+	dev->irq = irq;
 	entry->dev = dev;
 	if (is_mask_bit_support(control)) {
 		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
 				is_64bit_address(control));
 	}
-	/* Replace with MSI handler */
-	irq_handler_init(PCI_CAP_ID_MSI, vector, entry->msi_attrib.maskbit);
 	/* Configure MSI capability structure */
 	status = msi_register_init(dev, entry);
 	if (status != 0) {
-		dev->irq = entry->msi_attrib.default_vector;
-		kmem_cache_free(msi_cachep, entry);
+		dev->irq = entry->msi_attrib.default_irq;
+		destroy_msi_irq(irq);
 		return status;
 	}
-	attach_msi_entry(entry, vector);
+	attach_msi_entry(entry, irq);
 	/* Set MSI enabled bits	 */
 	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
@@ -801,18 +712,16 @@
  * @nvec: number of @entries
  *
  * Setup the MSI-X capability structure of device function with a
- * single MSI-X vector. A return of zero indicates the successful setup of
- * requested MSI-X entries with allocated vectors or non-zero for otherwise.
+ * single MSI-X irq. A return of zero indicates the successful setup of
+ * requested MSI-X entries with allocated irqs, or non-zero otherwise.
 **/
 static int msix_capability_init(struct pci_dev *dev,
 				struct msix_entry *entries, int nvec)
 {
 	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
-	u32 address_hi;
-	u32 address_lo;
-	u32 data;
+	struct msi_msg msg;
 	int status;
-	int vector, pos, i, j, nr_entries, temp = 0;
+	int irq, pos, i, j, nr_entries, temp = 0;
 	unsigned long phys_addr;
 	u32 table_offset;
 	u16 control;
@@ -834,65 +743,58 @@
 	/* MSI-X Table Initialization */
 	for (i = 0; i < nvec; i++) {
-		entry = alloc_msi_entry();
-		if (!entry)
-			break;
-		vector = get_msi_vector(dev);
-		if (vector < 0) {
-			kmem_cache_free(msi_cachep, entry);
+		irq = create_msi_irq(&msix_irq_type);
+		if (irq < 0)
 			break;
-		}
+		entry = get_irq_data(irq);
 		j = entries[i].entry;
-		entries[i].vector = vector;
+		entries[i].vector = irq;
 		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
 		entry->msi_attrib.state = 0;	/* Mark it not active */
+		entry->msi_attrib.is_64 = 1;
 		entry->msi_attrib.entry_nr = j;
 		entry->msi_attrib.maskbit = 1;
-		entry->msi_attrib.default_vector = dev->irq;
+		entry->msi_attrib.default_irq = dev->irq;
+		entry->msi_attrib.pos = pos;
 		entry->dev = dev;
 		entry->mask_base = base;
 		if (!head) {
-			entry->link.head = vector;
-			entry->link.tail = vector;
+			entry->link.head = irq;
+			entry->link.tail = irq;
 			head = entry;
 		} else {
 			entry->link.head = temp;
 			entry->link.tail = tail->link.tail;
-			tail->link.tail = vector;
-			head->link.head = vector;
+			tail->link.tail = irq;
+			head->link.head = irq;
 		}
-		temp = vector;
+		temp = irq;
 		tail = entry;
-		/* Replace with MSI-X handler */
-		irq_handler_init(PCI_CAP_ID_MSIX, vector, 1);
 		/* Configure MSI-X capability structure */
-		status = msi_ops->setup(dev, vector,
-			&address_hi,
-			&address_lo,
-			&data);
-		if (status < 0)
+		status = msi_ops->setup(dev, irq, &msg);
+		if (status < 0) {
+			destroy_msi_irq(irq);
 			break;
+		}

-		writel(address_lo,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		writel(address_hi,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		writel(data,
-			base + j * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_DATA_OFFSET);
-		attach_msi_entry(entry, vector);
+		write_msi_msg(entry, &msg);
+		attach_msi_entry(entry, irq);
 	}
 	if (i != nvec) {
+		int avail = i - 1;
 		i--;
 		for (; i >= 0; i--) {
-			vector = (entries + i)->vector;
-			msi_free_vector(dev, vector, 0);
+			irq = (entries + i)->vector;
+			msi_free_irq(dev, irq);
 			(entries + i)->vector = 0;
 		}
-		return -EBUSY;
+		/* If we had some success, report the number of irqs
+		 * we succeeded in setting up.
		 */
+		if (avail <= 0)
+			avail = -EBUSY;
+		return avail;
 	}
 	/* Set MSI-X enabled bits */
 	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
@@ -905,10 +807,10 @@
  * @dev: pointer to the pci_dev data structure of MSI device function
  *
  * Setup the MSI capability structure of device function with
- * a single MSI vector upon its software driver call to request for
+ * a single MSI irq upon its software driver call to request for
  * MSI mode enabled on its hardware device function. A return of zero
  * indicates the successful setup of an entry zero with the new MSI
- * vector or non-zero for otherwise.
+ * irq, or non-zero otherwise.
 **/
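From a driver's point of view the contract documented above is unchanged by the rework: enable MSI, then request_irq() on the rewritten dev->irq, and tear down in the reverse order. A minimal sketch under that contract follows; the example_* names are hypothetical, and note that the warning paths this patch leaves in pci_disable_msi()/pci_disable_msix() make the free_irq()-before-disable ordering mandatory.

static irqreturn_t example_isr(int irq, void *dev_id, struct pt_regs *regs)
{
	return IRQ_HANDLED;
}

static int example_open(struct pci_dev *pdev)
{
	/* On success pci_enable_msi() rewrites pdev->irq to the MSI irq;
	 * on failure pdev->irq still holds the pin-assertion irq. */
	if (pci_enable_msi(pdev))
		dev_info(&pdev->dev, "MSI unavailable, staying in INTx mode\n");

	return request_irq(pdev->irq, example_isr, 0, "example", pdev);
}

static void example_close(struct pci_dev *pdev)
{
	free_irq(pdev->irq, pdev);	/* must precede pci_disable_msi() */
	pci_disable_msi(pdev);		/* restores the pin-assertion irq */
}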
 int pci_enable_msi(struct pci_dev* dev)
 {
@@ -936,52 +838,29 @@
 	if (!pos)
 		return -EINVAL;

-	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
-		/* Lookup Sucess */
-		unsigned long flags;
+	pci_read_config_word(dev, msi_control_reg(pos), &control);
+	if (!is_64bit_address(control) && msi_ops->needs_64bit_address)
+		return -EINVAL;

-		pci_read_config_word(dev, msi_control_reg(pos), &control);
-		if (control & PCI_MSI_FLAGS_ENABLE)
-			return 0;	/* Already in MSI mode */
-		spin_lock_irqsave(&msi_lock, flags);
-		if (!vector_irq[dev->irq]) {
-			msi_desc[dev->irq]->msi_attrib.state = 0;
-			vector_irq[dev->irq] = -1;
-			nr_released_vectors--;
-			spin_unlock_irqrestore(&msi_lock, flags);
-			status = msi_register_init(dev, msi_desc[dev->irq]);
-			if (status == 0)
-				enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
-			return status;
-		}
-		spin_unlock_irqrestore(&msi_lock, flags);
-		dev->irq = temp;
-	}
-	/* Check whether driver already requested for MSI-X vectors */
+	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
+
+	/* Check whether driver already requested for MSI-X irqs */
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
+	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
 		printk(KERN_INFO "PCI: %s: Can't enable MSI. "
-			"Device already has MSI-X vectors assigned\n",
+			"Device already has MSI-X irqs assigned\n",
 			pci_name(dev));
 		dev->irq = temp;
 		return -EINVAL;
 	}
 	status = msi_capability_init(dev);
-	if (!status) {
-		if (!pos)
-			nr_reserved_vectors--;	/* Only MSI capable */
-		else if (nr_msix_devices > 0)
-			nr_msix_devices--;	/* Both MSI and MSI-X capable,
-						   but choose enabling MSI */
-	}
-
 	return status;
 }

 void pci_disable_msi(struct pci_dev* dev)
 {
 	struct msi_desc *entry;
-	int pos, default_vector;
+	int pos, default_irq;
 	u16 control;
 	unsigned long flags;
@@ -998,6 +877,8 @@
 	if (!(control & PCI_MSI_FLAGS_ENABLE))
 		return;

+	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+
 	spin_lock_irqsave(&msi_lock, flags);
 	entry = msi_desc[dev->irq];
 	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
@@ -1007,32 +888,30 @@
 	if (entry->msi_attrib.state) {
 		spin_unlock_irqrestore(&msi_lock, flags);
 		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
-		       "free_irq() on MSI vector %d\n",
+		       "free_irq() on MSI irq %d\n",
 		       pci_name(dev), dev->irq);
 		BUG_ON(entry->msi_attrib.state > 0);
 	} else {
-		vector_irq[dev->irq] = 0;	/* free it */
-		nr_released_vectors++;
-		default_vector = entry->msi_attrib.default_vector;
+		default_irq = entry->msi_attrib.default_irq;
 		spin_unlock_irqrestore(&msi_lock, flags);
-		/* Restore dev->irq to its default pin-assertion vector */
-		dev->irq = default_vector;
-		disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI),
-					PCI_CAP_ID_MSI);
+		msi_free_irq(dev, dev->irq);
+
+		/* Restore dev->irq to its default pin-assertion irq */
+		dev->irq = default_irq;
 	}
 }

-static int msi_free_vector(struct pci_dev* dev, int vector, int reassign)
+static int msi_free_irq(struct pci_dev* dev, int irq)
 {
 	struct msi_desc *entry;
 	int head, entry_nr, type;
 	void __iomem *base;
 	unsigned long flags;

-	msi_ops->teardown(vector);
+	msi_ops->teardown(irq);

 	spin_lock_irqsave(&msi_lock, flags);
-	entry = msi_desc[vector];
+	entry = msi_desc[irq];
 	if (!entry || entry->dev != dev) {
 		spin_unlock_irqrestore(&msi_lock, flags);
 		return -EINVAL;
 	}
@@ -1044,101 +923,43 @@
 	msi_desc[entry->link.head]->link.tail = entry->link.tail;
 	msi_desc[entry->link.tail]->link.head = entry->link.head;
 	entry->dev = NULL;
-	if (!reassign) {
-		vector_irq[vector] = 0;
-		nr_released_vectors++;
-	}
-	msi_desc[vector] = NULL;
+	msi_desc[irq] = NULL;
 	spin_unlock_irqrestore(&msi_lock, flags);

-	kmem_cache_free(msi_cachep, entry);
+	destroy_msi_irq(irq);

 	if (type == PCI_CAP_ID_MSIX) {
-		if (!reassign)
-			writel(1, base +
-				entry_nr * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

-		if (head == vector)
+		if (head == irq)
 			iounmap(base);
 	}

 	return 0;
 }

-static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
-{
-	int vector = head, tail = 0;
-	int i, j = 0, nr_entries = 0;
-	void __iomem *base;
-	unsigned long flags;
-
-	spin_lock_irqsave(&msi_lock, flags);
-	while (head != tail) {
-		nr_entries++;
-		tail = msi_desc[vector]->link.tail;
-		if (entries[0].entry == msi_desc[vector]->msi_attrib.entry_nr)
-			j = vector;
-		vector = tail;
-	}
-	if (*nvec > nr_entries) {
-		spin_unlock_irqrestore(&msi_lock, flags);
-		*nvec = nr_entries;
-		return -EINVAL;
-	}
-	vector = ((j > 0) ? j : head);
-	for (i = 0; i < *nvec; i++) {
-		j = msi_desc[vector]->msi_attrib.entry_nr;
-		msi_desc[vector]->msi_attrib.state = 0;	/* Mark it not active */
-		vector_irq[vector] = -1;		/* Mark it busy */
-		nr_released_vectors--;
-		entries[i].vector = vector;
-		if (j != (entries + i)->entry) {
-			base = msi_desc[vector]->mask_base;
-			msi_desc[vector]->msi_attrib.entry_nr =
-				(entries + i)->entry;
-			writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET), base +
-				(entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-			writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET), base +
-				(entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-			writel( (readl(base + j * PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_DATA_OFFSET) & 0xff00) | vector,
-				base + (entries+i)->entry*PCI_MSIX_ENTRY_SIZE +
-				PCI_MSIX_ENTRY_DATA_OFFSET);
-		}
-		vector = msi_desc[vector]->link.tail;
-	}
-	spin_unlock_irqrestore(&msi_lock, flags);
-
-	return 0;
-}
-
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
  * @dev: pointer to the pci_dev data structure of MSI-X device function
  * @entries: pointer to an array of MSI-X entries
- * @nvec: number of MSI-X vectors requested for allocation by device driver
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
  *
  * Setup the MSI-X capability structure of device function with the number
- * of requested vectors upon its software driver call to request for
+ * of requested irqs upon its software driver call to request for
  * MSI-X mode enabled on its hardware device function. A return of zero
  * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
+ * with newly allocated MSI-X irqs. A return of < 0 indicates a failure.
  * Or a return of > 0 indicates that driver request is exceeding the number
- * of vectors available. Driver should use the returned value to re-send
+ * of irqs available. Driver should use the returned value to re-send
  * its request.
 **/
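In other words a positive return value is a quota, not an error: the driver is expected to shrink its request to the returned count and retry. Roughly, as a hypothetical helper (the caller fills in entries[i].entry beforehand):

static int example_enable_msix(struct pci_dev *pdev,
			       struct msix_entry *entries, int nvec)
{
	int rc;

	while (nvec > 0) {
		rc = pci_enable_msix(pdev, entries, nvec);
		if (rc == 0)
			return nvec;	/* entries[i].vector now holds irqs */
		if (rc < 0)
			return rc;	/* hard failure */
		nvec = rc;		/* only rc irqs available; retry smaller */
	}
	return -ENOSPC;
}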
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 {
 	struct pci_bus *bus;
-	int status, pos, nr_entries, free_vectors;
+	int status, pos, nr_entries;
 	int i, j, temp;
 	u16 control;
-	unsigned long flags;

 	if (!pci_msi_enable || !dev || !entries)
 		return -EINVAL;
@@ -1159,9 +980,6 @@
 		return -EINVAL;

 	pci_read_config_word(dev, msi_control_reg(pos), &control);
-	if (control & PCI_MSIX_FLAGS_ENABLE)
-		return -EINVAL;		/* Already in MSI-X mode */
-
 	nr_entries = multi_msix_capable(control);
 	if (nvec > nr_entries)
 		return -EINVAL;
@@ -1176,56 +994,18 @@
 		}
 	}
 	temp = dev->irq;
-	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
-		/* Lookup Sucess */
-		nr_entries = nvec;
-		/* Reroute MSI-X table */
-		if (reroute_msix_table(dev->irq, entries, &nr_entries)) {
-			/* #requested > #previous-assigned */
-			dev->irq = temp;
-			return nr_entries;
-		}
-		dev->irq = temp;
-		enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
-		return 0;
-	}
-	/* Check whether driver already requested for MSI vector */
+	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
+
+	/* Check whether driver already requested for MSI irq */
 	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
-		!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
+		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
 		printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
-			"Device already has an MSI vector assigned\n",
+			"Device already has an MSI irq assigned\n",
 			pci_name(dev));
 		dev->irq = temp;
 		return -EINVAL;
 	}
-
-	spin_lock_irqsave(&msi_lock, flags);
-	/*
-	 * msi_lock is provided to ensure that enough vectors resources are
-	 * available before granting.
-	 */
-	free_vectors = pci_vector_resources(last_alloc_vector,
-				nr_released_vectors);
-	/* Ensure that each MSI/MSI-X device has one vector reserved by
-	   default to avoid any MSI-X driver to take all available
-	   resources */
-	free_vectors -= nr_reserved_vectors;
-	/* Find the average of free vectors among MSI-X devices */
-	if (nr_msix_devices > 0)
-		free_vectors /= nr_msix_devices;
-	spin_unlock_irqrestore(&msi_lock, flags);
-
-	if (nvec > free_vectors) {
-		if (free_vectors > 0)
-			return free_vectors;
-		else
-			return -EBUSY;
-	}
-
 	status = msix_capability_init(dev, entries, nvec);
-	if (!status && nr_msix_devices > 0)
-		nr_msix_devices--;
-
 	return status;
 }
@@ -1247,47 +1027,42 @@
 	if (!(control & PCI_MSIX_FLAGS_ENABLE))
 		return;

+	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
+
 	temp = dev->irq;
-	if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
-		int state, vector, head, tail = 0, warning = 0;
+	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
+		int state, irq, head, tail = 0, warning = 0;
 		unsigned long flags;

-		vector = head = dev->irq;
-		spin_lock_irqsave(&msi_lock, flags);
+		irq = head = dev->irq;
+		dev->irq = temp;	/* Restore pin IRQ */
 		while (head != tail) {
-			state = msi_desc[vector]->msi_attrib.state;
+			spin_lock_irqsave(&msi_lock, flags);
+			state = msi_desc[irq]->msi_attrib.state;
+			tail = msi_desc[irq]->link.tail;
+			spin_unlock_irqrestore(&msi_lock, flags);
 			if (state)
 				warning = 1;
-			else {
-				vector_irq[vector] = 0;	/* free it */
-				nr_released_vectors++;
-			}
-			tail = msi_desc[vector]->link.tail;
-			vector = tail;
+			else if (irq != head)	/* Release MSI-X irq */
+				msi_free_irq(dev, irq);
+			irq = tail;
 		}
-		spin_unlock_irqrestore(&msi_lock, flags);
+		msi_free_irq(dev, irq);
 		if (warning) {
-			dev->irq = temp;
 			printk(KERN_WARNING
 				"PCI: %s: pci_disable_msix() called without "
-				"free_irq() on all MSI-X vectors\n",
+				"free_irq() on all MSI-X irqs\n",
 				pci_name(dev));
 			BUG_ON(warning > 0);
-		} else {
-			dev->irq = temp;
-			disable_msi_mode(dev,
-				pci_find_capability(dev, PCI_CAP_ID_MSIX),
-				PCI_CAP_ID_MSIX);
-		}
 	}
 }

 /**
- * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
+ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
  * @dev: pointer to the pci_dev data structure of MSI(X) device function
  *
  * Being called during hotplug remove, from which the device function
- * is hot-removed. All previous assigned MSI/MSI-X vectors, if
+ * is hot-removed. All previously assigned MSI/MSI-X irqs, if
  * allocated for this device function, are reclaimed to unused state,
  * which may be used later on.
 **/
@@ -1301,42 +1076,42 @@
 	temp = dev->irq;	/* Save IOAPIC IRQ */
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
-	if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
+	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
 		spin_lock_irqsave(&msi_lock, flags);
 		state = msi_desc[dev->irq]->msi_attrib.state;
 		spin_unlock_irqrestore(&msi_lock, flags);
 		if (state) {
 			printk(KERN_WARNING
 				"PCI: %s: msi_remove_pci_irq_vectors() "
-				"called without free_irq() on MSI vector %d\n",
+				"called without free_irq() on MSI irq %d\n",
 				pci_name(dev), dev->irq);
 			BUG_ON(state > 0);
-		} else /* Release MSI vector assigned to this device */
-			msi_free_vector(dev, dev->irq, 0);
+		} else /* Release MSI irq assigned to this device */
+			msi_free_irq(dev, dev->irq);
 		dev->irq = temp;	/* Restore IOAPIC IRQ */
 	}

 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
-		int vector, head, tail = 0, warning = 0;
+	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
+		int irq, head, tail = 0, warning = 0;
 		void __iomem *base = NULL;

-		vector = head = dev->irq;
+		irq = head = dev->irq;
 		while (head != tail) {
 			spin_lock_irqsave(&msi_lock, flags);
-			state = msi_desc[vector]->msi_attrib.state;
-			tail = msi_desc[vector]->link.tail;
-			base = msi_desc[vector]->mask_base;
+			state = msi_desc[irq]->msi_attrib.state;
+			tail = msi_desc[irq]->link.tail;
+			base = msi_desc[irq]->mask_base;
 			spin_unlock_irqrestore(&msi_lock, flags);
 			if (state)
 				warning = 1;
-			else if (vector != head) /* Release MSI-X vector */
-				msi_free_vector(dev, vector, 0);
-			vector = tail;
+			else if (irq != head) /* Release MSI-X irq */
+				msi_free_irq(dev, irq);
+			irq = tail;
 		}
-		msi_free_vector(dev, vector, 0);
+		msi_free_irq(dev, irq);
 		if (warning) {
 			iounmap(base);
 			printk(KERN_WARNING
 				"PCI: %s: msi_remove_pci_irq_vectors() "
-				"called without free_irq() on all MSI-X vectors\n",
+				"called without free_irq() on all MSI-X irqs\n",
 				pci_name(dev));
 			BUG_ON(warning > 0);
 		}
diff -urN ./linux-2.6.18.1/drivers/pci/msi.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi.h
--- ./linux-2.6.18.1/drivers/pci/msi.h 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/pci/msi.h 2007-05-19 23:58:35.000000000 +0900
@@ -6,85 +6,9 @@
 #ifndef MSI_H
 #define MSI_H

-/*
- * MSI operation vector.  Used by the msi core code (drivers/pci/msi.c)
- * to abstract platform-specific tasks relating to MSI address generation
- * and resource management.
- */
-struct msi_ops {
-	/**
-	 * setup - generate an MSI bus address and data for a given vector
-	 * @pdev: PCI device context (in)
-	 * @vector: vector allocated by the msi core (in)
-	 * @addr_hi: upper 32 bits of PCI bus MSI address (out)
-	 * @addr_lo: lower 32 bits of PCI bus MSI address (out)
-	 * @data: MSI data payload (out)
-	 *
-	 * Description: The setup op is used to generate a PCI bus addres and
-	 * data which the msi core will program into the card MSI capability
-	 * registers.  The setup routine is responsible for picking an initial
-	 * cpu to target the MSI at.  The setup routine is responsible for
-	 * examining pdev to determine the MSI capabilities of the card and
-	 * generating a suitable address/data.  The setup routine is
-	 * responsible for allocating and tracking any system resources it
-	 * needs to route the MSI to the cpu it picks, and for associating
-	 * those resources with the passed in vector.
-	 *
-	 * Returns 0 if the MSI address/data was successfully setup.
-	 **/
-
-	int (*setup) (struct pci_dev *pdev, unsigned int vector,
-		      u32 *addr_hi, u32 *addr_lo, u32 *data);
-
-	/**
-	 * teardown - release resources allocated by setup
-	 * @vector: vector context for resources (in)
-	 *
-	 * Description: The teardown op is used to release any resources
-	 * that were allocated in the setup routine associated with the passed
-	 * in vector.
-	 **/
-
-	void (*teardown) (unsigned int vector);
-
-	/**
-	 * target - retarget an MSI at a different cpu
-	 * @vector: vector context for resources (in)
-	 * @cpu: new cpu to direct vector at (in)
-	 * @addr_hi: new value of PCI bus upper 32 bits (in/out)
-	 * @addr_lo: new value of PCI bus lower 32 bits (in/out)
-	 *
-	 * Description: The target op is used to redirect an MSI vector
-	 * at a different cpu.  addr_hi/addr_lo coming in are the existing
-	 * values that the MSI core has programmed into the card.  The
-	 * target code is responsible for freeing any resources (if any)
-	 * associated with the old address, and generating a new PCI bus
-	 * addr_hi/addr_lo that will redirect the vector at the indicated cpu.
-	 **/
-
-	void (*target) (unsigned int vector, unsigned int cpu,
-			u32 *addr_hi, u32 *addr_lo);
-};
-
-extern int msi_register(struct msi_ops *ops);
-
 #include

 /*
- * Assume the maximum number of hot plug slots supported by the system is about
- * ten. The worstcase is that each of these slots is hot-added with a device,
- * which has two MSI/MSI-X capable functions. To avoid any MSI-X driver, which
- * attempts to request all available vectors, NR_HP_RESERVED_VECTORS is defined
- * as below to ensure at least one message is assigned to each detected MSI/
- * MSI-X device function.
- */
-#define NR_HP_RESERVED_VECTORS 	20
-
-extern int vector_irq[NR_VECTORS];
-extern void (*interrupt[NR_IRQS])(void);
-extern int pci_vector_resources(int last, int nr_released);
-
-/*
  * MSI-X Address Register
  */
 #define PCI_MSIX_FLAGS_QSIZE		0x7FF
@@ -110,8 +34,8 @@
 	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
 	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control)	(control & PCI_MSI_FLAGS_64BIT)
-#define is_mask_bit_support(control)	(control & PCI_MSI_FLAGS_MASKBIT)
+#define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
 	control |= PCI_MSI_FLAGS_ENABLE
@@ -130,10 +54,10 @@
 		__u8	type	: 5;	/* {0: unused, 5h:MSI, 11h:MSI-X} */
 		__u8	maskbit	: 1;	/* mask-pending bit supported ?   */
 		__u8	state	: 1;	/* {0: free, 1: busy}		  */
-		__u8	reserved: 1;	/* reserved			  */
-		__u8	entry_nr;	/* specific enabled entry	  */
-		__u8	default_vector;	/* default pre-assigned vector	  */
-		__u8	unused;		/* formerly unused destination cpu*/
+		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit  */
+		__u8	pos;		/* Location of the msi capability */
+		__u16	entry_nr;	/* specific enabled entry	  */
+		unsigned default_irq;	/* default pre-assigned irq	  */
 	}msi_attrib;

 	struct {
@@ -146,10 +70,7 @@

 #ifdef CONFIG_PM
 	/* PM save area for MSIX address/data */
-
-	u32	address_hi_save;
-	u32	address_lo_save;
-	u32	data_save;
+	struct msi_msg msg_save;
 #endif
 };
diff -urN ./linux-2.6.18.1/drivers/scsi/aacraid/aacraid.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/scsi/aacraid/aacraid.h
--- ./linux-2.6.18.1/drivers/scsi/aacraid/aacraid.h 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/scsi/aacraid/aacraid.h 2007-05-19 23:58:35.000000000 +0900
@@ -744,7 +744,7 @@
 	u32			unique;		// unique value representing this context
 	ulong			jiffies;	// used for cleanup - dmb changed to ulong
 	struct list_head	next;		// used to link context's into a linked list
-	struct semaphore	wait_sem;	// this is used to wait for the next fib to arrive.
+	struct compat_semaphore	wait_sem;	// this is used to wait for the next fib to arrive.
 	int			wait;		// Set to true when thread is in WaitForSingleObject
 	unsigned long		count;		// total number of FIBs on FibList
 	struct list_head	fib_list;	// this holds fibs and their attachd hw_fibs
@@ -814,7 +814,7 @@
 	 *	This is the event the sendfib routine will wait on if the
 	 *	caller did not pass one and this is synch io.
 	 */
-	struct semaphore	event_wait;
+	struct compat_semaphore	event_wait;
 	spinlock_t		event_lock;

 	u32			done;	/* gets set to 1 when fib is complete */
diff -urN ./linux-2.6.18.1/drivers/scsi/qla2xxx/qla_def.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/scsi/qla2xxx/qla_def.h
--- ./linux-2.6.18.1/drivers/scsi/qla2xxx/qla_def.h 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/scsi/qla2xxx/qla_def.h 2007-05-19 23:58:35.000000000 +0900
@@ -2284,7 +2284,7 @@
 	spinlock_t	mbx_reg_lock;	/* Mbx Cmd Register Lock */

 	struct semaphore mbx_cmd_sem;	/* Serialialize mbx access */
-	struct semaphore mbx_intr_sem;	/* Used for completion notification */
+	struct compat_semaphore mbx_intr_sem;	/* Used for completion notification */

 	uint32_t	mbx_flags;
 #define MBX_IN_PROGRESS		BIT_0
diff -urN ./linux-2.6.18.1/drivers/serial/8250.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/serial/8250.c
--- ./linux-2.6.18.1/drivers/serial/8250.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/serial/8250.c 2007-05-19 23:58:35.000000000 +0900
@@ -2252,14 +2252,10 @@

 	touch_nmi_watchdog();

-	local_irq_save(flags);
-	if (up->port.sysrq) {
-		/* serial8250_handle_port() already took the lock */
-		locked = 0;
-	} else if (oops_in_progress) {
-		locked = spin_trylock(&up->port.lock);
-	} else
-		spin_lock(&up->port.lock);
+	if (up->port.sysrq || oops_in_progress)
+		locked = spin_trylock_irqsave(&up->port.lock, flags);
+	else
+		spin_lock_irqsave(&up->port.lock, flags);

 	/*
 	 *	First save the IER then disable the interrupts
@@ -2281,8 +2277,7 @@
 	serial_out(up, UART_IER, ier);

 	if (locked)
-		spin_unlock(&up->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&up->port.lock, flags);
 }

 static int serial8250_console_setup(struct console *co, char *options)
diff -urN ./linux-2.6.18.1/drivers/usb/core/devio.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/devio.c
--- ./linux-2.6.18.1/drivers/usb/core/devio.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/devio.c 2007-05-19 23:58:35.000000000 +0900
@@ -304,10 +304,11 @@
 	struct async *as = (struct async *)urb->context;
 	struct dev_state *ps = as->ps;
 	struct siginfo sinfo;
+	unsigned long flags;

-	spin_lock(&ps->lock);
-	list_move_tail(&as->asynclist, &ps->async_completed);
-	spin_unlock(&ps->lock);
+	spin_lock_irqsave(&ps->lock, flags);
+	list_move_tail(&as->asynclist, &ps->async_completed);
+	spin_unlock_irqrestore(&ps->lock, flags);
 	if (as->signr) {
 		sinfo.si_signo = as->signr;
 		sinfo.si_errno = as->urb->status;
diff -urN ./linux-2.6.18.1/drivers/usb/core/hcd.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/hcd.c
--- ./linux-2.6.18.1/drivers/usb/core/hcd.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/hcd.c 2007-05-19 23:58:35.000000000 +0900
@@ -515,13 +515,11 @@
 	}

 	/* any errors get returned through the urb completion */
-	local_irq_save (flags);
-	spin_lock (&urb->lock);
+	spin_lock_irqsave(&urb->lock, flags);
 	if (urb->status == -EINPROGRESS)
 		urb->status = status;
-	spin_unlock (&urb->lock);
+	spin_unlock_irqrestore(&urb->lock, flags);
 	usb_hcd_giveback_urb (hcd, urb, NULL);
-	local_irq_restore (flags);
 	return 0;
 }
@@ -549,8 +547,7 @@

 	if (length > 0) {

 		/* try to complete the status urb */
-		local_irq_save (flags);
-		spin_lock(&hcd_root_hub_lock);
+		spin_lock_irqsave(&hcd_root_hub_lock, flags);
 		urb = hcd->status_urb;
 		if (urb) {
 			spin_lock(&urb->lock);
@@ -566,14 +563,13 @@
 			spin_unlock(&urb->lock);
 		} else
 			length = 0;
-		spin_unlock(&hcd_root_hub_lock);
+		spin_unlock_irqrestore(&hcd_root_hub_lock, flags);

 		/* local irqs are always blocked in completions */
 		if (length > 0)
 			usb_hcd_giveback_urb (hcd, urb, NULL);
 		else
 			hcd->poll_pending = 1;
-		local_irq_restore (flags);
 	}

 	/* The USB 2.0 spec says 256 ms.  This is close enough and won't
@@ -656,17 +652,15 @@
 	} else {				/* Status URB */
 		if (!hcd->uses_new_polling)
 			del_timer_sync (&hcd->rh_timer);
-		local_irq_disable ();
-		spin_lock (&hcd_root_hub_lock);
+		spin_lock_irq(&hcd_root_hub_lock);
 		if (urb == hcd->status_urb) {
 			hcd->status_urb = NULL;
 			urb->hcpriv = NULL;
 		} else
 			urb = NULL;	/* wasn't fully queued */
-		spin_unlock (&hcd_root_hub_lock);
+		spin_unlock_irq(&hcd_root_hub_lock);
 		if (urb)
 			usb_hcd_giveback_urb (hcd, urb, NULL);
-		local_irq_enable ();
 	}

 	return 0;
@@ -1371,15 +1365,13 @@
 	WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT &&
 			udev->state != USB_STATE_NOTATTACHED);

-	local_irq_disable ();
-
 	/* FIXME move most of this into message.c as part of its
 	 * endpoint disable logic
 	 */

 	/* ep is already gone from udev->ep_{in,out}[]; no more submits */
rescan:
-	spin_lock (&hcd_data_lock);
+	spin_lock_irq(&hcd_data_lock);
 	list_for_each_entry (urb, &ep->urb_list, urb_list) {
 		int	tmp;

@@ -1392,13 +1384,13 @@
 		if (urb->status != -EINPROGRESS)
 			continue;
 		usb_get_urb (urb);
-		spin_unlock (&hcd_data_lock);
+		spin_unlock_irq(&hcd_data_lock);

-		spin_lock (&urb->lock);
+		spin_lock_irq(&urb->lock);
 		tmp = urb->status;
 		if (tmp == -EINPROGRESS)
 			urb->status = -ESHUTDOWN;
-		spin_unlock (&urb->lock);
+		spin_unlock_irq(&urb->lock);

 		/* kick hcd unless it's already returning this */
 		if (tmp == -EINPROGRESS) {
@@ -1421,8 +1413,7 @@
 		/* list contents may have changed */
 		goto rescan;
 	}
-	spin_unlock (&hcd_data_lock);
-	local_irq_enable ();
+	spin_unlock_irq(&hcd_data_lock);

 	/* synchronize with the hardware, so old configuration state
 	 * clears out immediately (and will be freed).
diff -urN ./linux-2.6.18.1/drivers/usb/core/message.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/message.c
--- ./linux-2.6.18.1/drivers/usb/core/message.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/core/message.c 2007-05-19 23:58:35.000000000 +0900
@@ -264,8 +264,9 @@
 static void sg_complete (struct urb *urb, struct pt_regs *regs)
 {
 	struct usb_sg_request	*io = (struct usb_sg_request *) urb->context;
+	unsigned long flags;

-	spin_lock (&io->lock);
+	spin_lock_irqsave (&io->lock, flags);

 	/* In 2.5 we require hcds' endpoint queues not to progress after fault
 	 * reports, until the completion callback (this!) returns.  That lets
@@ -299,7 +300,7 @@
 		 * unlink pending urbs so they won't rx/tx bad data.
 		 * careful: unlink can sometimes be synchronous...
 		 */
-		spin_unlock (&io->lock);
+		spin_unlock_irqrestore (&io->lock, flags);
 		for (i = 0, found = 0; i < io->entries; i++) {
 			if (!io->urbs [i] || !io->urbs [i]->dev)
 				continue;
@@ -314,7 +315,7 @@
 			} else if (urb == io->urbs [i])
 				found = 1;
 		}
-		spin_lock (&io->lock);
+		spin_lock_irqsave (&io->lock, flags);
 	}
 	urb->dev = NULL;
@@ -324,7 +325,7 @@
 	if (!io->count)
 		complete (&io->complete);

-	spin_unlock (&io->lock);
+	spin_unlock_irqrestore (&io->lock, flags);
 }
@@ -586,7 +587,7 @@
 				dev_warn (&io->dev->dev, "%s, unlink --> %d\n",
 					__FUNCTION__, retval);
 			}
-			spin_lock (&io->lock);
+			spin_lock_irqsave (&io->lock, flags);
 		}
 		spin_unlock_irqrestore (&io->lock, flags);
 	}
diff -urN ./linux-2.6.18.1/drivers/usb/net/usbnet.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/net/usbnet.c
--- ./linux-2.6.18.1/drivers/usb/net/usbnet.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/net/usbnet.c 2007-05-19 23:58:35.000000000 +0900
@@ -818,6 +818,8 @@

 	urb->dev = NULL;
 	entry->state = tx_done;
+	spin_lock_rt(&dev->txq.lock);
+	spin_unlock_rt(&dev->txq.lock);
 	defer_bh(dev, skb, &dev->txq);
 }
diff -urN ./linux-2.6.18.1/drivers/usb/storage/usb.h linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/storage/usb.h
--- ./linux-2.6.18.1/drivers/usb/storage/usb.h 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/usb/storage/usb.h 2007-05-19 23:58:35.000000000 +0900
@@ -147,7 +147,7 @@
 	dma_addr_t		iobuf_dma;

 	/* mutual exclusion and synchronization structures */
-	struct semaphore	sema;		 /* to sleep thread on	      */
+	struct compat_semaphore	sema;		 /* to sleep thread on	      */
 	struct completion	notify;		 /* thread begin/end	      */
 	wait_queue_head_t	delay_wait;	 /* wait during scan, reset   */
diff -urN ./linux-2.6.18.1/drivers/video/console/fbcon.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/video/console/fbcon.c
--- ./linux-2.6.18.1/drivers/video/console/fbcon.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/video/console/fbcon.c 2007-05-19 23:58:35.000000000 +0900
@@ -1247,7 +1246,6 @@
 {
 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
 	struct fbcon_ops *ops = info->fbcon_par;
-	struct display *p = &fb_display[vc->vc_num];
 	u_int y_break;

@@ -1276,10 +1275,11 @@
 	struct display *p = &fb_display[vc->vc_num];
 	struct fbcon_ops *ops = info->fbcon_par;

-	if (!fbcon_is_inactive(vc, info))
+	if (!fbcon_is_inactive(vc, info)) {
 		ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
 			   get_color(vc, info, scr_readw(s), 1),
 			   get_color(vc, info, scr_readw(s), 0));
+	}
 }

 static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos)
@@ -3079,6 +3079,7 @@
 	.con_screen_pos 	= fbcon_screen_pos,
 	.con_getxy 		= fbcon_getxy,
 	.con_resize             = fbcon_resize,
+	.con_preemptible	= 1,
 };

 static struct notifier_block fbcon_event_notifier = {
diff -urN ./linux-2.6.18.1/drivers/video/console/vgacon.c linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/video/console/vgacon.c
--- ./linux-2.6.18.1/drivers/video/console/vgacon.c 2006-10-14 12:34:03.000000000 +0900
+++ linux-2.6.18.1-cabi-20070529-RT_HRT/drivers/video/console/vgacon.c 2007-05-19 23:58:35.000000000 +0900
@@ -52,7 +52,7 @@
 #include
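The driver hunks above (8250, USB core, usbnet, usb-storage, aacraid, qla2xxx) repeat two PREEMPT_RT conversions: completion paths stop assuming they run with interrupts already disabled and take the _irqsave lock variants themselves, and semaphores that are posted from hard interrupt context become compat_semaphore. The locking half of the pattern, as a generic sketch with hypothetical example_* names:

struct example_state {
	spinlock_t	lock;
	/* ... driver state protected by the lock ... */
};

/* A completion callback can no longer assume hard irqs are disabled
 * (under PREEMPT_RT it may run in task context), so it must save and
 * restore the irq state around its own locking. */
static void example_complete(struct urb *urb, struct pt_regs *regs)
{
	struct example_state *st = urb->context;	/* hypothetical */
	unsigned long flags;

	spin_lock_irqsave(&st->lock, flags);
	/* ... update state, move the urb onto a done list ... */
	spin_unlock_irqrestore(&st->lock, flags);
}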