diff options
author | Chen, Kenneth W <kenneth.w.chen@intel.com> | 2005-09-09 13:02:02 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-09 13:57:31 -0700 |
commit | 383f2835eb9afb723af71850037b2f074ac9db60 (patch) | |
tree | 1ef99fd4d7246b2afa16dc7d1514b6ff25fa8284 /arch/ia64/kernel | |
parent | b0d62e6d5b3318b6b722121d945afa295f7201b5 (diff) | |
download | kernel_samsung_smdk4412-383f2835eb9afb723af71850037b2f074ac9db60.tar.gz kernel_samsung_smdk4412-383f2835eb9afb723af71850037b2f074ac9db60.tar.bz2 kernel_samsung_smdk4412-383f2835eb9afb723af71850037b2f074ac9db60.zip |
[PATCH] Prefetch kernel stacks to speed up context switch
For architecture like ia64, the switch stack structure is fairly large
(currently 528 bytes). For context switch intensive application, we found
that significant amount of cache misses occurs in switch_to() function.
The following patch adds a hook in the schedule() function to prefetch
switch stack structure as soon as 'next' task is determined. This allows
maximum overlap in prefetch cache lines for that structure.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/entry.S | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3c882102450..915e1279183 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -470,6 +470,29 @@ ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) +GLOBAL_ENTRY(prefetch_stack) + add r14 = -IA64_SWITCH_STACK_SIZE, sp + add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 + ;; + ld8 r16 = [r15] // load next's stack pointer + lfetch.fault.excl [r14], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault [r16], 128 + br.ret.sptk.many rp +END(prefetch_switch_stack) + GLOBAL_ENTRY(execve) mov r15=__NR_execve // put syscall number in place break __BREAK_SYSCALL |