From e40b17208b6805be50ffe891878662b6076206b9 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:03 -0500 Subject: x86: Move notify_die from nmi.c to traps.c In order to handle a new nmi_watchdog approach, I need to move the notify_die() routine out of nmi_watchdog_tick() and into default_do_nmi(). This lets me easily swap out the old nmi_watchdog with the new one with just a config change. The change probably makes sense from a high level perspective because the nmi_watchdog shouldn't be handling notify_die routines anyway. However, this move does change the semantics a little bit. Instead of checking on every nmi interrupt if the cpus are stuck, only check them on the nmi_watchdog interrupts. v2: Move notify_die call into #idef block Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69396c..5d47682f580 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -400,13 +400,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) int cpu = smp_processor_id(); int rc = 0; - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - sum = get_timer_irqs(cpu); if (__get_cpu_var(nmi_touch)) { -- cgit v1.2.3 From 1fb9d6ad2766a1dd70d167552988375049a97f21 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:04 -0500 Subject: nmi_watchdog: Add new, generic implementation, using perf events This is a new generic nmi_watchdog implementation using the perf events infrastructure as suggested by Ingo. The implementation is simple, just create an in-kernel perf event and register an overflow handler to check for cpu lockups. I created a generic implementation that lives in kernel/ and the hardware specific part that for now lives in arch/x86. This approach has a number of advantages: - It simplifies the x86 PMU implementation in the long run, in that it removes the hardcoded low-level PMU implementation that was the NMI watchdog before. - It allows new NMI watchdog features to be added in a central place. - It allows other architectures to enable the NMI watchdog, as long as they have perf events (that provide NMIs) implemented. - It also allows for more graceful co-existence of existing perf events apps and the NMI watchdog - before these changes the relationship was exclusive. (The NMI watchdog will 'spend' a perf event when enabled. In later iterations we might be able to piggyback from an existing NMI event without having to allocate a hardware event for the NMI watchdog - turning this into a no-hardware-cost feature.) As for compatibility, we'll keep the old NMI watchdog code as well until the new one can 100% replace it on all CPUs, old and new alike. That might take some time as the NMI watchdog has been ported to many CPU models. I have done light testing to make sure the framework works correctly and it does. v2: Set the correct timeout values based on the old nmi watchdog Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 114 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 arch/x86/kernel/apic/hw_nmi.c (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c new file mode 100644 index 00000000000..8c0e6a410d0 --- /dev/null +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -0,0 +1,114 @@ +/* + * HW NMI watchdog support + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * Arch specific calls to support NMI watchdog + * + * Bits copied from original nmi.c file + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + +static DEFINE_PER_CPU(unsigned, last_irq_sum); + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +} + +static inline int mce_in_progress(void) +{ +#if defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} + +int hw_nmi_is_cpu_stuck(struct pt_regs *regs) +{ + unsigned int sum; + int cpu = smp_processor_id(); + + /* FIXME: cheap hack for this check, probably should get its own + * die_notifier handler + */ + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); + dump_stack(); + spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + } + + /* if we are doing an mce, just assume the cpu is not stuck */ + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) + return 0; + + /* We determine if the cpu is stuck by checking whether any + * interrupts have happened since we last checked. Of course + * an nmi storm could create false positives, but the higher + * level logic should account for that + */ + sum = get_timer_irqs(cpu); + if (__get_cpu_var(last_irq_sum) == sum) { + return 1; + } else { + __get_cpu_var(last_irq_sum) = sum; + return 0; + } +} + +void arch_trigger_all_cpu_backtrace(void) +{ + int i; + + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(to_cpumask(backtrace_mask))) + break; + mdelay(1); + } +} + +/* STUB calls to mimic old nmi_watchdog behaviour */ +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); +int nmi_watchdog_enabled; +int unknown_nmi_panic; +void cpu_nmi_set_wd_enabled(void) { return; } +void acpi_nmi_enable(void) { return; } +void acpi_nmi_disable(void) { return; } +void stop_apic_nmi_watchdog(void *unused) { return; } +void setup_apic_nmi_watchdog(void *unused) { return; } +int __init check_nmi_watchdog(void) { return 0; } -- cgit v1.2.3 From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:05 -0500 Subject: nmi_watchdog: Config option to enable new nmi_watchdog These are the bits that enable the new nmi_watchdog and safely isolate the old nmi_watchdog. Only one or the other can run, not both at the same time. Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 565c1bfc507..1a4512e48d2 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,12 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o +ifneq ($(CONFIG_NMI_WATCHDOG),y) +obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o +endif +obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o + obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o -- cgit v1.2.3 From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Feb 2010 17:19:19 -0500 Subject: nmi_watchdog: Compile and portability fixes The original patch was x86_64 centric. Changed the code to make it less so. ested by building and running on a powerpc. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 8c0e6a410d0..312d772c5c3 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -32,8 +32,13 @@ static DEFINE_PER_CPU(unsigned, last_irq_sum); */ static inline unsigned int get_timer_irqs(int cpu) { - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; + unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs; + +#if defined(CONFIG_X86_LOCAL_APIC) + irqs += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif + + return irqs; } static inline int mce_in_progress(void) @@ -82,6 +87,11 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs) } } +u64 hw_nmi_get_sample_period(void) +{ + return cpu_khz * 1000; +} + void arch_trigger_all_cpu_backtrace(void) { int i; @@ -100,15 +110,16 @@ void arch_trigger_all_cpu_backtrace(void) } /* STUB calls to mimic old nmi_watchdog behaviour */ +#if defined(CONFIG_X86_LOCAL_APIC) unsigned int nmi_watchdog = NMI_NONE; EXPORT_SYMBOL(nmi_watchdog); +void acpi_nmi_enable(void) { return; } +void acpi_nmi_disable(void) { return; } +#endif atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); -int nmi_watchdog_enabled; int unknown_nmi_panic; void cpu_nmi_set_wd_enabled(void) { return; } -void acpi_nmi_enable(void) { return; } -void acpi_nmi_disable(void) { return; } void stop_apic_nmi_watchdog(void *unused) { return; } void setup_apic_nmi_watchdog(void *unused) { return; } int __init check_nmi_watchdog(void) { return 0; } -- cgit v1.2.3 From 2cc4452bc31fc1cde6f0b64a4eb13269f982787d Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 18 Feb 2010 21:56:52 -0500 Subject: nmi_watchdog: Fix undefined 'apic' build bug Ingo provided me a config that fails to compile with: arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace': (.text+0x17e78): undefined reference to `apic' make: *** [.tmp_vmlinux1] Error 1 I realized I changed the compile behaviour of the nmi code by not wrapping it with CONFIG_LOCAL_APIC. To fix this I add a compile check for ARCH_HAS_NMI_WATCHDOG around arch_trigger_all_cpu_backtrace. Signed-off-by: Don Zickus Cc: a.p.zijlstra@chello.nl Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266548212-24243-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 312d772c5c3..0b4d205a6b8 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -92,6 +92,7 @@ u64 hw_nmi_get_sample_period(void) return cpu_khz * 1000; } +#ifdef ARCH_HAS_NMI_WATCHDOG void arch_trigger_all_cpu_backtrace(void) { int i; @@ -108,6 +109,7 @@ void arch_trigger_all_cpu_backtrace(void) mdelay(1); } } +#endif /* STUB calls to mimic old nmi_watchdog behaviour */ #if defined(CONFIG_X86_LOCAL_APIC) -- cgit v1.2.3 From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 22 Feb 2010 18:09:03 -0500 Subject: nmi_watchdog: Clean up various small details Mostly copy/paste whitespace damage with a couple of nitpicks by the checkpatch script. Fix the struct definition as requested by Ingo too. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/apic/hw_nmi.c | 14 +++++------ arch/x86/kernel/traps.c | 6 ++-- include/linux/nmi.h | 2 - kernel/nmi_watchdog.c | 51 ++++++++++++++++++++---------------------- 4 files changed, 36 insertions(+), 37 deletions(-) --- arch/x86/kernel/apic/hw_nmi.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 0b4d205a6b8..e8b78a0be5d 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -38,15 +38,15 @@ static inline unsigned int get_timer_irqs(int cpu) irqs += per_cpu(irq_stat, cpu).apic_timer_irqs; #endif - return irqs; + return irqs; } static inline int mce_in_progress(void) { #if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; + return atomic_read(&mce_entry) > 0; #endif - return 0; + return 0; } int hw_nmi_is_cpu_stuck(struct pt_regs *regs) @@ -69,9 +69,9 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs) } /* if we are doing an mce, just assume the cpu is not stuck */ - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - return 0; + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) + return 0; /* We determine if the cpu is stuck by checking whether any * interrupts have happened since we last checked. Of course @@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs) u64 hw_nmi_get_sample_period(void) { - return cpu_khz * 1000; + return cpu_khz * 1000; } #ifdef ARCH_HAS_NMI_WATCHDOG -- cgit v1.2.3 From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/apic/Makefile | 4 ++-- arch/x86/kernel/apic/hw_nmi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 1a4512e48d2..52f32e0ea19 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -3,10 +3,10 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_NMI_WATCHDOG),y) +ifneq ($(CONFIG_LOCKUP_DETECTOR),y) obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o endif -obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o +obj-$(CONFIG_LOCKUP_DETECTOR) += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index e8b78a0be5d..79425f96fce 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -89,7 +89,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs) u64 hw_nmi_get_sample_period(void) { - return cpu_khz * 1000; + return (u64)(cpu_khz) * 1000 * 60; } #ifdef ARCH_HAS_NMI_WATCHDOG -- cgit v1.2.3 From 7cbb7e7fa46f6e5229438ac9e4a5c72ec0d53e0b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:48 -0400 Subject: x86: Move trigger_all_cpu_backtrace to its own die_notifier As part of the transition of the nmi watchdog to something more generic, the trigger_all_cpu_backtrace code is getting left behind. Put it in its own die_notifier so it can still be used. V2: - use arch_spin_locks Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-6-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/apic/hw_nmi.c | 65 +++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 79425f96fce..8c3edfb89c2 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,6 +17,10 @@ #include #include #include +#include +#include +#include + #include #include @@ -54,20 +58,6 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs) unsigned int sum; int cpu = smp_processor_id(); - /* FIXME: cheap hack for this check, probably should get its own - * die_notifier handler - */ - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - dump_stack(); - spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - } - /* if we are doing an mce, just assume the cpu is not stuck */ /* Could check oops_in_progress here too, but it's safer not to */ if (mce_in_progress()) @@ -109,6 +99,53 @@ void arch_trigger_all_cpu_backtrace(void) mdelay(1); } } + +static int __kprobes +arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct pt_regs *regs; + int cpu = smp_processor_id(); + + switch (cmd) { + case DIE_NMI: + case DIE_NMI_IPI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + + arch_spin_lock(&lock); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); + dump_stack(); + arch_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return NOTIFY_STOP; + } + + return NOTIFY_DONE; +} + +static __read_mostly struct notifier_block backtrace_notifier = { + .notifier_call = arch_trigger_all_cpu_backtrace_handler, + .next = NULL, + .priority = 1 +}; + +static int __init register_trigger_all_cpu_backtrace(void) +{ + register_die_notifier(&backtrace_notifier); + return 0; +} +early_initcall(register_trigger_all_cpu_backtrace); #endif /* STUB calls to mimic old nmi_watchdog behaviour */ -- cgit v1.2.3 From 10f9014912a2b1cb59c39cdea777e6d9afa8f17e Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:49 -0400 Subject: x86: Cleanup hw_nmi.c cruft The design of the hardlockup watchdog has changed and cruft was left behind in the hw_nmi.c file. Just remove the code that isn't used anymore. Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-7-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/apic/hw_nmi.c | 58 ------------------------------------------- 1 file changed, 58 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 8c3edfb89c2..3b40082f037 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -9,74 +9,16 @@ * */ -#include -#include #include -#include -#include -#include -#include -#include #include #include #include - - #include #include /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static DEFINE_PER_CPU(unsigned, last_irq_sum); - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs; - -#if defined(CONFIG_X86_LOCAL_APIC) - irqs += per_cpu(irq_stat, cpu).apic_timer_irqs; -#endif - - return irqs; -} - -static inline int mce_in_progress(void) -{ -#if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -int hw_nmi_is_cpu_stuck(struct pt_regs *regs) -{ - unsigned int sum; - int cpu = smp_processor_id(); - - /* if we are doing an mce, just assume the cpu is not stuck */ - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - return 0; - - /* We determine if the cpu is stuck by checking whether any - * interrupts have happened since we last checked. Of course - * an nmi storm could create false positives, but the higher - * level logic should account for that - */ - sum = get_timer_irqs(cpu); - if (__get_cpu_var(last_irq_sum) == sum) { - return 1; - } else { - __get_cpu_var(last_irq_sum) = sum; - return 0; - } -} - u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; -- cgit v1.2.3 From 5e85391b3badd3f0e50ebdd0cafe0202a979f73a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 13 May 2010 09:12:39 +0200 Subject: x86, watchdog: Fix build error in hw_nmi.c On some configs the following build error triggers: arch/x86/kernel/apic/hw_nmi.c:35: error: 'apic' undeclared (first use in this function) arch/x86/kernel/apic/hw_nmi.c:35: error: (Each undeclared identifier is reported only once arch/x86/kernel/apic/hw_nmi.c:35: error: for each function it appears in.) Because asm/apic.h was only included implicitly. Include it explicitly. Cc: Frederic Weisbecker Cc: Don Zickus Cc: Peter Zijlstra Cc: Cyrill Gorcunov LKML-Reference: <1273713674-8434-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 3b40082f037..cefd6942f0e 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -8,6 +8,7 @@ * Bits copied from original nmi.c file * */ +#include #include #include -- cgit v1.2.3 From cafcd80d216bc2136b8edbb794327e495792c666 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 14 May 2010 11:11:21 -0400 Subject: lockup_detector: Cross arch compile fixes Combining the softlockup and hardlockup code causes watchdog.c to build even without the hardlockup detection support. So if an arch, that has the previous and the new nmi watchdog implementations cohabiting, wants to know if the generic one is in use, CONFIG_LOCKUP_DETECTOR is not a reliable check. We need to use CONFIG_HARDLOCKUP_DETECTOR instead. Fixes: kernel/built-in.o: In function `touch_nmi_watchdog': (.text+0x449bc): multiple definition of `touch_nmi_watchdog' arch/sparc/kernel/built-in.o:(.text+0x11b28): first defined here Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Don Zickus Cc: Cyrill Gorcunov LKML-Reference: <20100514151121.GR15159@redhat.com> [ use CONFIG_HARDLOCKUP_DETECTOR instead of CONFIG_PERF_EVENTS_NMI] Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/apic/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 52f32e0ea19..910f20b457c 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -3,10 +3,10 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_LOCKUP_DETECTOR),y) +ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o endif -obj-$(CONFIG_LOCKUP_DETECTOR) += hw_nmi.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o -- cgit v1.2.3