From 6afe1a1fe8ff83f6ac2726b04665e76ba7b14f3e Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 13 Mar 2008 23:52:49 +0100 Subject: PM: Remove legacy PM AFAICT pm_send_all is a nop when noone uses pm_register... Hmm.. can we just force CONFIG_PM_LEGACY=n, and see what happens? Or maybe this is better idea? It may break build somewhere, but it should be easy to fix... (it builds here, i386 and x86-64). Signed-off-by: Pavel Machek Acked-by: Ralf Baechle Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/apm_32.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d4438ef296d8..d7e92bfb8f66 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1189,19 +1189,6 @@ static int suspend(int vetoable) int err; struct apm_user *as; - if (pm_send_all(PM_SUSPEND, (void *)3)) { - /* Vetoed */ - if (vetoable) { - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - err = -EBUSY; - ignore_sys_suspend = 0; - printk(KERN_WARNING "apm: suspend was vetoed.\n"); - goto out; - } - printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n"); - } - device_suspend(PMSG_SUSPEND); local_irq_disable(); device_power_down(PMSG_SUSPEND); @@ -1224,7 +1211,6 @@ static int suspend(int vetoable) device_power_up(); local_irq_enable(); device_resume(); - pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); out: spin_lock(&user_list_lock); @@ -1337,7 +1323,6 @@ static void check_events(void) if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { device_resume(); - pm_send_all(PM_RESUME, (void *)0); queue_event(event, NULL); } ignore_normal_resume = 0; -- cgit v1.2.3 From 5373fd72577ffc4689ade0a2a1a885293c32c711 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 27 Mar 2008 02:00:06 -0400 Subject: PM: arch/x86/kernel/apm_32.c: fix build warning 
arch/x86/kernel/apm_32.c:1215: warning: label 'out' defined but not used Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/x86/kernel/apm_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d7e92bfb8f66..bc8b57d119a1 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1212,7 +1212,6 @@ static int suspend(int vetoable) local_irq_enable(); device_resume(); queue_event(APM_NORMAL_RESUME, NULL); - out: spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { as->suspend_wait = 0; -- cgit v1.2.3 From 64ac24e738823161693bf791f87adc802cf529ff Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 7 Mar 2008 21:55:58 -0500 Subject: Generic semaphore implementation Semaphores are no longer performance-critical, so a generic C implementation is better for maintainability, debuggability and extensibility. Thanks to Peter Zijlstra for fixing the lockdep warning. Thanks to Harvey Harrison for pointing out that the unlikely() was unnecessary. Signed-off-by: Matthew Wilcox Acked-by: Ingo Molnar --- arch/x86/kernel/i386_ksyms_32.c | 5 ----- arch/x86/kernel/x8664_ksyms_64.c | 6 ------ 2 files changed, 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 061627806a2d..deb43785e923 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,13 +1,8 @@ #include -#include #include #include #include -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a66e9c1a0537..95a993e18165 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -12,11 +11,6 @@ EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); - EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); EXPORT_SYMBOL(__get_user_4); -- cgit v1.2.3 From b4e0409a36f4533770a12095bde2a574a08a319e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Feb 2008 13:45:16 +0100 Subject: x86: check vmlinux limits, 64-bit these build-time and link-time checks would have prevented the vmlinux size regression. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 13 +++++++++++++ arch/x86/kernel/vmlinux_64.lds.S | 6 ++++++ 2 files changed, 19 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index ad2440832de0..38f32e798a99 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -82,6 +82,19 @@ void __init x86_64_start_kernel(char * real_mode_data) { int i; + /* + * Build-time sanity checks on the kernel image and module + * area mappings. 
(these are purely build-time and produce no code) + */ + BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START); + BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE); + BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE); + BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0); + BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + (__START_KERNEL & PGDIR_MASK))); + /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fab132299735..4c369451007b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -247,3 +247,9 @@ SECTIONS DWARF_DEBUG } + +/* + * Build-time check on the image size: + */ +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") -- cgit v1.2.3 From 85eb69a16aab5a394ce043c2131319eae35e6493 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Feb 2008 12:50:51 +0100 Subject: x86: increase the kernel text limit to 512 MB people sometimes do crazy stuff like building really large static arrays into their kernels or building allyesconfig kernels. Give more space to the kernel and push modules up a bit: kernel has 512 MB and modules have 1.5 GB. Should be enough for a few years ;-) Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a007454133a3..017216916dff 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -383,12 +383,12 @@ NEXT_PAGE(level2_ident_pgt) NEXT_PAGE(level2_kernel_pgt) /* - * 128 MB kernel mapping. We spend a full page on this pagetable + * 512 MB kernel mapping. We spend a full page on this pagetable * anyway. 
* * The kernel code+data+bss must not be bigger than that. * - * (NOTE: at +128MB starts the module area, see MODULES_VADDR. + * (NOTE: at +512MB starts the module area, see MODULES_VADDR. * If you want to increase this then increase MODULES_VADDR * too.) */ -- cgit v1.2.3 From 1a3e4ca41c5a38975023a6e8831c309d3322889c Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Apr 2008 01:29:27 -0700 Subject: x86 vDSO: don't use disabled vDSO for signal trampoline If the vDSO was not mapped, don't use it as the "restorer" for a signal handler. Whether we have a pointer in mm->context.vdso depends on what happened at exec time, so we shouldn't check any global flags now. Background: Currently, every 32-bit exec gets the vDSO mapped even if it's disabled (the process just doesn't get told about it). Because it's in fact always there, the bug that this patch fixes cannot happen now. With the second patch, it won't be mapped at all when it's disabled, which is one of the things that people might really want when they disable it (so nothing they didn't ask for goes into their address space). The 32-bit signal handler setup when SA_RESTORER is not used refers to current->mm->context.vdso without regard to whether the vDSO has been disabled when the process was exec'd. This patch fixes this not to use it when it's null, which becomes possible after the second patch. (This never happens in normal use, because glibc's sigaction call uses SA_RESTORER unless glibc detected the vDSO.) 
Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 0157a6f0f41f..011c62fa563c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -365,7 +365,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, goto give_sigsegv; } - if (current->binfmt->hasvdso) + if (current->mm->context.vdso) restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); else restorer = &frame->retcode; -- cgit v1.2.3 From 48c508b364324c35018284328b5b92c51d2b30e0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 17 Apr 2008 17:40:45 +0200 Subject: x86: clean up find_e820_area(), 64-bit Change size to unsigned long, because caller and user all used unsigned long. Also make bad_addr take an alignment parameter. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_64.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 9be697126013..a8694a35352b 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -95,7 +95,8 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int bad_addr(unsigned long *addrp, unsigned long size) +static inline int +bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; unsigned long addr = *addrp, last; @@ -105,7 +106,7 @@ again: for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; if (last >= r->start && addr < r->end) { - *addrp = addr = r->end; + *addrp = addr = round_up(r->end, align); changed = 1; goto again; } @@ -174,26 +175,27 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, * Find a free area with 
specified alignment in a specific range. */ unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned size, unsigned long align) + unsigned long size, unsigned long align) { int i; - unsigned long mask = ~(align - 1); for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; - unsigned long addr = ei->addr, last; + unsigned long addr, last; + unsigned long ei_last; if (ei->type != E820_RAM) continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; if (addr < start) - addr = start; - if (addr > ei->addr + ei->size) + addr = round_up(start, align); + if (addr > ei_last) continue; - while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) + while (bad_addr(&addr, size, align) && addr+size <= ei_last) ; - addr = (addr + align - 1) & mask; last = addr + size; - if (last > ei->addr + ei->size) + if (last > ei_last) continue; if (last > end) continue; -- cgit v1.2.3 From b62576a2f53ad7edf604fedba0da1d4329749b7d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 9 Feb 2008 16:16:58 +0100 Subject: x86: use year 2000 offset for cmos clock We know it is already after 2000. Use the year 2000 offset for both 32 and 64 bit, which removes ifdefs and the 1970 magic. [ tglx@linutronix.de: remove 1970 magic, replace bogus commit message ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index eb9b1a198f5e..d2569513ad4c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -9,7 +9,6 @@ #include #ifdef CONFIG_X86_32 -# define CMOS_YEARS_OFFS 1900 /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. 
It is required for NMI access to the @@ -17,14 +16,11 @@ */ volatile unsigned long cmos_lock = 0; EXPORT_SYMBOL(cmos_lock); -#else -/* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ -# define CMOS_YEARS_OFFS 2000 #endif +/* For two digit years assume time is always after that */ +#define CMOS_YEARS_OFFS 2000 + DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -136,11 +132,8 @@ unsigned long mach_get_cmos_time(void) BCD_TO_BIN(century); year += century * 100; printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); - } else { + } else year += CMOS_YEARS_OFFS; - if (year < 1970) - year += 100; - } return mktime(year, mon, day, hour, min, sec); } -- cgit v1.2.3 From 068c9222d0206e8a6a905efeb9f4fe8dde8b5ff5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 9 Feb 2008 16:16:59 +0100 Subject: x86: add warning when RTC clock reports binary We assume that the RTC clock is BCD, so print a warning if it claims to be binary. [ tglx@linutronix.de: changed to WARN_ON - we want to know that! If no one reports it we can remove the complete if (RTC_ALWAYS_BCD) magic, which has RTC_ALWAYS_BCD defined to 1 since Linux 1.0 ... 
] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index d2569513ad4c..d4d8277e890e 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -94,7 +94,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) unsigned long mach_get_cmos_time(void) { - unsigned int year, mon, day, hour, min, sec, century = 0; + unsigned int status, year, mon, day, hour, min, sec, century = 0; /* * If UIP is clear, then we have >= 244 microseconds before @@ -119,7 +119,10 @@ unsigned long mach_get_cmos_time(void) century = CMOS_READ(acpi_gbl_FADT.century); #endif - if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) { + status = CMOS_READ(RTC_CONTROL); + WARN_ON_ONCE((RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); + + if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { BCD_TO_BIN(sec); BCD_TO_BIN(min); BCD_TO_BIN(hour); -- cgit v1.2.3 From 45de70791165ce7eac5232ed5a7c31152567f4da Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 9 Feb 2008 16:17:01 +0100 Subject: x86: enable ACPI extended century handling for 32bit The extended century readout does not solve the year 2038 problem on 32bit! v2: Fix compilation on !ACPI, pointed out by tglx Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index d4d8277e890e..91492190ac72 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -112,15 +112,14 @@ unsigned long mach_get_cmos_time(void) mon = CMOS_READ(RTC_MONTH); year = CMOS_READ(RTC_YEAR); -#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64) - /* CHECKME: Is this really 64bit only ??? 
*/ +#ifdef CONFIG_ACPI if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && acpi_gbl_FADT.century) century = CMOS_READ(acpi_gbl_FADT.century); #endif status = CMOS_READ(RTC_CONTROL); - WARN_ON_ONCE((RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); + WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { BCD_TO_BIN(sec); -- cgit v1.2.3 From 92bc2056855b3250bf6fd5849f05f88d85839efa Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:09:56 -0800 Subject: x86: change most X86_32 pt_regs members to unsigned long Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 5 ++--- arch/x86/kernel/signal_32.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 43930e73f657..2cd89b8a7050 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -332,7 +332,7 @@ void __show_registers(struct pt_regs *regs, int all) init_utsname()->version); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", - 0xffff & regs->cs, regs->ip, regs->flags, + (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); print_symbol("EIP is at %s\n", regs->ip); @@ -341,8 +341,7 @@ void __show_registers(struct pt_regs *regs, int all) printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - regs->ds & 0xffff, regs->es & 0xffff, - regs->fs & 0xffff, gs, ss); + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); if (!all) return; diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 011c62fa563c..34fc0416b135 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -393,8 +393,8 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->sp = (unsigned long) frame; 
regs->ip = (unsigned long) ka->sa.sa_handler; regs->ax = (unsigned long) sig; - regs->dx = (unsigned long) 0; - regs->cx = (unsigned long) 0; + regs->dx = 0; + regs->cx = 0; regs->ds = __USER_DS; regs->es = __USER_DS; -- cgit v1.2.3 From 9902a702c76f904be0057f8647dda9d6f89d4847 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:09:57 -0800 Subject: x86: make X86_32 pt_regs members unsigned long Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 34fc0416b135..b1e97e80a57c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -527,7 +527,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if (regs->orig_ax >= 0) { + if ((long)regs->orig_ax >= 0) { /* If so, check system call restarting.. */ switch (regs->ax) { case -ERESTART_RESTARTBLOCK: @@ -625,7 +625,7 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? 
*/ - if (regs->orig_ax >= 0) { + if ((long)regs->orig_ax >= 0) { /* Restart the system call - no handlers present */ switch (regs->ax) { case -ERESTARTNOHAND: -- cgit v1.2.3 From c3e6ff87a3ad9124a67e149e4f9c080626a0d83e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:09:57 -0800 Subject: x86: regparm(3) is mandatory, no need to annotate Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ptrace.c | 1 - arch/x86/kernel/signal_32.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index eb92ccbb3502..559c1b027417 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1456,7 +1456,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) /* notification of system call entry/exit * - triggered by current->work.syscall_trace */ -__attribute__((regparm(3))) int do_syscall_trace(struct pt_regs *regs, int entryexit) { int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b1e97e80a57c..a393e3711e08 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -654,7 +654,6 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the TIF_WORK_MASK flags */ -__attribute__((regparm(3))) void do_notify_resume(struct pt_regs *regs, void *_unused, __u32 thread_info_flags) { -- cgit v1.2.3 From ac66f3fd89ee20b73b3374e6343c5e36e3e3c51a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:09:58 -0800 Subject: x86: reduce trivial style differences in signal_32|64.c Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 64 +++++++++++++++++++++------------------------ arch/x86/kernel/signal_64.c | 47 
+++++++++++++++++---------------- 2 files changed, 55 insertions(+), 56 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index a393e3711e08..182269b752da 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -131,14 +131,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax COPY_SEG(fs); COPY_SEG(es); COPY_SEG(ds); - COPY(di); - COPY(si); - COPY(bp); - COPY(sp); - COPY(bx); - COPY(dx); - COPY(cx); - COPY(ip); + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); COPY_SEG_STRICT(cs); COPY_SEG_STRICT(ss); @@ -412,7 +406,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif @@ -522,7 +516,7 @@ give_sigsegv: static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, - sigset_t *oldset, struct pt_regs * regs) + sigset_t *oldset, struct pt_regs *regs) { int ret; @@ -530,20 +524,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if ((long)regs->orig_ax >= 0) { /* If so, check system call restarting.. 
*/ switch (regs->ax) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->ax = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { regs->ax = -EINTR; break; - - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->ax = regs->orig_ax; - regs->ip -= 2; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->ax = regs->orig_ax; + regs->ip -= 2; + break; } } @@ -580,18 +575,17 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, */ static void do_signal(struct pt_regs *regs) { + struct k_sigaction ka; siginfo_t info; int signr; - struct k_sigaction ka; sigset_t *oldset; /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. vm86 regs switched out by assembly code - * before reaching here, so testing against kernel - * CS suffices. + * We want the common case to go fast, which is why we may in certain + * cases get here from kernel mode. Just return without doing anything + * if so. + * X86_32: vm86 regs switched out by assembly code before reaching + * here, so testing against kernel CS suffices. */ if (!user_mode(regs)) return; @@ -608,7 +602,7 @@ static void do_signal(struct pt_regs *regs) * have been cleared if the watchpoint triggered * inside the kernel. */ - if (unlikely(current->thread.debugreg7)) + if (current->thread.debugreg7) set_debugreg(current->thread.debugreg7, 7); /* Whee! Actually deliver the signal. */ @@ -642,8 +636,10 @@ static void do_signal(struct pt_regs *regs) } } - /* if there's no signal to deliver, we just put the saved sigmask - * back */ + /* + * If there's no signal to deliver, we just put the saved sigmask + * back. 
+ */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); @@ -654,12 +650,12 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the TIF_WORK_MASK flags */ -void do_notify_resume(struct pt_regs *regs, void *_unused, +void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { /* Pending single-step? */ if (thread_info_flags & _TIF_SINGLESTEP) { - regs->flags |= TF_MASK; + regs->flags |= X86_EFLAGS_TF; clear_thread_flag(TIF_SINGLESTEP); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 1c83e5124c65..863cebe8e60a 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -345,7 +345,7 @@ static long current_syscall_ret(struct pt_regs *regs) static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, - sigset_t *oldset, struct pt_regs *regs) + sigset_t *oldset, struct pt_regs *regs) { int ret; @@ -359,21 +359,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (current_syscall(regs) >= 0) { /* If so, check system call restarting.. 
*/ switch (current_syscall_ret(regs)) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ax = -EINTR; - break; + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->ax = -EINTR; + break; - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->ax = regs->orig_ax; - regs->ip -= 2; + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->ax = -EINTR; break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->ax = regs->orig_ax; + regs->ip -= 2; + break; } } @@ -420,10 +420,11 @@ static void do_signal(struct pt_regs *regs) sigset_t *oldset; /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything + * We want the common case to go fast, which is why we may in certain + * cases get here from kernel mode. Just return without doing anything * if so. + * X86_32: vm86 regs switched out by assembly code before reaching + * here, so testing against kernel CS suffices. */ if (!user_mode(regs)) return; @@ -473,16 +474,18 @@ static void do_signal(struct pt_regs *regs) } } - /* if there's no signal to deliver, we just put the saved sigmask - back. */ + /* + * If there's no signal to deliver, we just put the saved sigmask + * back. 
+ */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } } -void -do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) +void do_notify_resume(struct pt_regs *regs, void *unused, + __u32 thread_info_flags) { #ifdef DEBUG_SIG printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n", @@ -502,7 +505,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #endif /* CONFIG_X86_MCE */ /* deal with pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); if (thread_info_flags & _TIF_HRTICK_RESCHED) -- cgit v1.2.3 From 1a1768039c8fdd48d69a6bc3b7f56943b2b20567 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:09:59 -0800 Subject: x86: Use FIX_EFLAGS define in X86_64 [ tglx@linutronix.de: simplified ] Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 16 +++++++++++----- arch/x86/kernel/signal_64.c | 14 +++++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 182269b752da..9eb23fb66b1e 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -30,6 +30,17 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + +#ifdef CONFIG_X86_32 +# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) +#else +# define FIX_EFLAGS __FIX_EFLAGS +#endif + /* * Atomically swap in the new signal mask, and wait for a signal. 
*/ @@ -122,11 +133,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax err |= __get_user(tmp, &sc->seg); \ loadsegment(seg,tmp); } -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \ - X86_EFLAGS_OF | X86_EFLAGS_DF | \ - X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ - X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) - GET_SEG(gs); COPY_SEG(fs); COPY_SEG(es); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 863cebe8e60a..b7d7a6d5c26b 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,17 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + +#ifdef CONFIG_X86_32 +# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) +#else +# define FIX_EFLAGS __FIX_EFLAGS +#endif + int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs); int ia32_setup_frame(int sig, struct k_sigaction *ka, @@ -87,7 +99,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned { unsigned int tmpflags; err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ } -- cgit v1.2.3 From 2d19c4580682511be1eadf47cdee22d5eb002f94 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:10:00 -0800 Subject: x86: use sizeof(long) to unify signal_32|64.c Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 5 +++-- arch/x86/kernel/signal_64.c | 8 +++----- 2 files changed, 6 insertions(+), 7 
deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 9eb23fb66b1e..47c85e6b14bb 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -214,11 +214,12 @@ badframe: asmlinkage int sys_rt_sigreturn(unsigned long __unused) { - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp - 4); + struct pt_regs *regs = (struct pt_regs *)&__unused; + struct rt_sigframe __user *frame; sigset_t set; int ax; + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b7d7a6d5c26b..1045a07eeaec 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -133,13 +133,11 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) sigset_t set; unsigned long ax; - frame = (struct rt_sigframe __user *)(regs->sp - 8); - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) { + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; - } - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) { + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - } sigdelsetmask(&set, ~_BLOCKABLE); spin_lock_irq(&current->sighand->siglock); -- cgit v1.2.3 From 123a63476cafcede1c70529f62a5bfb96a0efc1b Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:10:00 -0800 Subject: x86: move struct definitions to unified sigframe.h [ tglx@linutronix.de: cleanup the other structs as well ] Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/sigframe.h | 27 +++++++++++++++++++++++++++ 
arch/x86/kernel/sigframe_32.h | 21 --------------------- arch/x86/kernel/signal_32.c | 2 +- arch/x86/kernel/signal_64.c | 9 +-------- 5 files changed, 30 insertions(+), 31 deletions(-) create mode 100644 arch/x86/kernel/sigframe.h delete mode 100644 arch/x86/kernel/sigframe_32.h (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 8ea040124f7d..670c3c311289 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -10,7 +10,7 @@ #include #include #include -#include "sigframe_32.h" +#include "sigframe.h" #include #include #include diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h new file mode 100644 index 000000000000..72bbb519d2dc --- /dev/null +++ b/arch/x86/kernel/sigframe.h @@ -0,0 +1,27 @@ +#ifdef CONFIG_X86_32 +struct sigframe { + char __user *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe { + char __user *pretcode; + int sig; + struct siginfo __user *pinfo; + void __user *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; +#else +struct rt_sigframe { + char __user *pretcode; + struct ucontext uc; + struct siginfo info; +}; +#endif diff --git a/arch/x86/kernel/sigframe_32.h b/arch/x86/kernel/sigframe_32.h deleted file mode 100644 index 0b2221711dad..000000000000 --- a/arch/x86/kernel/sigframe_32.h +++ /dev/null @@ -1,21 +0,0 @@ -struct sigframe -{ - char __user *pretcode; - int sig; - struct sigcontext sc; - struct _fpstate fpstate; - unsigned long extramask[_NSIG_WORDS-1]; - char retcode[8]; -}; - -struct rt_sigframe -{ - char __user *pretcode; - int sig; - struct siginfo __user *pinfo; - void __user *puc; - struct siginfo info; - struct ucontext uc; - struct _fpstate fpstate; - char retcode[8]; -}; diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 
47c85e6b14bb..5447fa5ec52c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -24,7 +24,7 @@ #include #include #include -#include "sigframe_32.h" +#include "sigframe.h" #define DEBUG_SIG 0 diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 1045a07eeaec..8bb1013eb62b 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -26,6 +26,7 @@ #include #include #include +#include "sigframe.h" /* #define DEBUG_SIG 1 */ @@ -58,14 +59,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, /* * Do a signal return; undo the signal stack. */ - -struct rt_sigframe -{ - char __user *pretcode; - struct ucontext uc; - struct siginfo info; -}; - static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *prax) { -- cgit v1.2.3 From 866bc13fc4c625186dd01429c68c5cf708f1cfd5 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:10:02 -0800 Subject: x86: Unify argument names in signal_32|64.c Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 12 ++++++------ arch/x86/kernel/signal_64.c | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 5447fa5ec52c..add9c6e9c44d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -107,9 +107,9 @@ sys_sigaltstack(unsigned long bx) /* * Do a signal return; undo the signal stack. 
*/ - static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) +restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax) { unsigned int err = 0; @@ -165,19 +165,19 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax } } - err |= __get_user(*peax, &sc->ax); + err |= __get_user(*pax, &sc->ax); return err; badframe: return 1; } -asmlinkage int sys_sigreturn(unsigned long __unused) +asmlinkage unsigned long sys_sigreturn(unsigned long __unused) { struct pt_regs *regs = (struct pt_regs *) &__unused; struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8); sigset_t set; - int ax; + unsigned long ax; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -216,8 +216,8 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) { struct pt_regs *regs = (struct pt_regs *)&__unused; struct rt_sigframe __user *frame; + unsigned long ax; sigset_t set; - int ax; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 8bb1013eb62b..f3247d71edbc 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -60,7 +60,8 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, * Do a signal return; undo the signal stack. 
*/ static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *prax) +restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax) { unsigned int err = 0; @@ -113,7 +114,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned } } - err |= __get_user(*prax, &sc->ax); + err |= __get_user(*pax, &sc->ax); return err; badframe: -- cgit v1.2.3 From e0bf0f75bdc441abb05365abc56ee96ba44ca073 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 8 Feb 2008 12:10:03 -0800 Subject: x86: define DEBUG_SIG in signal_64.c Signed-off-by: Harvey Harrison Cc: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_64.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index f3247d71edbc..043294582f41 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -28,7 +28,7 @@ #include #include "sigframe.h" -/* #define DEBUG_SIG 1 */ +#define DEBUG_SIG 0 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -142,7 +142,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; -#ifdef DEBUG_SIG +#if DEBUG_SIG printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax); #endif @@ -274,7 +274,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; -#ifdef DEBUG_SIG +#if DEBUG_SIG printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax); #endif @@ -302,7 +302,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); -#ifdef DEBUG_SIG +#if DEBUG_SIG printk("SIG deliver (%s:%d): 
sp=%p pc=%lx ra=%p\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif @@ -353,7 +353,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, { int ret; -#ifdef DEBUG_SIG +#if DEBUG_SIG printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n", current->pid, sig, regs->ip, regs->sp, regs); @@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef DEBUG_SIG +#if DEBUG_SIG printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n", thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current)); #endif -- cgit v1.2.3 From e319e76521767f7f64cd1fb6f58d4d36bc861a67 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 13 Feb 2008 16:19:36 +0100 Subject: x86: apic: extended interrupt LVT support for AMD Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic_32.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 35a568ea8400..6aa93db7faa3 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -620,6 +620,35 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. 
+ */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Local APIC start and shutdown */ -- cgit v1.2.3 From 270883a8b98af5e6ed591b4762fb046e9f044dfb Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 11 Feb 2008 17:16:01 -0200 Subject: x86: change vsmp compile dependency Change Makefile so vsmp_64.o object is dependent on PARAVIRT, rather than X86_VSMP Signed-off-by: Glauber Costa Signed-off-by: Ravikiran Thirumalai Acked-by: Shai Fultheim Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4eb5ce841106..80e6695a12a3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o -obj-$(CONFIG_X86_VSMP) += vsmp_64.o +obj-$(CONFIG_PARAVIRT) += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_ACPI_SRAT) += srat_32.o -- cgit v1.2.3 From a2beab31b167bd8ba49bb84944e07ac096f2ab0a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 11 Feb 2008 17:16:02 -0200 Subject: x86: make vsmp_init void, instead of static int Signed-off-by: Glauber Costa Signed-off-by: 
Ravikiran Thirumalai Acked-by: Shai Fultheim Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsmp_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index d971210a6d36..976691726de4 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,20 +16,20 @@ #include #include -static int __init vsmp_init(void) +static void __init vsmp_init(void) { void *address; unsigned int cap, ctl; if (!early_pci_allowed()) - return 0; + return; /* Check if we are running on a ScaleMP vSMP box */ if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) - return 0; + return; /* set vSMP magic bits to indicate vSMP capable kernel */ address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); @@ -46,7 +46,7 @@ static int __init vsmp_init(void) } iounmap(address); - return 0; + return; } core_initcall(vsmp_init); -- cgit v1.2.3 From 2785c8d052278228cc3806233c09295088f83d42 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 11 Feb 2008 17:16:03 -0200 Subject: x86: call vsmp_init explicitly It becomes to early for ioremap, so we use early_ioremap Signed-off-by: Glauber Costa Signed-off-by: Ravikiran Thirumalai Acked-by: Shai Fultheim Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup_64.c | 4 ++++ arch/x86/kernel/vsmp_64.c | 11 +++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f4f7ecfb898c..e67925674eae 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -345,6 +345,10 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_init(); +#ifdef CONFIG_PARAVIRT + vsmp_init(); +#endif + dmi_scan_machine(); io_delay_init(); diff --git 
a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 976691726de4..fdf9fba6ba9c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,10 +16,10 @@ #include #include -static void __init vsmp_init(void) +void __init vsmp_init(void) { void *address; - unsigned int cap, ctl; + unsigned int cap, ctl, cfg; if (!early_pci_allowed()) return; @@ -32,7 +32,8 @@ static void __init vsmp_init(void) return; /* set vSMP magic bits to indicate vSMP capable kernel */ - address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); + cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); + address = early_ioremap(cfg, 8); cap = readl(address); ctl = readl(address + 4); printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", @@ -45,8 +46,6 @@ static void __init vsmp_init(void) printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); } - iounmap(address); + early_iounmap(address, 8); return; } - -core_initcall(vsmp_init); -- cgit v1.2.3 From 96597fd2be7070631ad0776cd8bced21415fd5e3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 11 Feb 2008 17:16:04 -0200 Subject: x86: introduce vsmp paravirt helpers Signed-off-by: Glauber Costa Signed-off-by: Ravikiran Thirumalai Acked-by: Shai Fultheim Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsmp_64.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index fdf9fba6ba9c..b93ed66c754f 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -8,6 +8,8 @@ * * Ravikiran Thirumalai , * Shai Fultheim + * Paravirt ops integration: Glauber de Oliveira Costa , + * Ravikiran Thirumalai */ #include @@ -15,6 +17,60 @@ #include #include #include +#include + +/* + * Interrupt control on vSMPowered systems: + * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' + * and vice versa. 
+ */ + +static unsigned long vsmp_save_fl(void) +{ + unsigned long flags = native_save_fl(); + + if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC)) + flags &= ~X86_EFLAGS_IF; + return flags; +} + +static void vsmp_restore_fl(unsigned long flags) +{ + if (flags & X86_EFLAGS_IF) + flags &= ~X86_EFLAGS_AC; + else + flags |= X86_EFLAGS_AC; + native_restore_fl(flags); +} + +static void vsmp_irq_disable(void) +{ + unsigned long flags = native_save_fl(); + + native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); +} + +static void vsmp_irq_enable(void) +{ + unsigned long flags = native_save_fl(); + + native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); +} + +static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len) +{ + switch (type) { + case PARAVIRT_PATCH(pv_irq_ops.irq_enable): + case PARAVIRT_PATCH(pv_irq_ops.irq_disable): + case PARAVIRT_PATCH(pv_irq_ops.save_fl): + case PARAVIRT_PATCH(pv_irq_ops.restore_fl): + return paravirt_patch_default(type, clobbers, ibuf, addr, len); + default: + return native_patch(type, clobbers, ibuf, addr, len); + } + +} void __init vsmp_init(void) { -- cgit v1.2.3 From bc7c314d7048017caa0725b41cc577cccf4fc53b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 11 Feb 2008 17:16:05 -0200 Subject: x86, vsmp: use the paravirt helpers Signed-off-by: Glauber Costa Signed-off-by: Ravikiran Thirumalai Acked-by: Shai Fultheim Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsmp_64.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b93ed66c754f..54202b1805da 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -87,6 +87,13 @@ void __init vsmp_init(void) PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) return; + /* If we are, use the distinguished irq functions */ + pv_irq_ops.irq_disable = vsmp_irq_disable; + pv_irq_ops.irq_enable = 
vsmp_irq_enable; + pv_irq_ops.save_fl = vsmp_save_fl; + pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_init_ops.patch = vsmp_patch; + /* set vSMP magic bits to indicate vSMP capable kernel */ cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); address = early_ioremap(cfg, 8); -- cgit v1.2.3 From 03ae5768b6110ebaa97dc3e7abf1c3d8bec5f874 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 15 Feb 2008 12:00:23 +0100 Subject: x86: use ELF section to list CPU vendor specific code Replace the hardcoded list of initialization functions for each CPU vendor by a list in an ELF section, which is read at initialization in arch/x86/kernel/cpu/cpu.c to fill the cpu_devs[] array. The ELF section, named .x86cpuvendor.init, is reclaimed after boot, and contains entries of type "struct cpu_vendor_dev" which associates a vendor number with a pointer to a "struct cpu_dev" structure. This first modification allows to remove all the VENDOR_init_cpu() functions. This patch also removes the hardcoded calls to early_init_amd() and early_init_intel(). Instead, we add a "c_early_init" member to the cpu_dev structure, which is then called if not NULL by the generic CPU initialization code. Unfortunately, in early_cpu_detect(), this_cpu is not yet set, so we have to use the cpu_devs[] array directly. This patch is part of the Linux Tiny project, and is needed for further patch that will allow to disable compilation of unused CPU support code. 
Signed-off-by: Thomas Petazzoni Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/amd.c | 5 ++++- arch/x86/kernel/cpu/centaur.c | 6 +----- arch/x86/kernel/cpu/common.c | 33 ++++++++++----------------------- arch/x86/kernel/cpu/cpu.h | 26 +++++++++++++------------- arch/x86/kernel/cpu/cyrix.c | 13 ++----------- arch/x86/kernel/cpu/intel.c | 9 +++------ arch/x86/kernel/cpu/transmeta.c | 6 +----- arch/x86/kernel/cpu/umc.c | 7 ++----- arch/x86/kernel/vmlinux_32.lds.S | 5 +++++ arch/x86/kernel/vmlinux_64.lds.S | 5 +++++ 10 files changed, 46 insertions(+), 69 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 693e353999cd..cab4e562b5cb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -63,7 +63,7 @@ static __cpuinit int amd_apic_timer_broken(void) int force_mwait __cpuinitdata; -void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { if (cpuid_eax(0x80000000) >= 0x80000007) { c->x86_power = cpuid_edx(0x80000007); @@ -336,6 +336,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_early_init = early_init_amd, .c_init = init_amd, .c_size_cache = amd_size_cache, }; @@ -345,3 +346,5 @@ int __init amd_init_cpu(void) cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; return 0; } + +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 9681fa15ddf0..194ec8311c3b 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -464,8 +464,4 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_size_cache = centaur_size_cache, }; -int __init centaur_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev; - return 0; -} +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index
a38aafaefc23..0fd6be154d5d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -328,14 +328,9 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - early_init_amd(c); - break; - case X86_VENDOR_INTEL: - early_init_intel(c); - break; - } + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); early_get_cap(c); } @@ -616,23 +611,15 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -/* This is hacky. :) - * We're emulating future behavior. - * In the future, the cpu-specific init functions will be called implicitly - * via the magic of initcalls. - * They will insert themselves into the cpu_devs structure. - * Then, when cpu_init() is called, we can just iterate over that array. - */ void __init early_cpu_init(void) { - intel_cpu_init(); - cyrix_init_cpu(); - nsc_init_cpu(); - amd_init_cpu(); - centaur_init_cpu(); - transmeta_init_cpu(); - nexgen_init_cpu(); - umc_init_cpu(); + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_detect(); } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e0b38c33d842..783691b2a738 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,6 +14,7 @@ struct cpu_dev { struct cpu_model_info c_models[4]; + void (*c_early_init)(struct cpuinfo_x86 *c); void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); @@ -21,18 +22,17 @@ struct cpu_dev { extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +struct cpu_vendor_dev { + int vendor; + struct cpu_dev *cpu_dev; +}; + +#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ + static struct cpu_vendor_dev 
__cpu_vendor_dev_##cpu_vendor_id __used \ + __attribute__((__section__(".x86cpuvendor.init"))) = \ + { cpu_vendor_id, cpu_dev } + +extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; + extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); - -extern void early_init_intel(struct cpuinfo_x86 *c); -extern void early_init_amd(struct cpuinfo_x86 *c); - -/* Specific CPU type init functions */ -int intel_cpu_init(void); -int amd_init_cpu(void); -int cyrix_init_cpu(void); -int nsc_init_cpu(void); -int centaur_init_cpu(void); -int transmeta_init_cpu(void); -int nexgen_init_cpu(void); -int umc_init_cpu(void); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 7139b0262703..9c4ee98f2cb8 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -439,11 +439,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_identify = cyrix_identify, }; -int __init cyrix_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; - return 0; -} +cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", @@ -451,9 +447,4 @@ static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_init = init_nsc, }; -int __init nsc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; - return 0; -} - +cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fae31ce747bd..34468b2e2507 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -30,7 +30,7 @@ struct movsl_mask movsl_mask __read_mostly; #endif -void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) @@ -290,15 +290,12 @@ static struct cpu_dev intel_cpu_dev 
__cpuinitdata = { } }, }, + .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, }; -__init int intel_cpu_init(void) -{ - cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; - return 0; -} +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); #ifndef CONFIG_X86_CMPXCHG unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index e8b422c1c512..c2d168e992f4 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -102,8 +102,4 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_identify = transmeta_identify, }; -int __init transmeta_init_cpu(void) -{ - cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; - return 0; -} +cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index a7a4e75bdcd7..b1acf08245fb 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,8 +19,5 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { }, }; -int __init umc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; - return 0; -} +cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); + diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 2ffa9656fe7a..ce5ed083a1e9 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -149,6 +149,11 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } + .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { + __x86cpuvendor_start = .; + *(.x86cpuvendor.init) + __x86cpuvendor_end = .; + } SECURITY_INIT . 
= ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 4c369451007b..b7ab3c335fae 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -177,6 +177,11 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; + __x86cpuvendor_start = .; + .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { + *(.x86cpuvendor.init) + } + __x86cpuvendor_end = .; SECURITY_INIT . = ALIGN(8); -- cgit v1.2.3 From 04aaa7ba096c707a8df337b29303f1a5a65f0462 Mon Sep 17 00:00:00 2001 From: "David P. Reed" Date: Sun, 17 Feb 2008 16:56:39 -0500 Subject: x86: fix cmos read and write to not use inb_p and outb_p fix code to access CMOS rtc registers so that it does not use inb_p and outb_p routines, which are deprecated. Extensive research on all known CMOS RTC chipset timing shows that there is no need for a delay in accessing the registers of these chips even on old machines. These chipa are never on an expansion bus, but have always been "motherboard" resources, either in the processor chipset or explicitly on the motherboard, and they are not part of the ISA/LPC or PCI buses, so delays should not be based on bus timing. The reason to fix it: 1) port 80 writes often hang some laptops that use ENE EC chipsets, esp. those designed and manufactured by Quanta for HP; 2) RTC accesses are timing sensitive, and extra microseconds may matter; 3) the new "io_delay" function is calibrated by expansion bus timing needs, thus is not appropriate for access to CMOS rtc registers. Signed-off-by: David P. 
Reed Acked-by: Alan Cox Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 91492190ac72..9615eee9b775 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -146,8 +146,8 @@ unsigned char rtc_cmos_read(unsigned char addr) unsigned char val; lock_cmos_prefix(addr); - outb_p(addr, RTC_PORT(0)); - val = inb_p(RTC_PORT(1)); + outb(addr, RTC_PORT(0)); + val = inb(RTC_PORT(1)); lock_cmos_suffix(addr); return val; } @@ -156,8 +156,8 @@ EXPORT_SYMBOL(rtc_cmos_read); void rtc_cmos_write(unsigned char val, unsigned char addr) { lock_cmos_prefix(addr); - outb_p(addr, RTC_PORT(0)); - outb_p(val, RTC_PORT(1)); + outb(addr, RTC_PORT(0)); + outb(val, RTC_PORT(1)); lock_cmos_suffix(addr); } EXPORT_SYMBOL(rtc_cmos_write); -- cgit v1.2.3 From 29a9994bd8dbafc17f43d31651d31ea7b0add6a4 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sun, 17 Feb 2008 23:30:23 +0100 Subject: x86: coding style fixes for arch/x86/kernel/cpu/centaur.c Kills more than 150 errors/warnings Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/centaur.c | 230 +++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 116 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 194ec8311c3b..710fe1ed0731 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -11,21 +11,21 @@ static u32 __cpuinit power2(u32 x) { - u32 s=1; - while(s<=x) - s<<=1; - return s>>=1; + u32 s = 1; + while(s <= x) + s <<= 1; + return s >>= 1; } /* * Set up an actual MCR */ - + static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) { u32 lo, hi; - + hi = base & ~0xFFF; lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ lo &= 
~0xFFF; /* Remove the ctrl value bits */ @@ -45,7 +45,7 @@ static u32 __cpuinit ramtop(void) /* 16388 */ int i; u32 top = 0; u32 clip = 0xFFFFFFFFUL; - + for (i = 0; i < e820.nr_map; i++) { unsigned long start, end; @@ -55,10 +55,10 @@ static u32 __cpuinit ramtop(void) /* 16388 */ * Don't MCR over reserved space. Ignore the ISA hole * we frob around that catastrophe already */ - + if (e820.map[i].type == E820_RESERVED) { - if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + if (e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) clip = e820.map[i].addr; continue; } @@ -71,19 +71,19 @@ static u32 __cpuinit ramtop(void) /* 16388 */ } /* Everything below 'top' should be RAM except for the ISA hole. Because of the limited MCR's we want to map NV/ACPI into our - MCR range for gunk in RAM - + MCR range for gunk in RAM + Clip might cause us to MCR insufficient RAM but that is an acceptable failure mode and should only bite obscure boxes with a VESA hole at 15Mb - + The second case Clip sometimes kicks in is when the EBDA is marked as reserved. Again we fail safe with reasonable results */ - - if(top>clip) - top=clip; - + + if(top > clip) + top = clip; + return top; } @@ -99,8 +99,8 @@ static int __cpuinit centaur_mcr_compute(int nr, int key) u32 top = root; u32 floor = 0; int ct = 0; - - while(ct high && fspace > low) + + if (fspace > high && fspace > low) { centaur_mcr_insert(ct, floor, fspace, key); floor += fspace; } - else if(high > low) - { + else if (high > low) { centaur_mcr_insert(ct, top, high, key); top += high; } - else if(low > 0) - { + else if (low > 0) { base -= low; centaur_mcr_insert(ct, base, low, key); } @@ -162,7 +160,7 @@ static int __cpuinit centaur_mcr_compute(int nr, int key) * We loaded ct values. We now need to set the mask. The caller * must do this bit. 
*/ - + return ct; } @@ -173,7 +171,7 @@ static void __cpuinit centaur_create_optimal_mcr(void) * Allocate up to 6 mcrs to mark as much of ram as possible * as write combining and weak write ordered. * - * To experiment with: Linux never uses stack operations for + * To experiment with: Linux never uses stack operations for * mmio spaces so we could globally enable stack operation wc * * Load the registers with type 31 - full write combining, all @@ -184,8 +182,8 @@ static void __cpuinit centaur_create_optimal_mcr(void) /* * Wipe unused MCRs */ - - for(i=used;i<8;i++) + + for (i = used; i < 8; i++) wrmsr(MSR_IDT_MCR0+i, 0, 0); } @@ -205,21 +203,21 @@ static void __cpuinit winchip2_create_optimal_mcr(void) */ int used = centaur_mcr_compute(6, 25); - + /* * Mark the registers we are using. */ - + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for(i=0;i>17) & 7; lo |= key<<6; /* replace with unlock key */ wrmsr(MSR_IDT_MCR_CTRL, lo, hi); @@ -242,9 +240,9 @@ static void __cpuinit winchip2_unprotect_mcr(void) static void __cpuinit winchip2_protect_mcr(void) { u32 lo, hi; - + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo&=~0x1C0; /* blank bits 8-6 */ + lo &= ~0x1C0; /* blank bits 8-6 */ wrmsr(MSR_IDT_MCR_CTRL, lo, hi); } #endif /* CONFIG_X86_OOSTORE */ @@ -267,17 +265,17 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) /* enable ACE unit, if present and disabled */ if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { - rdmsr (MSR_VIA_FCR, lo, hi); + rdmsr(MSR_VIA_FCR, lo, hi); lo |= ACE_FCR; /* enable ACE unit */ - wrmsr (MSR_VIA_FCR, lo, hi); + wrmsr(MSR_VIA_FCR, lo, hi); printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); } /* enable RNG unit, if present and disabled */ if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { - rdmsr (MSR_VIA_RNG, lo, hi); + rdmsr(MSR_VIA_RNG, lo, hi); lo |= RNG_ENABLE; /* enable RNG unit */ - wrmsr (MSR_VIA_RNG, lo, hi); + wrmsr(MSR_VIA_RNG, lo, hi); printk(KERN_INFO "CPU: Enabled h/w RNG\n"); } @@ -288,15 +286,15 @@ static void __cpuinit 
init_c3(struct cpuinfo_x86 *c) } /* Cyrix III family needs CX8 & PGE explicitly enabled. */ - if (c->x86_model >=6 && c->x86_model <= 9) { - rdmsr (MSR_VIA_FCR, lo, hi); + if (c->x86_model >= 6 && c->x86_model <= 9) { + rdmsr(MSR_VIA_FCR, lo, hi); lo |= (1<<1 | 1<<7); - wrmsr (MSR_VIA_FCR, lo, hi); + wrmsr(MSR_VIA_FCR, lo, hi); set_bit(X86_FEATURE_CX8, c->x86_capability); } /* Before Nehemiah, the C3's had 3dNOW! */ - if (c->x86_model >=6 && c->x86_model <9) + if (c->x86_model >= 6 && c->x86_model < 9) set_bit(X86_FEATURE_3DNOW, c->x86_capability); get_model_name(c); @@ -306,31 +304,31 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { enum { - ECX8=1<<1, - EIERRINT=1<<2, - DPM=1<<3, - DMCE=1<<4, - DSTPCLK=1<<5, - ELINEAR=1<<6, - DSMC=1<<7, - DTLOCK=1<<8, - EDCTLB=1<<8, - EMMX=1<<9, - DPDC=1<<11, - EBRPRED=1<<12, - DIC=1<<13, - DDC=1<<14, - DNA=1<<15, - ERETSTK=1<<16, - E2MMX=1<<19, - EAMD3D=1<<20, + ECX8 = 1<<1, + EIERRINT = 1<<2, + DPM = 1<<3, + DMCE = 1<<4, + DSTPCLK = 1<<5, + ELINEAR = 1<<6, + DSMC = 1<<7, + DTLOCK = 1<<8, + EDCTLB = 1<<8, + EMMX = 1<<9, + DPDC = 1<<11, + EBRPRED = 1<<12, + DIC = 1<<13, + DDC = 1<<14, + DNA = 1<<15, + ERETSTK = 1<<16, + E2MMX = 1<<19, + EAMD3D = 1<<20, }; char *name; - u32 fcr_set=0; - u32 fcr_clr=0; - u32 lo,hi,newlo; - u32 aa,bb,cc,dd; + u32 fcr_set = 0; + u32 fcr_clr = 0; + u32 lo, hi, newlo; + u32 aa, bb, cc, dd; /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ @@ -338,12 +336,12 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) switch (c->x86) { - case 5: - switch(c->x86_model) { + case 5: + switch (c->x86_model) { case 4: - name="C6"; - fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr=DPDC; + name = "C6"; + fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr = DPDC; printk(KERN_NOTICE "Disabling bugged TSC.\n"); clear_bit(X86_FEATURE_TSC, c->x86_capability); #ifdef 
CONFIG_X86_OOSTORE @@ -351,29 +349,29 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) /* Enable write combining on non-stack, non-string write combining on string, all types - weak write ordering - - The C6 original lacks weak read order - + weak write ordering + + The C6 original lacks weak read order + Note 0x120 is write only on Winchip 1 */ - + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif +#endif break; case 8: - switch(c->x86_mask) { + switch (c->x86_mask) { default: - name="2"; + name = "2"; break; case 7 ... 9: - name="2A"; + name = "2A"; break; case 10 ... 15: - name="2B"; + name = "2B"; break; } - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; + fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr = DPDC; #ifdef CONFIG_X86_OOSTORE winchip2_unprotect_mcr(); winchip2_create_optimal_mcr(); @@ -381,17 +379,17 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) /* Enable write combining on non-stack, non-string write combining on string, all types - weak write ordering + weak write ordering */ - lo|=31; + lo |= 31; wrmsr(MSR_IDT_MCR_CTRL, lo, hi); winchip2_protect_mcr(); #endif break; case 9: - name="3"; - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; + name = "3"; + fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr = DPDC; #ifdef CONFIG_X86_OOSTORE winchip2_unprotect_mcr(); winchip2_create_optimal_mcr(); @@ -399,50 +397,50 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) /* Enable write combining on non-stack, non-string write combining on string, all types - weak write ordering + weak write ordering */ - lo|=31; + lo |= 31; wrmsr(MSR_IDT_MCR_CTRL, lo, hi); winchip2_protect_mcr(); #endif break; default: - name="??"; + name = "??"; } rdmsr(MSR_IDT_FCR1, lo, hi); - newlo=(lo|fcr_set) & (~fcr_clr); + newlo = (lo|fcr_set) & (~fcr_clr); - if (newlo!=lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); - 
wrmsr(MSR_IDT_FCR1, newlo, hi ); + if (newlo != lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo); + wrmsr(MSR_IDT_FCR1, newlo, hi); } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); } /* Emulate MTRRs using Centaur's MCR. */ set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); /* Report CX8 */ set_bit(X86_FEATURE_CX8, c->x86_capability); /* Set 3DNow! on Winchip 2 and above. */ - if (c->x86_model >=8) + if (c->x86_model >= 8) set_bit(X86_FEATURE_3DNOW, c->x86_capability); /* See if we can find out some more. */ - if ( cpuid_eax(0x80000000) >= 0x80000005 ) { + if (cpuid_eax(0x80000000) >= 0x80000005) { /* Yes, we can. */ - cpuid(0x80000005,&aa,&bb,&cc,&dd); + cpuid(0x80000005, &aa, &bb, &cc, &dd); /* Add L1 data and code cache sizes. */ c->x86_cache_size = (cc>>24)+(dd>>24); } - sprintf( c->x86_model_id, "WinChip %s", name ); + sprintf(c->x86_model_id, "WinChip %s", name); break; - case 6: + case 6: init_c3(c); break; } } -static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) @@ -451,8 +449,8 @@ static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigne /* VIA also screwed up Nehemiah stepping 1, and made it return '65KB' instead of '64KB' - Note, it seems this may only be in engineering samples. 
*/ - if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) - size -=1; + if ((c->x86 == 6) && (c->x86_model == 9) && (c->x86_mask == 1) && (size == 65)) + size -= 1; return size; } -- cgit v1.2.3 From edc05e6de3e2fd203da21ba984b19d92e5398b62 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 18 Feb 2008 03:30:47 +0100 Subject: x86: more coding style fixes in centaur.c no code changed: arch/x86/kernel/cpu/centaur.o: text data bss dec hex filename 1031 324 0 1355 54b centaur.o.before 1031 324 0 1355 54b centaur.o.after md5: 4f306a7f980b58eb69c4bdcfcde565f1 centaur.o.before.asm 4f306a7f980b58eb69c4bdcfcde565f1 centaur.o.after.asm Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/centaur.c | 394 +++++++++++++++++++++--------------------- 1 file changed, 199 insertions(+), 195 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 710fe1ed0731..efe8da88da53 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,10 +1,12 @@ #include #include #include + #include #include #include #include + #include "cpu.h" #ifdef CONFIG_X86_OOSTORE @@ -12,16 +14,17 @@ static u32 __cpuinit power2(u32 x) { u32 s = 1; - while(s <= x) + + while (s <= x) s <<= 1; + return s >>= 1; } /* - * Set up an actual MCR + * Set up an actual MCR */ - static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) { u32 lo, hi; @@ -35,16 +38,15 @@ static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) } /* - * Figure what we can cover with MCR's + * Figure what we can cover with MCR's * - * Shortcut: We know you can't put 4Gig of RAM on a winchip + * Shortcut: We know you can't put 4Gig of RAM on a winchip */ - -static u32 __cpuinit ramtop(void) /* 16388 */ +static u32 __cpuinit ramtop(void) { - int i; - u32 top = 0; u32 clip = 0xFFFFFFFFUL; + u32 top = 0; + int i; for (i = 0; i < e820.nr_map; i++) { unsigned long 
start, end; @@ -52,13 +54,12 @@ static u32 __cpuinit ramtop(void) /* 16388 */ if (e820.map[i].addr > 0xFFFFFFFFUL) continue; /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophe already + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophe already */ - - if (e820.map[i].type == E820_RESERVED) - { - if (e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + if (e820.map[i].type == E820_RESERVED) { + if (e820.map[i].addr >= 0x100000UL && + e820.map[i].addr < clip) clip = e820.map[i].addr; continue; } @@ -69,28 +70,27 @@ static u32 __cpuinit ramtop(void) /* 16388 */ if (end > top) top = end; } - /* Everything below 'top' should be RAM except for the ISA hole. - Because of the limited MCR's we want to map NV/ACPI into our - MCR range for gunk in RAM - - Clip might cause us to MCR insufficient RAM but that is an - acceptable failure mode and should only bite obscure boxes with - a VESA hole at 15Mb - - The second case Clip sometimes kicks in is when the EBDA is marked - as reserved. Again we fail safe with reasonable results - */ - - if(top > clip) + /* + * Everything below 'top' should be RAM except for the ISA hole. + * Because of the limited MCR's we want to map NV/ACPI into our + * MCR range for gunk in RAM + * + * Clip might cause us to MCR insufficient RAM but that is an + * acceptable failure mode and should only bite obscure boxes with + * a VESA hole at 15Mb + * + * The second case Clip sometimes kicks in is when the EBDA is marked + * as reserved. 
Again we fail safe with reasonable results + */ + if (top > clip) top = clip; return top; } /* - * Compute a set of MCR's to give maximum coverage + * Compute a set of MCR's to give maximum coverage */ - static int __cpuinit centaur_mcr_compute(int nr, int key) { u32 mem = ramtop(); @@ -100,33 +100,31 @@ static int __cpuinit centaur_mcr_compute(int nr, int key) u32 floor = 0; int ct = 0; - while (ct < nr) - { + while (ct < nr) { u32 fspace = 0; + u32 high; + u32 low; /* - * Find the largest block we will fill going upwards + * Find the largest block we will fill going upwards */ - - u32 high = power2(mem-top); + high = power2(mem-top); /* - * Find the largest block we will fill going downwards + * Find the largest block we will fill going downwards */ - - u32 low = base/2; + low = base/2; /* - * Don't fill below 1Mb going downwards as there - * is an ISA hole in the way. + * Don't fill below 1Mb going downwards as there + * is an ISA hole in the way. */ - if (base <= 1024*1024) low = 0; /* - * See how much space we could cover by filling below - * the ISA hole + * See how much space we could cover by filling below + * the ISA hole */ if (floor == 0) @@ -137,52 +135,48 @@ static int __cpuinit centaur_mcr_compute(int nr, int key) /* And forget ROM space */ /* - * Now install the largest coverage we get + * Now install the largest coverage we get */ - - if (fspace > high && fspace > low) - { + if (fspace > high && fspace > low) { centaur_mcr_insert(ct, floor, fspace, key); floor += fspace; - } - else if (high > low) { + } else if (high > low) { centaur_mcr_insert(ct, top, high, key); top += high; - } - else if (low > 0) { + } else if (low > 0) { base -= low; centaur_mcr_insert(ct, base, low, key); - } - else break; + } else + break; ct++; } /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. + * We loaded ct values. We now need to set the mask. The caller + * must do this bit. 
*/ - return ct; } static void __cpuinit centaur_create_optimal_mcr(void) { + int used; int i; + /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining and weak write ordered. * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc + * To experiment with: Linux never uses stack operations for + * mmio spaces so we could globally enable stack operation wc * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. + * Load the registers with type 31 - full write combining, all + * writes weakly ordered. */ - int used = centaur_mcr_compute(6, 31); + used = centaur_mcr_compute(6, 31); /* - * Wipe unused MCRs + * Wipe unused MCRs */ - for (i = used; i < 8; i++) wrmsr(MSR_IDT_MCR0+i, 0, 0); } @@ -190,31 +184,30 @@ static void __cpuinit centaur_create_optimal_mcr(void) static void __cpuinit winchip2_create_optimal_mcr(void) { u32 lo, hi; + int used; int i; /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining, weak store ordered. * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining + * Load the registers with type 25 + * 8 - weak write ordering + * 16 - weak read ordering + * 1 - write combining */ - - int used = centaur_mcr_compute(6, 25); + used = centaur_mcr_compute(6, 25); /* - * Mark the registers we are using. + * Mark the registers we are using. 
*/ - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); for (i = 0; i < used; i++) lo |= 1<<(9+i); wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* - * Wipe unused MCRs + * Wipe unused MCRs */ for (i = used; i < 8; i++) @@ -222,9 +215,8 @@ static void __cpuinit winchip2_create_optimal_mcr(void) } /* - * Handle the MCR key on the Winchip 2. + * Handle the MCR key on the Winchip 2. */ - static void __cpuinit winchip2_unprotect_mcr(void) { u32 lo, hi; @@ -301,28 +293,29 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) display_cacheinfo(c); } +enum { + ECX8 = 1<<1, + EIERRINT = 1<<2, + DPM = 1<<3, + DMCE = 1<<4, + DSTPCLK = 1<<5, + ELINEAR = 1<<6, + DSMC = 1<<7, + DTLOCK = 1<<8, + EDCTLB = 1<<8, + EMMX = 1<<9, + DPDC = 1<<11, + EBRPRED = 1<<12, + DIC = 1<<13, + DDC = 1<<14, + DNA = 1<<15, + ERETSTK = 1<<16, + E2MMX = 1<<19, + EAMD3D = 1<<20, +}; + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { - enum { - ECX8 = 1<<1, - EIERRINT = 1<<2, - DPM = 1<<3, - DMCE = 1<<4, - DSTPCLK = 1<<5, - ELINEAR = 1<<6, - DSMC = 1<<7, - DTLOCK = 1<<8, - EDCTLB = 1<<8, - EMMX = 1<<9, - DPDC = 1<<11, - EBRPRED = 1<<12, - DIC = 1<<13, - DDC = 1<<14, - DNA = 1<<15, - ERETSTK = 1<<16, - E2MMX = 1<<19, - EAMD3D = 1<<20, - }; char *name; u32 fcr_set = 0; @@ -330,126 +323,137 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) u32 lo, hi, newlo; u32 aa, bb, cc, dd; - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_bit(0*32+31, c->x86_capability); switch (c->x86) { - case 5: - switch (c->x86_model) { - case 4: - name = "C6"; - fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr = DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); - clear_bit(X86_FEATURE_TSC, c->x86_capability); + switch (c->x86_model) { + case 4: + name = "C6"; + fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr = DPDC; + 
printk(KERN_NOTICE "Disabling bugged TSC.\n"); + clear_bit(X86_FEATURE_TSC, c->x86_capability); #ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - - The C6 original lacks weak read order - - Note 0x120 is write only on Winchip 1 */ - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); + centaur_create_optimal_mcr(); + /* + * Enable: + * write combining on non-stack, non-string + * write combining on string, all types + * weak write ordering + * + * The C6 original lacks weak read order + * + * Note 0x120 is write only on Winchip 1 + */ + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); #endif + break; + case 8: + switch (c->x86_mask) { + default: + name = "2"; break; - case 8: - switch (c->x86_mask) { - default: - name = "2"; - break; - case 7 ... 9: - name = "2A"; - break; - case 10 ... 15: - name = "2B"; - break; - } - fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr = DPDC; + case 7 ... 9: + name = "2A"; + break; + case 10 ... 
15: + name = "2B"; + break; + } + fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| + E2MMX|EAMD3D; + fcr_clr = DPDC; #ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* + * Enable: + * write combining on non-stack, non-string + * write combining on string, all types + * weak write ordering + */ + lo |= 31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); #endif - break; - case 9: - name = "3"; - fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr = DPDC; + break; + case 9: + name = "3"; + fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| + E2MMX|EAMD3D; + fcr_clr = DPDC; #ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* + * Enable: + * write combining on non-stack, non-string + * write combining on string, all types + * weak write ordering + */ + lo |= 31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); #endif - break; - default: - name = "??"; - } + break; + default: + name = "??"; + } - rdmsr(MSR_IDT_FCR1, lo, hi); - newlo = (lo|fcr_set) & (~fcr_clr); + rdmsr(MSR_IDT_FCR1, lo, hi); + newlo = (lo|fcr_set) & (~fcr_clr); - if (newlo != lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo); - wrmsr(MSR_IDT_FCR1, newlo, hi); - } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); - } - 
/* Emulate MTRRs using Centaur's MCR. */ - set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); - /* Report CX8 */ - set_bit(X86_FEATURE_CX8, c->x86_capability); - /* Set 3DNow! on Winchip 2 and above. */ - if (c->x86_model >= 8) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - /* See if we can find out some more. */ - if (cpuid_eax(0x80000000) >= 0x80000005) { - /* Yes, we can. */ - cpuid(0x80000005, &aa, &bb, &cc, &dd); - /* Add L1 data and code cache sizes. */ - c->x86_cache_size = (cc>>24)+(dd>>24); - } - sprintf(c->x86_model_id, "WinChip %s", name); - break; + if (newlo != lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", + lo, newlo); + wrmsr(MSR_IDT_FCR1, newlo, hi); + } else { + printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); + } + /* Emulate MTRRs using Centaur's MCR. */ + set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + /* Report CX8 */ + set_bit(X86_FEATURE_CX8, c->x86_capability); + /* Set 3DNow! on Winchip 2 and above. */ + if (c->x86_model >= 8) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + /* See if we can find out some more. */ + if (cpuid_eax(0x80000000) >= 0x80000005) { + /* Yes, we can. */ + cpuid(0x80000005, &aa, &bb, &cc, &dd); + /* Add L1 data and code cache sizes. */ + c->x86_cache_size = (cc>>24)+(dd>>24); + } + sprintf(c->x86_model_id, "WinChip %s", name); + break; case 6: - init_c3(c); - break; + init_c3(c); + break; } } -static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) +static unsigned int __cpuinit +centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) size >>= 8; - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. 
*/ - if ((c->x86 == 6) && (c->x86_model == 9) && (c->x86_mask == 1) && (size == 65)) + /* + * There's also an erratum in Nehemiah stepping 1, which + * returns '65KB' instead of '64KB' + * - Note, it seems this may only be in engineering samples. + */ + if ((c->x86 == 6) && (c->x86_model == 9) && + (c->x86_mask == 1) && (size == 65)) size -= 1; return size; -- cgit v1.2.3 From 325f86ec6d2ae4ab9879e745a92444f8bac20233 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Tue, 19 Feb 2008 21:02:16 +0100 Subject: x86: coding style fixes to arch/x86/kernel/syscall_64.c After the patch: total: 0 errors, 1 warnings, 29 lines checked no code changed: arch/x86/kernel/syscall_64.o: text data bss dec hex filename 2304 0 0 2304 900 syscall_64.o.before 2304 0 0 2304 900 syscall_64.o.after md5: 0fdbb875cde8892296585226b92f4333 syscall_64.o.before.asm 0fdbb875cde8892296585226b92f4333 syscall_64.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/syscall_64.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 9d498c2f8eea..170d43c17487 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -1,4 +1,4 @@ -/* System call table for x86-64. */ +/* System call table for x86-64. */ #include #include @@ -7,20 +7,23 @@ #define __NO_STUBS -#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; +#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; #undef _ASM_X86_64_UNISTD_H_ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) [ nr ] = sym, +#define __SYSCALL(nr, sym) [nr] = sym, #undef _ASM_X86_64_UNISTD_H_ -typedef void (*sys_call_ptr_t)(void); +typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { - /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. 
*/ + /* + *Smells like a like a compiler bug -- it doesn't work + *when the & below is removed. + */ [0 ... __NR_syscall_max] = &sys_ni_syscall, #include }; -- cgit v1.2.3 From 1577720524bab104eeb605c810963a2106cf4575 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Tue, 19 Feb 2008 23:20:45 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/p5.c The patch make the file errors free. Only 4 "WARNING: line over 80 characters" left. arch/x86/kernel/cpu/mcheck/p5.o: text data bss dec hex filename 452 0 4 456 1c8 p5.o.before 452 0 4 456 1c8 p5.o.after md5: 50c945ef150aa95bf0481cc3e1dc3315 p5.o.before.asm 50c945ef150aa95bf0481cc3e1dc3315 p5.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/p5.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index a18310aaae0c..bfa5817afdda 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -9,20 +9,20 @@ #include #include -#include +#include #include #include #include "mce.h" /* Machine check handler for Pentium class Intel */ -static void pentium_machine_check(struct pt_regs * regs, long error_code) +static void pentium_machine_check(struct pt_regs *regs, long error_code) { u32 loaddr, hi, lotype; rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); - if(lotype&(1<<5)) + if (lotype&(1<<5)) printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); add_taint(TAINT_MACHINE_CHECK); } @@ -31,13 +31,13 @@ static void pentium_machine_check(struct pt_regs * regs, long error_code) void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; - + /*Check for MCE support */ - if( !cpu_has(c, X86_FEATURE_MCE) ) - return; + if (!cpu_has(c, 
X86_FEATURE_MCE)) + return; /* Default P5 to off as its often misconnected */ - if(mce_disabled != -1) + if (mce_disabled != -1) return; machine_check_vector = pentium_machine_check; wmb(); @@ -47,7 +47,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_P5_MC_TYPE, l, h); printk(KERN_INFO "Intel old style machine check architecture supported.\n"); - /* Enable MCE */ + /* Enable MCE */ set_in_cr4(X86_CR4_MCE); printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); } -- cgit v1.2.3 From 8000a83f43f2d943eebeee9e070beb45949858ec Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Tue, 19 Feb 2008 23:34:02 +0100 Subject: x86: coding style fixes to arch/x86/kernel/x8664_ksyms_64.c arch/x86/kernel/x8664_ksyms_64.o: text data bss dec hex filename 0 0 0 0 0 x8664_ksyms_64.o.before 0 0 0 0 0 x8664_ksyms_64.o.after md5: 2dd2d82a2b440a3c29b9ac9ce3221994 x8664_ksyms_64.o.before.asm 2dd2d82a2b440a3c29b9ac9ce3221994 x8664_ksyms_64.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/x8664_ksyms_64.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a66e9c1a0537..e63d96823a16 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -35,15 +35,17 @@ EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); -/* Export string functions. We normally rely on gcc builtin for most of these, - but gcc sometimes decides not to inline them. */ +/* + * Export string functions. We normally rely on gcc builtin for most of these, + * but gcc sometimes decides not to inline them. 
+ */ #undef memcpy #undef memset #undef memmove -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -extern void * __memcpy(void *,const void *,__kernel_size_t); +extern void *memset(void *, int, __kernel_size_t); +extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); -- cgit v1.2.3 From c99aa3804eecbeadabcf658a535e8a00d0f2b6e6 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Wed, 20 Feb 2008 00:18:05 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/nexgen.c arch/x86/kernel/cpu/nexgen.o: text data bss dec hex filename 111 316 0 427 1ab nexgen.o.before 111 316 0 427 1ab nexgen.o.after md5: e796efefea9ebc6644338bad226599ee nexgen.o.before.asm e796efefea9ebc6644338bad226599ee nexgen.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/nexgen.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c index 961fbe1a748f..5d5e1c134123 100644 --- a/arch/x86/kernel/cpu/nexgen.c +++ b/arch/x86/kernel/cpu/nexgen.c @@ -9,11 +9,11 @@ * Detect a NexGen CPU running without BIOS hypercode new enough * to have CPUID. (Thanks to Herbert Oppmann) */ - + static int __cpuinit deep_magic_nexgen_probe(void) { int ret; - + __asm__ __volatile__ ( " movw $0x5555, %%ax\n" " xorw %%dx,%%dx\n" @@ -22,22 +22,21 @@ static int __cpuinit deep_magic_nexgen_probe(void) " movl $0, %%eax\n" " jnz 1f\n" " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx" ); + "1:\n" + : "=a" (ret) : : "cx", "dx"); return ret; } -static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) +static void __cpuinit init_nexgen(struct cpuinfo_x86 *c) { c->x86_cache_size = 256; /* A few had 1 MB... 
*/ } -static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) +static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c) { /* Detect NexGen with old hypercode */ - if ( deep_magic_nexgen_probe() ) { + if (deep_magic_nexgen_probe()) strcpy(c->x86_vendor_id, "NexGenDriven"); - } } static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { -- cgit v1.2.3 From 8fa6878ffc6366f490e99a1ab31127fb599657c9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 20 Feb 2008 10:41:51 -0800 Subject: x86: split cpuinfo from setup_64.c into cpu/proc_64.c x86 /proc/cpuinfo code can be unified. This is the first step of unification. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/proc_64.c | 126 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 120 ---------------------------------------- 3 files changed, 127 insertions(+), 120 deletions(-) create mode 100644 arch/x86/kernel/cpu/proc_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c4d7c5dbd7..8ba7d281fbc2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o obj-$(CONFIG_X86_32) += nexgen.o obj-$(CONFIG_X86_32) += umc.o +obj-$(CONFIG_X86_64) += proc_64.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/proc_64.c b/arch/x86/kernel/cpu/proc_64.c new file mode 100644 index 000000000000..bf4a94b4b0f0 --- /dev/null +++ b/arch/x86/kernel/cpu/proc_64.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include + +/* + * Get CPU information for use by the procfs. 
+ */ + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + struct cpuinfo_x86 *c = v; + int cpu = 0, i; + +#ifdef CONFIG_SMP + cpu = c->cpu_index; +#endif + + seq_printf(m, "processor\t: %u\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + (unsigned)cpu, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + (int)c->x86_model, + c->x86_model_id[0] ? c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if (cpu_has(c, X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)cpu); + + if (!freq) + freq = cpu_khz; + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", + freq / 1000, (freq % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + +#ifdef CONFIG_SMP + if (smp_num_siblings * c->x86_max_cores > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif + + seq_printf(m, + "fpu\t\t: yes\n" + "fpu_exception\t: yes\n" + "cpuid level\t: %d\n" + "wp\t\t: yes\n" + "flags\t\t:", + c->cpuid_level); + + for (i = 0; i < 32*NCAPINTS; i++) + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + + if (c->x86_tlbsize > 0) + seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); + seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); + seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); + + seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", + c->x86_phys_bits, c->x86_virt_bits); + + seq_printf(m, "power management:"); + 
for (i = 0; i < 32; i++) { + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + } + + seq_printf(m, "\n\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) + return &cpu_data(*pos); + return NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + *pos = next_cpu(*pos, cpu_online_map); + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index e67925674eae..187f084b9491 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -1068,123 +1068,3 @@ static __init int setup_disablecpuid(char *arg) return 1; } __setup("clearcpuid=", setup_disablecpuid); - -/* - * Get CPU information for use by the procfs. - */ - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - struct cpuinfo_x86 *c = v; - int cpu = 0, i; - -#ifdef CONFIG_SMP - cpu = c->cpu_index; -#endif - - seq_printf(m, "processor\t: %u\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - (unsigned)cpu, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - (int)c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)cpu); - - if (!freq) - freq = cpu_khz; - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - -#ifdef CONFIG_SMP - if (smp_num_siblings * c->x86_max_cores > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif - - seq_printf(m, - "fpu\t\t: yes\n" - "fpu_exception\t: yes\n" - "cpuid level\t: %d\n" - "wp\t\t: yes\n" - "flags\t\t:", - c->cpuid_level); - - for (i = 0; i < 32*NCAPINTS; i++) - if (cpu_has(c, i) && x86_cap_flags[i] != NULL) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - - if (c->x86_tlbsize > 0) - seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); - seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); - seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); - - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", - c->x86_phys_bits, c->x86_virt_bits); - - seq_printf(m, "power management:"); - for (i = 0; i < 32; i++) { - if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags) && - x86_power_flags[i]) - seq_printf(m, "%s%s", - x86_power_flags[i][0]?" 
":"", - x86_power_flags[i]); - else - seq_printf(m, " [%d]", i); - } - } - - seq_printf(m, "\n\n"); - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = first_cpu(cpu_online_map); - if ((*pos) < NR_CPUS && cpu_online(*pos)) - return &cpu_data(*pos); - return NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - *pos = next_cpu(*pos, cpu_online_map); - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; -- cgit v1.2.3 From a967ceac01cd3847011e2a777b8365b30afa770a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 20 Feb 2008 10:45:29 -0800 Subject: x86: make cpu/proc|_64.c similar clean up for unification. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 120 ++++++++++++++++++++++++------------------ arch/x86/kernel/cpu/proc_64.c | 63 ++++++++++++---------- 2 files changed, 105 insertions(+), 78 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index af11d31dce0a..9bc3b04421cd 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -8,75 +8,90 @@ /* * Get CPU information for use by the procfs. 
*/ +static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, + unsigned int cpu) +{ +#ifdef CONFIG_X86_HT + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif +} + +static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) +{ + /* + * We use exception 16 if we have hardware math and we've either seen + * it or the CPU claims it is internal + */ + int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); + seq_printf(m, + "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); +} + static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; - int i, n = 0; - int fpu_exception; + unsigned int cpu = 0; + int i; #ifdef CONFIG_SMP - n = c->cpu_index; + cpu = c->cpu_index; #endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? c->x86_model_id : "unknown"); + seq_printf(m, "processor\t: %u\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %u\n" + "model name\t: %s\n", + cpu, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); if (c->x86_mask || c->cpuid_level >= 0) seq_printf(m, "stepping\t: %d\n", c->x86_mask); else seq_printf(m, "stepping\t: unknown\n"); - if ( cpu_has(c, X86_FEATURE_TSC) ) { - unsigned int freq = cpufreq_quick_get(n); + if (cpu_has(c, X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get(cpu); + if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); -#ifdef CONFIG_X86_HT - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, n))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? 
"yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, c->x86_capability) && - x86_cap_flags[i] != NULL ) + + show_cpuinfo_core(m, c, cpu); + show_cpuinfo_misc(m, c); + + seq_printf(m, "flags\t\t:"); + for (i = 0; i < 32*NCAPINTS; i++) + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); for (i = 0; i < 32; i++) @@ -91,8 +106,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); return 0; @@ -106,14 +121,17 @@ static void *c_start(struct seq_file *m, loff_t *pos) return &cpu_data(*pos); return NULL; } + static void *c_next(struct seq_file *m, void *v, loff_t *pos) { *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } + static void c_stop(struct seq_file *m, void *v) { } + const struct seq_operations cpuinfo_op = { .start = c_start, .next = c_next, diff --git a/arch/x86/kernel/cpu/proc_64.c b/arch/x86/kernel/cpu/proc_64.c index bf4a94b4b0f0..ce1b08f96820 100644 --- a/arch/x86/kernel/cpu/proc_64.c +++ b/arch/x86/kernel/cpu/proc_64.c @@ -8,25 +8,48 @@ /* * Get CPU information for use by the procfs. 
*/ +static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, + unsigned int cpu) +{ +#ifdef CONFIG_SMP + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif +} + +static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) +{ + seq_printf(m, + "fpu\t\t: yes\n" + "fpu_exception\t: yes\n" + "cpuid level\t: %d\n" + "wp\t\t: yes\n", + c->cpuid_level); +} static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; - int cpu = 0, i; + unsigned int cpu = 0; + int i; #ifdef CONFIG_SMP cpu = c->cpu_index; #endif - seq_printf(m, "processor\t: %u\n" "vendor_id\t: %s\n" "cpu family\t: %d\n" - "model\t\t: %d\n" + "model\t\t: %u\n" "model name\t: %s\n", - (unsigned)cpu, + cpu, c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", c->x86, - (int)c->x86_model, + c->x86_model, c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); if (c->x86_mask || c->cpuid_level >= 0) @@ -35,7 +58,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)cpu); + unsigned int freq = cpufreq_quick_get(cpu); if (!freq) freq = cpu_khz; @@ -47,24 +70,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); -#ifdef CONFIG_SMP - if (smp_num_siblings * c->x86_max_cores > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif - - seq_printf(m, - "fpu\t\t: yes\n" - "fpu_exception\t: yes\n" - "cpuid level\t: %d\n" - "wp\t\t: yes\n" - "flags\t\t:", - c->cpuid_level); + show_cpuinfo_core(m, c, cpu); + show_cpuinfo_misc(m, c); + seq_printf(m, "flags\t\t:"); for (i = 0; i < 32*NCAPINTS; i++) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); @@ -119,8 +128,8 @@ static void c_stop(struct seq_file *m, void *v) } const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, }; -- cgit v1.2.3 From f84c3a429f83a98bb0b0fd7eed7ad1edc512b91c Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 20 Feb 2008 10:47:12 -0800 Subject: x86: add power management line in /proc/cpuinfo Change /proc/cpuinfo on 32-bit, it will look like on 64-bit. 'power management' line is added and power management information will be printed at the line. 
Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 9bc3b04421cd..fd3823a18c0b 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -94,7 +94,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); - for (i = 0; i < 32; i++) + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); + + seq_printf(m, "power management:"); + for (i = 0; i < 32; i++) { if (c->x86_power & (1 << i)) { if (i < ARRAY_SIZE(x86_power_flags) && x86_power_flags[i]) @@ -104,11 +110,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) else seq_printf(m, " [%d]", i); } + } - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); + seq_printf(m, "\n\n"); return 0; } -- cgit v1.2.3 From 2aef77204e1e3a8ed6345727afbcb2c1efdf7fc0 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 20 Feb 2008 10:48:02 -0800 Subject: x86: cosmetic unification cpu/proc|_64.c make cpu/proc.c and cpu/proc_64.c same. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 36 +++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/proc_64.c | 49 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index fd3823a18c0b..15043a335ef1 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -8,6 +8,7 @@ /* * Get CPU information for use by the procfs. 
*/ +#ifdef CONFIG_X86_32 static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, unsigned int cpu) { @@ -47,6 +48,31 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) c->cpuid_level, c->wp_works_ok ? "yes" : "no"); } +#else +static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, + unsigned int cpu) +{ +#ifdef CONFIG_SMP + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif +} + +static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) +{ + seq_printf(m, + "fpu\t\t: yes\n" + "fpu_exception\t: yes\n" + "cpuid level\t: %d\n" + "wp\t\t: yes\n", + c->cpuid_level); +} +#endif static int show_cpuinfo(struct seq_file *m, void *v) { @@ -97,7 +123,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "\nbogomips\t: %lu.%02lu\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); + +#ifdef CONFIG_X86_64 + if (c->x86_tlbsize > 0) + seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); +#endif seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); +#ifdef CONFIG_X86_64 + seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); + seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", + c->x86_phys_bits, c->x86_virt_bits); +#endif seq_printf(m, "power management:"); for (i = 0; i < 32; i++) { diff --git a/arch/x86/kernel/cpu/proc_64.c b/arch/x86/kernel/cpu/proc_64.c index ce1b08f96820..15043a335ef1 100644 --- a/arch/x86/kernel/cpu/proc_64.c +++ b/arch/x86/kernel/cpu/proc_64.c @@ -8,6 +8,47 @@ /* * Get CPU information for use by the procfs. 
*/ +#ifdef CONFIG_X86_32 +static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, + unsigned int cpu) +{ +#ifdef CONFIG_X86_HT + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif +} + +static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) +{ + /* + * We use exception 16 if we have hardware math and we've either seen + * it or the CPU claims it is internal + */ + int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); + seq_printf(m, + "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? 
"yes" : "no"); +} +#else static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, unsigned int cpu) { @@ -31,6 +72,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) "wp\t\t: yes\n", c->cpuid_level); } +#endif static int show_cpuinfo(struct seq_file *m, void *v) { @@ -82,13 +124,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); +#ifdef CONFIG_X86_64 if (c->x86_tlbsize > 0) seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); - seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); +#endif + seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); +#ifdef CONFIG_X86_64 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", c->x86_phys_bits, c->x86_virt_bits); +#endif seq_printf(m, "power management:"); for (i = 0; i < 32; i++) { -- cgit v1.2.3 From eb19067d160416cd61fc92a8913ccfb3497b20b7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 20 Feb 2008 10:48:55 -0800 Subject: x86: unify cpu/proc|_64.c Now cpu/proc.c and cpu/proc_64.c are same. So cpu/proc_64.c can be removed. 
Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 5 +- arch/x86/kernel/cpu/proc_64.c | 180 ------------------------------------------ 2 files changed, 2 insertions(+), 183 deletions(-) delete mode 100644 arch/x86/kernel/cpu/proc_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8ba7d281fbc2..ee7c45235e54 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,9 +3,9 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += feature_names.o +obj-y += proc.o feature_names.o -obj-$(CONFIG_X86_32) += common.o proc.o bugs.o +obj-$(CONFIG_X86_32) += common.o bugs.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o @@ -13,7 +13,6 @@ obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o obj-$(CONFIG_X86_32) += nexgen.o obj-$(CONFIG_X86_32) += umc.o -obj-$(CONFIG_X86_64) += proc_64.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/proc_64.c b/arch/x86/kernel/cpu/proc_64.c deleted file mode 100644 index 15043a335ef1..000000000000 --- a/arch/x86/kernel/cpu/proc_64.c +++ /dev/null @@ -1,180 +0,0 @@ -#include -#include -#include -#include -#include -#include - -/* - * Get CPU information for use by the procfs. 
- */ -#ifdef CONFIG_X86_32 -static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, - unsigned int cpu) -{ -#ifdef CONFIG_X86_HT - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif -} - -static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) -{ - /* - * We use exception 16 if we have hardware math and we've either seen - * it or the CPU claims it is internal - */ - int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); - seq_printf(m, - "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? 
"yes" : "no"); -} -#else -static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, - unsigned int cpu) -{ -#ifdef CONFIG_SMP - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif -} - -static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) -{ - seq_printf(m, - "fpu\t\t: yes\n" - "fpu_exception\t: yes\n" - "cpuid level\t: %d\n" - "wp\t\t: yes\n", - c->cpuid_level); -} -#endif - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - struct cpuinfo_x86 *c = v; - unsigned int cpu = 0; - int i; - -#ifdef CONFIG_SMP - cpu = c->cpu_index; -#endif - seq_printf(m, "processor\t: %u\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %u\n" - "model name\t: %s\n", - cpu, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get(cpu); - - if (!freq) - freq = cpu_khz; - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - show_cpuinfo_core(m, c, cpu); - show_cpuinfo_misc(m, c); - - seq_printf(m, "flags\t\t:"); - for (i = 0; i < 32*NCAPINTS; i++) - if (cpu_has(c, i) && x86_cap_flags[i] != NULL) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - -#ifdef CONFIG_X86_64 - if (c->x86_tlbsize > 0) - seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); -#endif - seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); -#ifdef CONFIG_X86_64 - seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", - c->x86_phys_bits, c->x86_virt_bits); -#endif - - seq_printf(m, "power management:"); - for (i = 0; i < 32; i++) { - if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags) && - x86_power_flags[i]) - seq_printf(m, "%s%s", - x86_power_flags[i][0]?" 
":"", - x86_power_flags[i]); - else - seq_printf(m, " [%d]", i); - } - } - - seq_printf(m, "\n\n"); - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = first_cpu(cpu_online_map); - if ((*pos) < NR_CPUS && cpu_online(*pos)) - return &cpu_data(*pos); - return NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - *pos = next_cpu(*pos, cpu_online_map); - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; -- cgit v1.2.3 From 2c5847837fe76497934734330151f240f3e04925 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Thu, 21 Feb 2008 00:10:54 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/p6.c Before: total: 16 errors, 13 warnings, 122 lines checked After: total: 0 errors, 0 warnings, 122 lines checked No code changed: arch/x86/kernel/cpu/mcheck/p6.o: text data bss dec hex filename 1082 0 8 1090 442 p6.o.before 1082 0 8 1090 442 p6.o.after md5: 4e283fbc1b68240f1724d9725007d379 p6.o.before.asm 4e283fbc1b68240f1724d9725007d379 p6.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/p6.c | 48 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 74342604d30e..62efc9c2b3af 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -9,23 +9,23 @@ #include #include -#include +#include #include #include #include "mce.h" /* Machine Check Handler For PII/PIII */ -static void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, 
ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -55,30 +55,30 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } if (recover & 2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover & 1) - panic ("Unable to continue"); + panic("Unable to continue"); - printk (KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not + printk(KERN_EMERG "Attempting to continue.\n"); + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs * for errors if the OS could not log the error. */ - for (i=0; i Date: Thu, 21 Feb 2008 00:18:34 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/umc.c Before: total: 3 errors, 1 warnings, 23 lines checked After: total: 0 errors, 0 warnings, 25 lines checked No code changed: arch/x86/kernel/cpu/umc.o: text data bss dec hex filename 24 616 0 640 280 umc.o.before 24 616 0 640 280 umc.o.after md5: e8daa3eaed0963a0cdd2e83c2e1f9823 umc.o.before.asm e8daa3eaed0963a0cdd2e83c2e1f9823 umc.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/umc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1acf08245fb..b1fc90989d75 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -3,17 +3,19 @@ #include #include "cpu.h" -/* UMC chips appear to be only either 386 or 486, so no special init takes place. +/* + * UMC chips appear to be only either 386 or 486, + * so no special init takes place. 
*/ static struct cpu_dev umc_cpu_dev __cpuinitdata = { .c_vendor = "UMC", - .c_ident = { "UMC UMC UMC" }, + .c_ident = { "UMC UMC UMC" }, .c_models = { { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = - { - [1] = "U5D", - [2] = "U5S", + { + [1] = "U5D", + [2] = "U5S", } }, }, -- cgit v1.2.3 From 513ad84bf60d96a6998bca10ed07c3d340449be8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Feb 2008 05:18:40 +0100 Subject: x86: de-macro start_thread() Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 15 +++++++++++++++ arch/x86/kernel/process_64.c | 15 +++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2cd89b8a7050..9230ce060d09 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -512,6 +512,21 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, return err; } +void +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) +{ + __asm__("movl %0, %%gs" :: "r"(0)); + regs->fs = 0; + set_fs(USER_DS); + regs->ds = __USER_DS; + regs->es = __USER_DS; + regs->ss = __USER_DS; + regs->cs = __USER_CS; + regs->ip = new_ip; + regs->sp = new_sp; +} +EXPORT_SYMBOL_GPL(start_thread); + #ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 46c4c546b499..1ffce14cff6e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -528,6 +528,21 @@ out: return err; } +void +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) +{ + asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0)); + load_gs_index(0); + regs->ip = new_ip; + regs->sp = new_sp; + write_pda(oldrsp, new_sp); + regs->cs = __USER_CS; + regs->ss = __USER_DS; + regs->flags = 0x200; + set_fs(USER_DS); +} +EXPORT_SYMBOL_GPL(start_thread); + /* * This special macro can be used to load a 
debugging register */ -- cgit v1.2.3 From 322850af8d93735f67b8ebf84bb1350639be3f34 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 23 Feb 2008 21:48:42 -0800 Subject: x86: make amd quad core 8 socket system not be clustered_box, #2 A quad core 8 socket system will have apic id lifting. The apic id range could be [4, 0x23], and apic_is_clustered_box will think that it needs three clusters, which is larger than 2. So it is treated as a clustered_box, and we will get "Marking TSC unstable due to TSCs unsynchronized" even if the CPUs have X86_FEATURE_CONSTANT_TSC set. This patch will check if the cpu is from AMD. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d8d03e09dea2..7d8ffdaa0ab3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1180,9 +1180,19 @@ __cpuinit int apic_is_clustered_box(void) { int i, clusters, zeros; unsigned id; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + u16 *bios_cpu_apicid; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * have three apic_clusters. So go out early. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return 0; + + bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { -- cgit v1.2.3 From 700efc1b9f6afe34caae231b87d129ad8ffb559f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 23 Feb 2008 09:58:20 +0100 Subject: x86: introduce kernel/head32.c Copy x86_64 and add a head32.c so we can start moving early architecture initialization out of assembly. [ Sam Ravnborg : updated it to x86 ] Signed-off-by: Eric W.
Biederman Signed-off-by: Sam Ravnborg Cc: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/head32.c | 14 ++++++++++++++ arch/x86/kernel/head_32.S | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/head32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 80e6695a12a3..df10327182d4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,8 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o init_task.o vmlinux.lds -extra-$(CONFIG_X86_64) += head64.o +extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c new file mode 100644 index 000000000000..3db059058927 --- /dev/null +++ b/arch/x86/kernel/head32.c @@ -0,0 +1,14 @@ +/* + * linux/arch/i386/kernel/head32.c -- prepare to run common code + * + * Copyright (C) 2000 Andrea Arcangeli SuSE + * Copyright (C) 2007 Eric Biederman + */ + +#include +#include + +void __init i386_start_kernel(void) +{ + start_kernel(); +} diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 74d87ea85b5c..826988a6e964 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -450,7 +450,7 @@ is386: movl $2,%ecx # set MP jmp initialize_secondary # all other CPUs call initialize_secondary 1: #endif /* CONFIG_SMP */ - jmp start_kernel + jmp i386_start_kernel /* * We depend on ET to be correct. This checks for 287/387. -- cgit v1.2.3 From 3def3d6ddf43dbe20c00c3cbc38dfacc8586998f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Feb 2008 17:07:16 -0800 Subject: x86: clean up e820_reserve_resources on 64-bit e820_reserve_resources could use insert_resource instead of request_resource. Also move code_resource, data_resource, bss_resource, and crashk_res out of e820_reserve_resources.
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 19 ++----------------- arch/x86/kernel/setup_64.c | 8 +++++++- 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index a8694a35352b..8b914a833ac6 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -229,8 +229,7 @@ unsigned long __init e820_end_of_ram(void) /* * Mark e820 reserved areas as busy for the resource manager. */ -void __init e820_reserve_resources(struct resource *code_resource, - struct resource *data_resource, struct resource *bss_resource) +void __init e820_reserve_resources(void) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -245,21 +244,7 @@ void __init e820_reserve_resources(struct resource *code_resource, res->start = e820.map[i].addr; res->end = res->start + e820.map[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - request_resource(&iomem_resource, res); - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. 
- */ - request_resource(res, code_resource); - request_resource(res, data_resource); - request_resource(res, bss_resource); -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - request_resource(res, &crashk_res); -#endif - } + insert_resource(&iomem_resource, res); } } diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 187f084b9491..e3cb3ea96ca1 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -248,6 +248,7 @@ static void __init reserve_crashkernel(void) (unsigned long)(total_mem >> 20)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); } } #else @@ -322,6 +323,11 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + early_gart_iommu_check(); e820_register_active_regions(0, 0, -1UL); @@ -454,7 +460,7 @@ void __init setup_arch(char **cmdline_p) /* * We trust e820 completely. No explicit ROM probing in memory. 
*/ - e820_reserve_resources(&code_resource, &data_resource, &bss_resource); + e820_reserve_resources(); e820_mark_nosave_regions(); /* request I/O space for devices used on all i[345]86 PCs */ -- cgit v1.2.3 From 1e934dda0c77c8ad13fdda02074f2cfcea118a56 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Feb 2008 13:37:26 -0800 Subject: x86: insert_resource for lapic addr after e820_reserve_resources when comparing the e820 direct from BIOS, and the one by kexec: BIOS-provided physical RAM map: - BIOS-e820: 0000000000000000 - 0000000000097400 (usable) + BIOS-e820: 0000000000000100 - 0000000000097400 (usable) BIOS-e820: 0000000000097400 - 00000000000a0000 (reserved) BIOS-e820: 00000000000e6000 - 0000000000100000 (reserved) BIOS-e820: 0000000000100000 - 00000000dffa0000 (usable) - BIOS-e820: 00000000dffae000 - 00000000dffb0000 type 9 + BIOS-e820: 00000000dffae000 - 00000000dffb0000 (reserved) BIOS-e820: 00000000dffb0000 - 00000000dffbe000 (ACPI data) BIOS-e820: 00000000dffbe000 - 00000000dfff0000 (ACPI NVS) BIOS-e820: 00000000dfff0000 - 00000000e0000000 (reserved) BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved) - BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) =======> that is the local apic address... somewhere we lost it BIOS-e820: 00000000ff700000 - 0000000100000000 (reserved) BIOS-e820: 0000000100000000 - 0000004020000000 (usable) found one entry about reserved is missing for the kernel by kexec. it turns out init_apic_mappings is called before e820_reserve_resources in setup_arch. but e820_reserve_resources is using request_resource. it will not handle the conflicts. there are three ways to fix it: 1. change request_resource in e820_reserve_resources to insert_resource 2. move init_apic_mappings after e820_reserve_resources 3. use late_initcall to insert lapic resource. this patch is using method 3, that is less intrusive. in later version could consider to use method 1.
before patch fed20000-ffffffff : PCI Bus #00 fee00000-fee00fff : Local APIC fefff000-feffffff : pnp 00:09 ff700000-ffffffff : reserved with patch will get map in first kernel fed20000-ffffffff : PCI Bus #00 fee00000-fee00fff : Local APIC fee00000-fee00fff : reserved fefff000-feffffff : pnp 00:09 ff700000-ffffffff : reserved Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7d8ffdaa0ab3..ac2405ed504d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -881,11 +881,6 @@ void __init init_apic_mappings(void) apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). @@ -1300,3 +1295,21 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init lapic_insert_resource(void) +{ + if (!apic_phys) + return -1; + + /* Put local APIC into the resource map. 
*/ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + + return 0; +} + +/* + * need call insert after e820_reserve_resources() + * that is using request_resource + */ +late_initcall(lapic_insert_resource); -- cgit v1.2.3 From 60e11746d97c099b305e25e587731148387d02eb Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:09:34 +0100 Subject: x86: coding style fixes to arch/x86/kernel/summit_32.c Before: total: 20 errors, 17 warnings, 180 lines checked After: total: 0 errors, 17 warnings, 183 lines checked No code changed: arch/x86/kernel/summit_32.o: text data bss dec hex filename 932 192 0 1124 464 summit_32.o.before 932 192 0 1124 464 summit_32.o.after md5: 217aa5f002f217e56ef9d8e5c74b60e0 summit_32.o.before.asm 217aa5f002f217e56ef9d8e5c74b60e0 summit_32.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/summit_32.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 72f463401592..c7b579db843d 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -40,38 +40,40 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) int twister = 0, node = 0; int i, bus, num_buses; - for(i = 0; i < rio_table_hdr->num_rio_dev; i++){ - if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){ + for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { + if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { twister = rio_devs[i]->owner_id; break; } } - if (i == rio_table_hdr->num_rio_dev){ + if (i == rio_table_hdr->num_rio_dev) { printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__); return last_bus; } - for(i = 0; i < rio_table_hdr->num_scal_dev; i++){ - if (scal_devs[i]->node_id == twister){ + for (i = 
0; i < rio_table_hdr->num_scal_dev; i++) { + if (scal_devs[i]->node_id == twister) { node = scal_devs[i]->node_id; break; } } - if (i == rio_table_hdr->num_scal_dev){ + if (i == rio_table_hdr->num_scal_dev) { printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__); return last_bus; } - switch (rio_devs[wpeg_num]->type){ + switch (rio_devs[wpeg_num]->type) { case CompatWPEG: - /* The Compatibility Winnipeg controls the 2 legacy buses, + /* + * The Compatibility Winnipeg controls the 2 legacy buses, * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case * a PCI-PCI bridge card is used in either slot: total 5 buses. */ num_buses = 5; break; case AltWPEG: - /* The Alternate Winnipeg controls the 2 133MHz buses [1 slot + /* + * The Alternate Winnipeg controls the 2 133MHz buses [1 slot * each], their 2 "extra" buses, the 100MHz bus [2 slots] and * the "extra" buses for each of those slots: total 7 buses. */ @@ -79,7 +81,8 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) break; case LookOutAWPEG: case LookOutBWPEG: - /* A Lookout Winnipeg controls 3 100MHz buses [2 slots each] + /* + * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] * & the "extra" buses for each of those slots: total 9 buses. */ num_buses = 9; @@ -89,7 +92,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) return last_bus; } - for(bus = last_bus; bus < last_bus + num_buses; bus++) + for (bus = last_bus; bus < last_bus + num_buses; bus++) mp_bus_id_to_node[bus] = node; return bus; } @@ -99,12 +102,12 @@ static int __init build_detail_arrays(void) unsigned long ptr; int i, scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { printk(KERN_WARNING "%s: MAX_NUMNODES too low! 
Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); return 0; } - switch (rio_table_hdr->version){ + switch (rio_table_hdr->version) { default: printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version); return 0; @@ -119,10 +122,10 @@ static int __init build_detail_arrays(void) } ptr = (unsigned long)rio_table_hdr + 3; - for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) + for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; - for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) + for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) rio_devs[i] = (struct rio_detail *)ptr; return 1; @@ -140,9 +143,9 @@ void __init setup_summit(void) rio_table_hdr = NULL; offset = 0x180; - while (offset){ + while (offset) { /* The block id is stored in the 2nd word */ - if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { /* set the pointer past the offset & block id */ rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); break; @@ -150,7 +153,7 @@ void __init setup_summit(void) /* The next offset is stored in the 1st word. 0 means no more */ offset = *((unsigned short *)(ptr + offset)); } - if (!rio_table_hdr){ + if (!rio_table_hdr) { printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__); return; } @@ -161,8 +164,8 @@ void __init setup_summit(void) /* The first Winnipeg we're looking for has an index of 0 */ next_wpeg = 0; do { - for(i = 0; i < rio_table_hdr->num_rio_dev; i++){ - if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){ + for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { + if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { /* It's the Winnipeg we're looking for! 
*/ next_bus = setup_pci_node_map_for_wpeg(i, next_bus); next_wpeg++; -- cgit v1.2.3 From 65eb6b4326daddd1cccd003bd4df3fd75b06f0e1 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:09:42 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/intel.c Before: total: 37 errors, 16 warnings, 366 lines checked After: total: 0 errors, 15 warnings, 369 lines checked No code changed: arch/x86/kernel/cpu/intel.o: text data bss dec hex filename 1534 452 0 1986 7c2 intel.o.before 1534 452 0 1986 7c2 intel.o.after md5: 1ca348a06de6eb354c4b6ea715a57db5 intel.o.before.asm 1ca348a06de6eb354c4b6ea715a57db5 intel.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 83 +++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 34468b2e2507..c9ecf378cc41 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -45,7 +45,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) * * This is called before we do cpu ident work */ - + int __cpuinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ @@ -58,7 +58,7 @@ int __cpuinit ppro_with_ram_bug(void) } return 0; } - + /* * P4 Xeon errata 037 workaround. 
@@ -69,7 +69,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) unsigned long lo, hi; if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); if ((lo & (1<<9)) == 0) { printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); @@ -127,10 +127,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) */ c->f00f_bug = 0; if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled = 0; + static int f00f_workaround_enabled; c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { + if (!f00f_workaround_enabled) { trap_init_f00f_bug(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; @@ -139,7 +139,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #endif l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9 ) { + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) @@ -150,9 +150,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. */ + /* + * Names for the Pentium II/Celeron processors + * detectable only by also checking the cache size. + * Dixon is NOT a Celeron. 
+ */ if (c->x86 == 6) { switch (c->x86_model) { case 5: @@ -163,14 +165,14 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Mobile Pentium II (Dixon)"; } break; - + case 6: if (l2 == 128) p = "Celeron (Mendocino)"; else if (c->x86_mask == 0 || c->x86_mask == 5) p = "Celeron-A"; break; - + case 8: if (l2 == 128) p = "Celeron (Coppermine)"; @@ -178,9 +180,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } } - if ( p ) + if (p) strcpy(c->x86_model_id, p); - + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); @@ -211,7 +213,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 15) { set_bit(X86_FEATURE_P4, c->x86_capability); } - if (c->x86 == 6) + if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); if (cpu_has_ds) { unsigned int l1; @@ -226,9 +228,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { - /* Intel PIII Tualatin. This comes in two flavours. + /* + * Intel PIII Tualatin. This comes in two flavours. * One has 256kb of cache, the other 512. We have no way * to determine which, so we use a boottime override * for the 512kb model, and assume 256 otherwise. 
@@ -240,42 +243,42 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, + .c_ident = { "GenuineIntel" }, .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", [9] = "486 DX/4-WB" } }, { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", + [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", + [7] = "Mobile Pentium 75 - 200", [8] = "Mobile Pentium MMX" } }, { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { + { [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", [10] = "Pentium III (Cascades)", [11] = "Pentium III (Tualatin)", } @@ -361,5 +364,5 @@ unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) EXPORT_SYMBOL(cmpxchg_486_u64); #endif -// arch_initcall(intel_cpu_init); +/* arch_initcall(intel_cpu_init); */ -- cgit v1.2.3 From 0067cc996ee7a0dd282d8da5b64fa60aa2066bb2 Mon Sep 17 00:00:00 2001 From: 
Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:06 +0100 Subject: x86: coding style fixes to arch/x86/kernel/mca_32.c Before: total: 42 errors, 3 warnings, 469 lines checked After: total: 0 errors, 3 warnings, 479 lines checked No code changed: arch/x86/kernel/mca_32.o: text data bss dec hex filename 1832 288 5 2125 84d mca_32.o.before 1832 288 5 2125 84d mca_32.o.after md5: c0e45e2b743ce26349eb07dc53e80b94 mca_32.o.before.asm c0e45e2b743ce26349eb07dc53e80b94 mca_32.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/mca_32.c | 96 ++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 9482033ed0fe..2dc183758be3 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -53,9 +53,9 @@ #include #include -static unsigned char which_scsi = 0; +static unsigned char which_scsi; -int MCA_bus = 0; +int MCA_bus; EXPORT_SYMBOL(MCA_bus); /* @@ -68,15 +68,17 @@ static DEFINE_SPINLOCK(mca_lock); /* Build the status info for the adapter */ -static void mca_configure_adapter_status(struct mca_device *mca_dev) { +static void mca_configure_adapter_status(struct mca_device *mca_dev) +{ mca_dev->status = MCA_ADAPTER_NONE; mca_dev->pos_id = mca_dev->pos[0] + (mca_dev->pos[1] << 8); - if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { + if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { - /* id = 0x0000 usually indicates hardware failure, + /* + * id = 0x0000 usually indicates hardware failure, * however, ZP Gu (zpg@castle.net> reports that his 9556 * has 0x0000 as id and everything still works. 
There * also seem to be an adapter with id = 0x0000; the @@ -87,9 +89,10 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) { mca_dev->status = MCA_ADAPTER_ERROR; return; - } else if(mca_dev->pos_id != 0xffff) { + } else if (mca_dev->pos_id != 0xffff) { - /* 0xffff usually indicates that there's no adapter, + /* + * 0xffff usually indicates that there's no adapter, * however, some integrated adapters may have 0xffff as * their id and still be valid. Examples are on-board * VGA of the 55sx, the integrated SCSI of the 56 & 57, @@ -99,19 +102,19 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) { mca_dev->status = MCA_ADAPTER_NORMAL; } - if((mca_dev->pos_id == 0xffff || + if ((mca_dev->pos_id == 0xffff || mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { int j; - for(j = 2; j < 8; j++) { - if(mca_dev->pos[j] != 0xff) { + for (j = 2; j < 8; j++) { + if (mca_dev->pos[j] != 0xff) { mca_dev->status = MCA_ADAPTER_NORMAL; break; } } } - if(!(mca_dev->pos[2] & MCA_ENABLED)) { + if (!(mca_dev->pos[2] & MCA_ENABLED)) { /* enabled bit is in POS 2 */ @@ -133,7 +136,7 @@ static struct resource mca_standard_resources[] = { #define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) -/** +/* * mca_read_and_store_pos - read the POS registers into a memory buffer * @pos: a char pointer to 8 bytes, contains the POS register value on * successful return @@ -141,12 +144,14 @@ static struct resource mca_standard_resources[] = { * Returns 1 if a card actually exists (i.e. the pos isn't * all 0xff) or 0 otherwise */ -static int mca_read_and_store_pos(unsigned char *pos) { +static int mca_read_and_store_pos(unsigned char *pos) +{ int j; int found = 0; - for(j=0; j<8; j++) { - if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) { + for (j = 0; j < 8; j++) { + pos[j] = inb_p(MCA_POS_REG(j)); + if (pos[j] != 0xff) { /* 0xff all across means no device. 0x00 means * something's broken, but a device is * probably there. 
However, if you get 0x00 @@ -167,11 +172,11 @@ static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) unsigned char byte; unsigned long flags; - if(reg < 0 || reg >= 8) + if (reg < 0 || reg >= 8) return 0; spin_lock_irqsave(&mca_lock, flags); - if(mca_dev->pos_register) { + if (mca_dev->pos_register) { /* Disable adapter setup, enable motherboard setup */ outb_p(0, MCA_ADAPTER_SETUP_REG); @@ -203,7 +208,7 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, { unsigned long flags; - if(reg < 0 || reg >= 8) + if (reg < 0 || reg >= 8) return; spin_lock_irqsave(&mca_lock, flags); @@ -227,17 +232,17 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, } /* for the primary MCA bus, we have identity transforms */ -static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq) +static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq) { return irq; } -static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port) +static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port) { return port; } -static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem) +static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem) { return mem; } @@ -251,7 +256,8 @@ static int __init mca_init(void) short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00}; struct mca_bus *bus; - /* WARNING: Be careful when making changes here. Putting an adapter + /* + * WARNING: Be careful when making changes here. Putting an adapter * and the motherboard simultaneously into setup mode may result in * damage to chips (according to The Indispensible PC Hardware Book * by Hans-Peter Messmer). 
Also, we disable system interrupts (so @@ -283,7 +289,7 @@ static int __init mca_init(void) /* get the motherboard device */ mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); - if(unlikely(!mca_dev)) + if (unlikely(!mca_dev)) goto out_nomem; /* @@ -309,7 +315,7 @@ static int __init mca_init(void) mca_register_device(MCA_PRIMARY_BUS, mca_dev); mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) + if (unlikely(!mca_dev)) goto out_unlock_nomem; /* Put motherboard into video setup mode, read integrated video @@ -326,7 +332,8 @@ static int __init mca_init(void) mca_dev->slot = MCA_INTEGVIDEO; mca_register_device(MCA_PRIMARY_BUS, mca_dev); - /* Put motherboard into scsi setup mode, read integrated scsi + /* + * Put motherboard into scsi setup mode, read integrated scsi * POS registers, and turn motherboard setup off. * * It seems there are two possible SCSI registers. Martin says that @@ -338,18 +345,18 @@ static int __init mca_init(void) * machine. */ - for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { + for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); - if(mca_read_and_store_pos(pos)) + if (mca_read_and_store_pos(pos)) break; } - if(which_scsi) { + if (which_scsi) { /* found a scsi card */ mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) + if (unlikely(!mca_dev)) goto out_unlock_nomem; - for(j = 0; j < 8; j++) + for (j = 0; j < 8; j++) mca_dev->pos[j] = pos[j]; mca_configure_adapter_status(mca_dev); @@ -364,21 +371,22 @@ static int __init mca_init(void) outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - /* Now loop over MCA slots: put each adapter into setup mode, and + /* + * Now loop over MCA slots: put each adapter into setup mode, and * read its POS registers. Then put adapter setup off. 
*/ - for(i=0; ipos[j]=pos[j]; + for (j = 0; j < 8; j++) + mca_dev->pos[j] = pos[j]; mca_dev->driver_loaded = 0; mca_dev->slot = i; @@ -414,20 +422,20 @@ mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) { int slot = mca_dev->slot; - if(slot == MCA_INTEGSCSI) { + if (slot == MCA_INTEGSCSI) { printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", mca_dev->name); - } else if(slot == MCA_INTEGVIDEO) { + } else if (slot == MCA_INTEGVIDEO) { printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", mca_dev->name); - } else if(slot == MCA_MOTHERBOARD) { + } else if (slot == MCA_MOTHERBOARD) { printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", mca_dev->name); } /* More info available in POS 6 and 7? */ - if(check_flag) { + if (check_flag) { unsigned char pos6, pos7; pos6 = mca_device_read_pos(mca_dev, 6); @@ -447,8 +455,9 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) pos5 = mca_device_read_pos(mca_dev, 5); - if(!(pos5 & 0x80)) { - /* Bit 7 of POS 5 is reset when this adapter has a hardware + if (!(pos5 & 0x80)) { + /* + * Bit 7 of POS 5 is reset when this adapter has a hardware * error. Bit 7 it reset if there's error information * available in POS 6 and 7. */ @@ -460,7 +469,8 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) void __kprobes mca_handle_nmi(void) { - /* First try - scan the various adapters and see if a specific + /* + * First try - scan the various adapters and see if a specific * adapter was responsible for the error. 
*/ bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); -- cgit v1.2.3 From e0f025704437dfd6cb5adc077f05709c31189edc Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:16 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mtrr/state.c Before: total: 6 errors, 5 warnings, 80 lines checked After: total: 0 errors, 4 warnings, 82 lines checked No code changed: arch/x86/kernel/cpu/mtrr/state.o: text data bss dec hex filename 313 0 4 317 13d state.o.before 313 0 4 317 13d state.o.after md5: a0fbd61096205f9180f0bf45ed386d61 state.o.before.asm a0fbd61096205f9180f0bf45ed386d61 state.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/state.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 9f8ba923d1c9..7f7e2753685b 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -19,13 +19,15 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) if (use_intel() || is_cpu(CYRIX)) { /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { + if (cpu_has_pge) { ctxt->cr4val = read_cr4(); write_cr4(ctxt->cr4val & ~X86_CR4_PGE); } - /* Disable and flush caches. Note that wbinvd flushes the TLBs as - a side-effect */ + /* + * Disable and flush caches. 
Note that wbinvd flushes the TLBs + * as a side-effect + */ cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); @@ -42,7 +44,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { - if (use_intel()) + if (use_intel()) /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); @@ -66,12 +68,12 @@ void set_mtrr_done(struct set_mtrr_context *ctxt) else /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ctxt->ccr3); - + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); /* Restore value of CR4 */ - if ( cpu_has_pge ) + if (cpu_has_pge) write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ -- cgit v1.2.3 From f97518271941fdb2dab07d7bd58bf9fa39ba3f65 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:28 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/transmeta.c Before: total: 13 errors, 3 warnings, 105 lines checked After: total: 0 errors, 3 warnings, 107 lines checked No code changed: arch/x86/kernel/cpu/transmeta.o: text data bss dec hex filename 713 324 0 1037 40d transmeta.o.before 713 324 0 1037 40d transmeta.o.after md5: 19abe2cafac617e1e2aadc4aa4e9923b transmeta.o.before.asm 19abe2cafac617e1e2aadc4aa4e9923b transmeta.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/transmeta.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c2d168e992f4..daee21d208f4 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -18,8 +18,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); cpu_rev = 0; - if ( max >= 0x80860001 ) { - 
cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + if (max >= 0x80860001) { + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); if (cpu_rev != 0x02000000) { printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", (cpu_rev >> 24) & 0xff, @@ -29,7 +29,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) cpu_freq); } } - if ( max >= 0x80860002 ) { + if (max >= 0x80860002) { cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); if (cpu_rev == 0x02000000) { printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", @@ -42,7 +42,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) cms_rev1 & 0xff, cms_rev2); } - if ( max >= 0x80860006 ) { + if (max >= 0x80860006) { cpuid(0x80860003, (void *)&cpu_info[0], (void *)&cpu_info[4], @@ -75,22 +75,24 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) /* All Transmeta CPUs have a constant TSC */ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - + #ifdef CONFIG_SYSCTL - /* randomize_va_space slows us down enormously; - it probably triggers retranslation of x86->native bytecode */ + /* + * randomize_va_space slows us down enormously; + * it probably triggers retranslation of x86->native bytecode + */ randomize_va_space = 0; #endif } -static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) +static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) { u32 xlvl; /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) c->x86_capability[2] = cpuid_edx(0x80860001); } } -- cgit v1.2.3 From fb87a298fb79357fa5b27e6916ae1c45bf94dac7 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:33 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/amd.c Before: total: 42 errors, 26 warnings, 350 lines checked After: total: 0 errors, 26 warnings, 352 lines 
checked No code changed: arch/x86/kernel/cpu/amd.o: text data bss dec hex filename 1936 328 0 2264 8d8 amd.o.before 1936 328 0 2264 8d8 amd.o.after md5: 873430a88faaf31bb4bbfe3a2a691e45 amd.o.before.asm 873430a88faaf31bb4bbfe3a2a691e45 amd.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 94 ++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 46 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cab4e562b5cb..1a3e1bb4d758 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,7 +20,7 @@ * the chip setting when fixing the bug but they also tweaked some * performance at the same time.. */ - + extern void vide(void); __asm__(".align 4\nvide: ret"); @@ -81,7 +81,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned long long value; - /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 * bit 6 of msr C001_0015 * * Errata 63 for SH-B3 steppings @@ -102,15 +103,16 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * no bus pipeline) */ - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_bit(0*32+31, c->x86_capability); - + r = get_model_name(c); - switch(c->x86) - { - case 4: + switch (c->x86) { + case 4: /* * General Systems BIOSen alias the cpu frequency registers * of the Elan at 0x000df000. 
Unfortuantly, one of the Linux @@ -120,61 +122,60 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ #define CBAR_ENB (0x80000000) #define CBAR_KEY (0X000000CB) - if (c->x86_model==9 || c->x86_model == 10) { + if (c->x86_model == 9 || c->x86_model == 10) { if (inl (CBAR) & CBAR_ENB) outl (0 | CBAR_KEY, CBAR); } break; - case 5: - if( c->x86_model < 6 ) - { + case 5: + if (c->x86_model < 6) { /* Based on AMD doc 20734R - June 2000 */ - if ( c->x86_model == 0 ) { + if (c->x86_model == 0) { clear_bit(X86_FEATURE_APIC, c->x86_capability); set_bit(X86_FEATURE_PGE, c->x86_capability); } break; } - - if ( c->x86_model == 6 && c->x86_mask == 1 ) { + + if (c->x86_model == 6 && c->x86_mask == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); unsigned long d, d2; - + printk(KERN_INFO "AMD K6 stepping B detected - "); - + /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect + * It looks like AMD fixed the 2.6.2 bug and improved indirect * calls at the same time. 
*/ n = K6_BUG_LOOP; f_vide = vide; rdtscl(d); - while (n--) + while (n--) f_vide(); rdtscl(d2); d = d2-d; - if (d > 20*K6_BUG_LOOP) + if (d > 20*K6_BUG_LOOP) printk("system stability may be impaired when more than 32 MB are used.\n"); - else + else printk("probably OK (after B9730xxxx).\n"); printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model== 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_mask < 8)) { /* We can only write allocate on the low 508Mb */ - if(mbytes>508) - mbytes=508; + if (mbytes > 508) + mbytes = 508; rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF)==0) { + if ((l&0x0000FFFF) == 0) { unsigned long flags; - l=(1<<0)|((mbytes/4)<<1); + l = (1<<0)|((mbytes/4)<<1); local_irq_save(flags); wbinvd(); wrmsr(MSR_K6_WHCR, l, h); @@ -185,17 +186,17 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } - if ((c->x86_model == 8 && c->x86_mask >7) || + if ((c->x86_model == 8 && c->x86_mask > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ - if(mbytes>4092) - mbytes=4092; + if (mbytes > 4092) + mbytes = 4092; rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000)==0) { + if ((l&0xFFFF0000) == 0) { unsigned long flags; - l=((mbytes>>2)<<22)|(1<<16); + l = ((mbytes>>2)<<22)|(1<<16); local_irq_save(flags); wbinvd(); wrmsr(MSR_K6_WHCR, l, h); @@ -217,10 +218,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } break; - case 6: /* An Athlon/Duron */ - - /* Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. + case 6: /* An Athlon/Duron */ + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. * If the BIOS didn't enable it already, enable it here. 
*/ if (c->x86_model >= 6 && c->x86_model <= 10) { @@ -233,11 +235,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - /* It's been determined by AMD that Athlons since model 8 stepping 1 + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, @@ -256,7 +259,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K8, c->x86_capability); break; case 6: - set_bit(X86_FEATURE_K7, c->x86_capability); + set_bit(X86_FEATURE_K7, c->x86_capability); break; } if (c->x86 >= 6) @@ -264,9 +267,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); - if (cpuid_eax(0x80000000) >= 0x80000008) { + if (cpuid_eax(0x80000000) >= 0x80000008) c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - } #ifdef CONFIG_X86_HT /* @@ -308,14 +310,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); } -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ size = 64; if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ size = 256; } return size; @@ -323,16 +325,16 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned in static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, + 
.c_ident = { "AuthenticAMD" }, .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { [3] = "486 DX/2", [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", [14] = "Am5x86-WT", - [15] = "Am5x86-WB" + [15] = "Am5x86-WB" } }, }, -- cgit v1.2.3 From 83e714e82f3434a32c3f54f7223f78345d873218 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:40 +0100 Subject: x86: coding style fixes to arch/x86/kernel/vm86_32.c Before: total: 64 errors, 18 warnings, 840 lines checked After: total: 12 errors, 15 warnings, 844 lines checked No code changed: arch/x86/kernel/vm86_32.o: text data bss dec hex filename 4449 28 132 4609 1201 vm86_32.o.before 4449 28 132 4609 1201 vm86_32.o.after md5: e4e51ed7689d17f04148554a3c6d5bb6 vm86_32.o.before.asm e4e51ed7689d17f04148554a3c6d5bb6 vm86_32.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/vm86_32.c | 174 ++++++++++++++++++++++++---------------------- 1 file changed, 89 insertions(+), 85 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 738c2104df30..6a91fcf92d67 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -64,7 +64,7 @@ #define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus +#define VMPI KVM86->vm86plus /* @@ -81,7 +81,7 @@ #define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) #define VEFLAGS (current->thread.v86flags) -#define set_flags(X,new,mask) \ +#define set_flags(X, new, mask) \ ((X) = ((X) & ~(mask)) | ((new) & (mask))) #define SAFE_MASK (0xDD5) @@ -93,8 +93,10 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, { int ret = 0; - /* kernel_vm86_regs is missing gs, so copy everything up to - (but not including) orig_eax, and then rest including orig_eax. 
*/ + /* + * kernel_vm86_regs is missing gs, so copy everything up to + * (but not including) orig_eax, and then rest including orig_eax. + */ ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax, sizeof(struct kernel_vm86_regs) - @@ -120,7 +122,7 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, return ret; } -struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) +struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) { struct tss_struct *tss; struct pt_regs *ret; @@ -138,8 +140,8 @@ struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) do_exit(SIGSEGV); } set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs); - tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap); + tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs); + tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap); if (tmp) { printk("vm86: could not access userspace vm86_info\n"); do_exit(SIGSEGV); @@ -237,20 +239,21 @@ asmlinkage int sys_vm86(struct pt_regs regs) tsk = current; switch (regs.bx) { - case VM86_REQUEST_IRQ: - case VM86_FREE_IRQ: - case VM86_GET_IRQ_BITS: - case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); - goto out; - case VM86_PLUS_INSTALL_CHECK: - /* NOTE: on old vm86 stuff this will return the error - from access_ok(), because the subfunction is - interpreted as (invalid) address to vm86_struct. - So the installation check works. 
- */ - ret = 0; - goto out; + case VM86_REQUEST_IRQ: + case VM86_FREE_IRQ: + case VM86_GET_IRQ_BITS: + case VM86_GET_AND_RESET_IRQ: + ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); + goto out; + case VM86_PLUS_INSTALL_CHECK: + /* + * NOTE: on old vm86 stuff this will return the error + * from access_ok(), because the subfunction is + * interpreted as (invalid) address to vm86_struct. + * So the installation check works. + */ + ret = 0; + goto out; } /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ @@ -299,18 +302,18 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs.pt.flags |= VM_MASK; switch (info->cpu_type) { - case CPU_286: - tsk->thread.v86mask = 0; - break; - case CPU_386: - tsk->thread.v86mask = NT_MASK | IOPL_MASK; - break; - case CPU_486: - tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; - break; - default: - tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; - break; + case CPU_286: + tsk->thread.v86mask = 0; + break; + case CPU_386: + tsk->thread.v86mask = NT_MASK | IOPL_MASK; + break; + case CPU_486: + tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; + break; + default: + tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; + break; } /* @@ -346,9 +349,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk /* we never return here */ } -static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) +static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) { - struct pt_regs * regs32; + struct pt_regs *regs32; regs32 = save_v86_state(regs16); regs32->ax = retval; @@ -358,29 +361,30 @@ static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) : : "r" (regs32), "r" (current_thread_info())); } -static inline void set_IF(struct kernel_vm86_regs * regs) +static inline void set_IF(struct kernel_vm86_regs *regs) { VEFLAGS |= VIF_MASK; if (VEFLAGS & VIP_MASK) 
return_to_32bit(regs, VM86_STI); } -static inline void clear_IF(struct kernel_vm86_regs * regs) +static inline void clear_IF(struct kernel_vm86_regs *regs) { VEFLAGS &= ~VIF_MASK; } -static inline void clear_TF(struct kernel_vm86_regs * regs) +static inline void clear_TF(struct kernel_vm86_regs *regs) { regs->pt.flags &= ~TF_MASK; } -static inline void clear_AC(struct kernel_vm86_regs * regs) +static inline void clear_AC(struct kernel_vm86_regs *regs) { regs->pt.flags &= ~AC_MASK; } -/* It is correct to call set_IF(regs) from the set_vflags_* +/* + * It is correct to call set_IF(regs) from the set_vflags_* * functions. However someone forgot to call clear_IF(regs) * in the opposite case. * After the command sequence CLI PUSHF STI POPF you should @@ -391,7 +395,7 @@ static inline void clear_AC(struct kernel_vm86_regs * regs) * [KD] */ -static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs * regs) +static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) { set_flags(VEFLAGS, flags, current->thread.v86mask); set_flags(regs->pt.flags, flags, SAFE_MASK); @@ -401,7 +405,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs clear_IF(regs); } -static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) +static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) { set_flags(VFLAGS, flags, current->thread.v86mask); set_flags(regs->pt.flags, flags, SAFE_MASK); @@ -411,7 +415,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg clear_IF(regs); } -static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) +static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) { unsigned long flags = regs->pt.flags & RETURN_MASK; @@ -421,11 +425,11 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) return flags | (VEFLAGS & current->thread.v86mask); } -static inline 
int is_revectored(int nr, struct revectored_struct * bitmap) +static inline int is_revectored(int nr, struct revectored_struct *bitmap) { __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" :"=r" (nr) - :"m" (*bitmap),"r" (nr)); + :"m" (*bitmap), "r" (nr)); return nr; } @@ -437,7 +441,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap) ptr--; \ if (put_user(__val, base + ptr) < 0) \ goto err_label; \ - } while(0) + } while (0) #define pushw(base, ptr, val, err_label) \ do { \ @@ -448,7 +452,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap) ptr--; \ if (put_user(val_byte(__val, 0), base + ptr) < 0) \ goto err_label; \ - } while(0) + } while (0) #define pushl(base, ptr, val, err_label) \ do { \ @@ -465,7 +469,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap) ptr--; \ if (put_user(val_byte(__val, 0), base + ptr) < 0) \ goto err_label; \ - } while(0) + } while (0) #define popb(base, ptr, err_label) \ ({ \ @@ -512,7 +516,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap) * in userspace is always better than an Oops anyway.) 
[KD] */ static void do_int(struct kernel_vm86_regs *regs, int i, - unsigned char __user * ssp, unsigned short sp) + unsigned char __user *ssp, unsigned short sp) { unsigned long __user *intr_ptr; unsigned long segoffs; @@ -521,7 +525,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, goto cannot_handle; if (is_revectored(i, &KVM86->int_revectored)) goto cannot_handle; - if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) + if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) goto cannot_handle; intr_ptr = (unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) @@ -543,15 +547,15 @@ cannot_handle: return_to_32bit(regs, VM86_INTx + (i << 8)); } -int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) +int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) { if (VMPI.is_vm86pus) { - if ( (trapno==3) || (trapno==1) ) + if ((trapno == 3) || (trapno == 1)) return_to_32bit(regs, VM86_TRAP + (trapno << 8)); do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); return 0; } - if (trapno !=1) + if (trapno != 1) return 1; /* we let this handle by the calling routine */ if (current->ptrace & PT_PTRACED) { unsigned long flags; @@ -566,7 +570,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno return 0; } -void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) +void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) { unsigned char opcode; unsigned char __user *csp; @@ -595,17 +599,17 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) pref_done = 0; do { switch (opcode = popb(csp, ip, simulate_sigsegv)) { - case 0x66: /* 32-bit data */ data32=1; break; - case 0x67: /* 32-bit address */ break; - case 0x2e: /* CS */ break; - case 0x3e: /* DS */ break; - case 0x26: /* ES */ break; - case 0x36: /* SS */ break; - case 0x65: /* GS */ break; - case 0x64: /* FS */ break; - 
case 0xf2: /* repnz */ break; - case 0xf3: /* rep */ break; - default: pref_done = 1; + case 0x66: /* 32-bit data */ data32 = 1; break; + case 0x67: /* 32-bit address */ break; + case 0x2e: /* CS */ break; + case 0x3e: /* DS */ break; + case 0x26: /* ES */ break; + case 0x36: /* SS */ break; + case 0x65: /* GS */ break; + case 0x64: /* FS */ break; + case 0xf2: /* repnz */ break; + case 0xf3: /* rep */ break; + default: pref_done = 1; } } while (!pref_done); @@ -628,7 +632,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) { unsigned long newflags; if (data32) { - newflags=popl(ssp, sp, simulate_sigsegv); + newflags = popl(ssp, sp, simulate_sigsegv); SP(regs) += 4; } else { newflags = popw(ssp, sp, simulate_sigsegv); @@ -636,20 +640,20 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) } IP(regs) = ip; CHECK_IF_IN_TRAP; - if (data32) { + if (data32) set_vflags_long(newflags, regs); - } else { + else set_vflags_short(newflags, regs); - } + VM86_FAULT_RETURN; } /* int xx */ case 0xcd: { - int intno=popb(csp, ip, simulate_sigsegv); + int intno = popb(csp, ip, simulate_sigsegv); IP(regs) = ip; if (VMPI.vm86dbg_active) { - if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) + if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) return_to_32bit(regs, VM86_INTx + (intno << 8)); } do_int(regs, intno, ssp, sp); @@ -663,9 +667,9 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) unsigned long newcs; unsigned long newflags; if (data32) { - newip=popl(ssp, sp, simulate_sigsegv); - newcs=popl(ssp, sp, simulate_sigsegv); - newflags=popl(ssp, sp, simulate_sigsegv); + newip = popl(ssp, sp, simulate_sigsegv); + newcs = popl(ssp, sp, simulate_sigsegv); + newflags = popl(ssp, sp, simulate_sigsegv); SP(regs) += 12; } else { newip = popw(ssp, sp, simulate_sigsegv); @@ -734,18 +738,18 @@ static struct vm86_irqs { static DEFINE_SPINLOCK(irqbits_lock); static int irqbits; -#define ALLOWED_SIGS ( 1 /* 
0 = don't send a signal */ \ +#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \ | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ - | (1 << SIGUNUSED) ) - + | (1 << SIGUNUSED)) + static irqreturn_t irq_handler(int intno, void *dev_id) { int irq_bit; unsigned long flags; - spin_lock_irqsave(&irqbits_lock, flags); + spin_lock_irqsave(&irqbits_lock, flags); irq_bit = 1 << intno; - if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk) + if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk) goto out; irqbits |= irq_bit; if (vm86_irqs[intno].sig) @@ -759,7 +763,7 @@ static irqreturn_t irq_handler(int intno, void *dev_id) return IRQ_HANDLED; out: - spin_unlock_irqrestore(&irqbits_lock, flags); + spin_unlock_irqrestore(&irqbits_lock, flags); return IRQ_NONE; } @@ -770,9 +774,9 @@ static inline void free_vm86_irq(int irqnumber) free_irq(irqnumber, NULL); vm86_irqs[irqnumber].tsk = NULL; - spin_lock_irqsave(&irqbits_lock, flags); + spin_lock_irqsave(&irqbits_lock, flags); irqbits &= ~(1 << irqnumber); - spin_unlock_irqrestore(&irqbits_lock, flags); + spin_unlock_irqrestore(&irqbits_lock, flags); } void release_vm86_irqs(struct task_struct *task) @@ -788,10 +792,10 @@ static inline int get_and_reset_irq(int irqnumber) int bit; unsigned long flags; int ret = 0; - + if (invalid_vm86_irq(irqnumber)) return 0; if (vm86_irqs[irqnumber].tsk != current) return 0; - spin_lock_irqsave(&irqbits_lock, flags); + spin_lock_irqsave(&irqbits_lock, flags); bit = irqbits & (1 << irqnumber); irqbits &= ~bit; if (bit) { @@ -799,7 +803,7 @@ static inline int get_and_reset_irq(int irqnumber) ret = 1; } - spin_unlock_irqrestore(&irqbits_lock, flags); + spin_unlock_irqrestore(&irqbits_lock, flags); return ret; } -- cgit v1.2.3 From 714a9ac2efe5e20fdccfcf91b27dbd384bd34685 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:10:49 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/non-fatal.c Before: total: 5 errors, 5 warnings, 91 lines checked 
After: total: 0 errors, 0 warnings, 94 lines checked No code changed: arch/x86/kernel/cpu/mcheck/non-fatal.o: text data bss dec hex filename 441 80 4 525 20d non-fatal.o.before 441 80 4 525 20d non-fatal.o.after md5: 137bc114d2020ad331d5e76444a2c7d3 non-fatal.o.before.asm 137bc114d2020ad331d5e76444a2c7d3 non-fatal.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/non-fatal.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index bf39409b3838..00ccb6c14ec2 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include @@ -26,23 +26,26 @@ static int firstbank; #define MCE_RATE 15*HZ /* timer rate is 15s */ -static void mce_checkregs (void *info) +static void mce_checkregs(void *info) { u32 low, high; int i; - for (i=firstbank; i Date: Fri, 22 Feb 2008 23:10:57 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/winchip.c Before: total: 4 errors, 0 warnings, 36 lines checked After: total: 0 errors, 0 warnings, 36 lines checked No code changed: arch/x86/kernel/cpu/mcheck/winchip.o: text data bss dec hex filename 222 0 4 226 e2 winchip.o.before 222 0 4 226 e2 winchip.o.after md5: 9caefa12256c5f7d71ef324f6d01a2d5 winchip.o.before.asm 9caefa12256c5f7d71ef324f6d01a2d5 winchip.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/winchip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 3d428d5afc52..f2be3e190c6b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -8,14 +8,14 @@ #include #include -#include +#include #include 
#include #include "mce.h" /* Machine check handler for WinChip C6 */ -static void winchip_machine_check(struct pt_regs * regs, long error_code) +static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK); @@ -28,8 +28,8 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) machine_check_vector = winchip_machine_check; wmb(); rdmsr(MSR_IDT_FCR1, lo, hi); - lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ - lo&= ~(1<<4); /* Enable MCE */ + lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ + lo &= ~(1<<4); /* Enable MCE */ wrmsr(MSR_IDT_FCR1, lo, hi); set_in_cr4(X86_CR4_MCE); printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); -- cgit v1.2.3 From d677759e99b51f50a75b2adfabb25e9d656ee33c Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:11:07 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/mce_32.c Before: total: 10 errors, 3 warnings, 90 lines checked After: total: 0 errors, 3 warnings, 90 lines checked No code changed: arch/x86/kernel/cpu/mcheck/mce_32.o: text data bss dec hex filename 287 42 12 341 155 mce_32.o.before 287 42 12 341 155 mce_32.o.after md5: fede5ff8e6bc3f62e8e691ca6c45eb39 mce_32.o.before.asm fede5ff8e6bc3f62e8e691ca6c45eb39 mce_32.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_32.c | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index a5182dcd94ae..774d87cfd8cd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -10,20 +10,20 @@ #include #include -#include +#include #include #include #include "mce.h" -int mce_disabled = 0; +int mce_disabled; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* Handle unconfigured 
int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs * regs, long error_code) -{ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); } @@ -33,30 +33,30 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_mac /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { - if (mce_disabled==1) + if (mce_disabled == 1) return; switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86==5) - intel_p5_mcheck_init(c); - if (c->x86==6) - intel_p6_mcheck_init(c); - if (c->x86==15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86==5) - winchip_mcheck_init(c); - break; - - default: - break; + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86 == 5) + intel_p5_mcheck_init(c); + if (c->x86 == 6) + intel_p6_mcheck_init(c); + if (c->x86 == 15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86 == 5) + winchip_mcheck_init(c); + break; + + default: + break; } } -- cgit v1.2.3 From adf85265b455f096fa9caf4aea51f274cdaca3c6 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:11:23 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/cyrix.c Before: total: 46 errors, 10 warnings, 450 lines checked After: total: 1 errors, 10 warnings, 449 lines checked No code changed: arch/x86/kernel/cpu/cyrix.o: text data bss dec hex filename 2048 908 4 2960 b90 cyrix.o.before 2048 908 4 2960 b90 cyrix.o.after md5: 9add5e69dbd788f91ff24eea8462dad7 cyrix.o.before.asm 9add5e69dbd788f91ff24eea8462dad7 cyrix.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 113 ++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 57 deletions(-) (limited to 
'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 9c4ee98f2cb8..f7085bde4c28 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -19,7 +19,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; unsigned long flags; - + /* we test for DEVID by checking whether CCR3 is writable */ local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); @@ -37,8 +37,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) setCx86(CX86_CCR2, ccr2); *dir0 = 0xfe; } - } - else { + } else { setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ /* read DIR0 and DIR1 CPU registers */ @@ -86,7 +85,7 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445"; static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) { unsigned long flags; - + if (Cx86_dir0_msb == 3) { unsigned char ccr3, ccr5; @@ -132,7 +131,7 @@ static void __cpuinit set_cx86_memwb(void) /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } static void __cpuinit set_cx86_inc(void) @@ -148,7 +147,7 @@ static void __cpuinit set_cx86_inc(void) setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } @@ -167,16 +166,16 @@ static void __cpuinit geode_configure(void) ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - + /* FPU fast, DTE cache, Mem bypass */ setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - + set_cx86_memwb(); - set_cx86_reorder(); + set_cx86_reorder(); set_cx86_inc(); - + local_irq_restore(flags); } @@ -187,12 +186,14 @@ static void 
__cpuinit init_cyrix(struct cpuinfo_x86 *c) char *buf = c->x86_model_id; const char *p = NULL; - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_bit(0*32+31, c->x86_capability); /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ - if ( test_bit(1*32+24, c->x86_capability) ) { + if (test_bit(1*32+24, c->x86_capability)) { clear_bit(1*32+24, c->x86_capability); set_bit(X86_FEATURE_CXMMX, c->x86_capability); } @@ -213,7 +214,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * the model, multiplier and stepping. Black magic included, * to make the silicon step/rev numbers match the printed ones. */ - + switch (dir0_msn) { unsigned char tmp; @@ -250,17 +251,18 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) #ifdef CONFIG_PCI { u32 vendor, device; - /* It isn't really a PCI quirk directly, but the cure is the - same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It throws away the fifo on disable_dma() which - is wrong and ruins the audio. - - Bug2: VSA1 has a wrap bug so that using maximum sized DMA - causes bad things. According to NatSemi VSA2 has another - bug to do with 'hlt'. I've not seen any boards using VSA2 - and X doesn't seem to support it either so who cares 8). - VSA1 we work around however. - */ + /* + * It isn't really a PCI quirk directly, but the cure is the + * same. The MediaGX has deep magic SMM stuff that handles the + * SB emulation. It throws away the fifo on disable_dma() which + * is wrong and ruins the audio. + * + * Bug2: VSA1 has a wrap bug so that using maximum sized DMA + * causes bad things. According to NatSemi VSA2 has another + * bug to do with 'hlt'. I've not seen any boards using VSA2 + * and X doesn't seem to support it either so who cares 8). 
+ * VSA1 we work around however. + */ printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; @@ -273,52 +275,48 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* * The 5510/5520 companion chips have a funky PIT. - */ + */ if (vendor == PCI_VENDOR_ID_CYRIX && (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) mark_tsc_unstable("cyrix 5510/5520 detected"); } #endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + c->x86_cache_size = 16; /* Yep 16K integrated cache thats it */ /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); - + /* * GXm : 0x30 ... 0x5f GXm datasheet 51 * GXlv: 0x6x GXlv datasheet 54 * ? : 0x7x * GX1 : 0x8x GX1 datasheet 56 */ - if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) + if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); get_model_name(c); /* get CPU marketing name */ return; - } - else { /* MediaGX */ + } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; p = Cx86_cb+2; c->x86_model = (dir1 & 0x20) ? 1 : 2; } break; - case 5: /* 6x86MX/M II */ - if (dir1 > 7) - { + case 5: /* 6x86MX/M II */ + if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); - } - else - { + } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; p = Cx86_cb+tmp; - if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) + if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) (c->x86_model)++; /* Emulate MTRRs using Cyrix's ARRs. 
*/ set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); @@ -343,7 +341,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) break; } strcpy(buf, Cx86_model[dir0_msn & 7]); - if (p) strcat(buf, p); + if (p) + strcat(buf, p); return; } @@ -352,7 +351,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) { - /* There may be GX1 processors in the wild that are branded + /* + * There may be GX1 processors in the wild that are branded * NSC and not Cyrix. * * This function only handles the GX processor, and kicks every @@ -377,7 +377,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) * by the fact that they preserve the flags across the division of 5/2. * PII and PPro exhibit this behavior too, but they have cpuid available. */ - + /* * Perform the Cyrix 5/2 test. A Cyrix won't change * the flags, while other 486 chips will. @@ -398,27 +398,26 @@ static inline int test_cyrix_52div(void) return (unsigned char) (test >> 8) == 0x02; } -static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) +static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) { /* Detect Cyrix with disabled CPUID */ - if ( c->x86 == 4 && test_cyrix_52div() ) { + if (c->x86 == 4 && test_cyrix_52div()) { unsigned char dir0, dir1; - + strcpy(c->x86_vendor_id, "CyrixInstead"); - c->x86_vendor = X86_VENDOR_CYRIX; - - /* Actually enable cpuid on the older cyrix */ - - /* Retrieve CPU revisions */ - + c->x86_vendor = X86_VENDOR_CYRIX; + + /* Actually enable cpuid on the older cyrix */ + + /* Retrieve CPU revisions */ + do_cyrix_devid(&dir0, &dir1); - dir0>>=4; - + dir0 >>= 4; + /* Check it is an affected model */ - - if (dir0 == 5 || dir0 == 3) - { + + if (dir0 == 5 || dir0 == 3) { unsigned char ccr3; unsigned long flags; printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); @@ -434,7 +433,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = 
"Cyrix", - .c_ident = { "CyrixInstead" }, + .c_ident = { "CyrixInstead" }, .c_init = init_cyrix, .c_identify = cyrix_identify, }; @@ -443,7 +442,7 @@ cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", - .c_ident = { "Geode by NSC" }, + .c_ident = { "Geode by NSC" }, .c_init = init_nsc, }; -- cgit v1.2.3 From 94a9fa41421625caedc328a2b0a0d5c78919a20f Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:11:52 +0100 Subject: x86: coding style fixes to arch/x86/kernel/msr.c Before: total: 2 errors, 0 warnings, 231 lines checked After: total: 0 errors, 0 warnings, 231 lines checked No code changed: arch/x86/kernel/msr.o: text data bss dec hex filename 1199 12 4 1215 4bf msr.o.before 1199 12 4 1215 4bf msr.o.after md5: 604be0d07d829bc52a9346babd084bdc msr.o.before.asm 604be0d07d829bc52a9346babd084bdc msr.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index af51ea8400b2..4dfb40530057 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -65,8 +65,8 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig) return ret; } -static ssize_t msr_read(struct file *file, char __user * buf, - size_t count, loff_t * ppos) +static ssize_t msr_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; -- cgit v1.2.3 From 34048c9e927d5ae29c6ba802c826370de2a046d2 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sun, 24 Feb 2008 11:58:13 +0100 Subject: x86: coding style fixes to arch/x86/kernel/cpu/common.c Before: total: 55 errors, 6 warnings, 727 lines checked After: total: 0 errors, 3 warnings, 734 lines checked No code changed: arch/x86/kernel/cpu/common.o: text data bss dec hex filename 3500 4611 44 
8155 1fdb common.o.before 3500 4611 44 8155 1fdb common.o.after md5: e37091f11fbeb682c0db152ac3022a38 common.o.before.asm e37091f11fbeb682c0db152ac3022a38 common.o.after.asm Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 105 +++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 49 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fd6be154d5d..ef9e31b89b35 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -62,9 +62,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -static void __cpuinit default_init(struct cpuinfo_x86 * c) +static void __cpuinit default_init(struct cpuinfo_x86 *c) { /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -81,11 +81,11 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; static int __init cachesize_setup(char *str) { - get_option (&str, &cachesize_override); + get_option(&str, &cachesize_override); return 1; } __setup("cachesize=", cachesize_setup); @@ -107,12 +107,12 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) /* Intel chips right-justify this string for some dumb reason; undo that brain damage */ p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) + while (*p == ' ') p++; - if ( p != q ) { - while ( *p ) + if (p != q) { + while (*p) *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) + while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } @@ -130,7 +130,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000005, &dummy, 
&dummy, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); + c->x86_cache_size = (ecx>>24)+(edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -138,16 +138,16 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) ecx = cpuid_ecx(0x80000006); l2size = ecx >> 16; - + /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c,l2size); + l2size = this_cpu->c_size_cache(c, l2size); /* Allow user to override all this if necessary. */ if (cachesize_override != -1) l2size = cachesize_override; - if ( l2size == 0 ) + if (l2size == 0) return; /* Again, no L2 cache is possible */ c->x86_cache_size = l2size; @@ -156,16 +156,19 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) l2size, ecx & 0xFF); } -/* Naming convention should be: [()] */ -/* This table only is used unless init_() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ /* Look up CPU names by table lookup. 
*/ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; - if ( c->x86_model >= 16 ) + if (c->x86_model >= 16) return NULL; /* Range check */ if (!this_cpu) @@ -190,9 +193,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) for (i = 0; i < X86_VENDOR_NUM; i++) { if (cpu_devs[i]) { - if (!strcmp(v,cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v,cpu_devs[i]->c_ident[1]))) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; if (!early) this_cpu = cpu_devs[i]; @@ -210,7 +213,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) } -static int __init x86_fxsr_setup(char * s) +static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); setup_clear_cpu_cap(X86_FEATURE_XMM); @@ -219,7 +222,7 @@ static int __init x86_fxsr_setup(char * s) __setup("nofxsr", x86_fxsr_setup); -static int __init x86_sep_setup(char * s) +static int __init x86_sep_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_SEP); return 1; @@ -308,12 +311,15 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) } -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ +/* + * Do minimum CPU detection early. + * Fields really needed: vendor, cpuid_level, family, model, mask, + * cache alignment. + * The others are not touched to avoid unwanted side effects. + * + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. 
+ */ static void __init early_cpu_detect(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -335,7 +341,7 @@ static void __init early_cpu_detect(void) early_get_cap(c); } -static void __cpuinit generic_identify(struct cpuinfo_x86 * c) +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; unsigned int ebx; @@ -346,13 +352,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); - + get_cpu_vendor(c, 0); /* Initialize the standard set of capabilities */ /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { + if (c->cpuid_level >= 0x00000001) { u32 capability, excap; cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; @@ -378,12 +383,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) { + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[6] = cpuid_ecx(0x80000001); } - if ( xlvl >= 0x80000004 ) + if (xlvl >= 0x80000004) get_model_name(c); /* Default name */ } @@ -397,12 +402,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { /* Disable processor serial number */ - unsigned long lo,hi; - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); printk(KERN_NOTICE "CPU serial number disabled.\n"); 
clear_bit(X86_FEATURE_PN, c->x86_capability); @@ -439,9 +444,11 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { - /* First of all, decide if this is a 486 or higher */ - /* It's a 486 if we can modify the AC flag */ - if ( flag_is_changeable_p(X86_EFLAGS_AC) ) + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) c->x86 = 4; else c->x86 = 3; @@ -474,10 +481,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { + if (!c->x86_model_id[0]) { char *p; p = table_lookup_model(c); - if ( p ) + if (p) strcpy(c->x86_model_id, p); else /* Last resort... */ @@ -491,9 +498,9 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * common between the CPUs. The first time this routine gets * executed, c == &boot_cpu_data. */ - if ( c != &boot_cpu_data ) { + if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) + for (i = 0 ; i < NCAPINTS ; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -537,7 +544,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { + } else if (smp_num_siblings > 1) { if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the " @@ -592,7 +599,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) else printk("%s", c->x86_model_id); - if (c->x86_mask || c->cpuid_level >= 0) + if (c->x86_mask || c->cpuid_level >= 0) printk(" stepping %02x\n", c->x86_mask); else printk("\n"); @@ -653,7 +660,7 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct *t = 
&per_cpu(init_tss, cpu); struct thread_struct *thread = &curr->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -679,7 +686,7 @@ void __cpuinit cpu_init(void) enter_lazy_tlb(&init_mm, curr); load_sp0(t, thread); - set_tss_desc(cpu,t); + set_tss_desc(cpu, t); load_TR_desc(); load_LDT(&init_mm.context); -- cgit v1.2.3 From f8fffa458368ed3d57385698f775880db629bd1a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Feb 2008 21:36:28 -0800 Subject: x86: apic_is_clustered_box for vsmp quad core 8 socket system will have apic id lifting.the apic id range could be [4, 0x23]. and apic_is_clustered_box will think that need to three clusters and that is larger than 2. So it is treated as a clustered_box. and will get: Marking TSC unstable due to TSCs unsynchronized even if the CPUs have X86_FEATURE_CONSTANT_TSC set. this quick fix will check if the cpu is from AMD. but vsmp still needs that checking... this patch is fix to make sure that vsmp not to be passed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 4 ++-- arch/x86/kernel/vsmp_64.c | 27 +++++++++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac2405ed504d..f6eb01d8923a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1182,9 +1182,9 @@ __cpuinit int apic_is_clustered_box(void) * there is not this kind of box with AMD CPU yet. * Some AMD box with quadcore cpu and 8 sockets apicid * will be [4, 0x23] or [8, 0x27] could be thought to - * have three apic_clusters. So go out early. + * vsmp box still need checking... 
*/ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (!is_vsmp_box() && (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) return 0; bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 54202b1805da..a00961d42e75 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -72,19 +72,34 @@ static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, } +static int vsmp = -1; + +int is_vsmp_box(void) +{ + if (vsmp != -1) + return vsmp; + + vsmp = 0; + if (!early_pci_allowed()) + return vsmp; + + /* Check if we are running on a ScaleMP vSMP box */ + if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) == + (PCI_VENDOR_ID_SCALEMP || (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16))) + vsmp = 1; + + return vsmp; +} + void __init vsmp_init(void) { void *address; unsigned int cap, ctl, cfg; - if (!early_pci_allowed()) + if (!is_vsmp_box()) return; - /* Check if we are running on a ScaleMP vSMP box */ - if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != - PCI_VENDOR_ID_SCALEMP) || - (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != - PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) + if (!early_pci_allowed()) return; /* If we are, use the distinguished irq functions */ -- cgit v1.2.3 From d32de19ca9076eeed1e9bfd678e550fbad437535 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Feb 2008 23:20:01 -0800 Subject: x86: remove wrong setting about CONSTANT_TSC for intel cpu early_init_intel() on 64-bit is introduced by commit 2b16a2353814a513cdb5c5c739b76a19d7ea39ce Author: Andi Kleen Date: Wed Jan 30 13:32:40 2008 +0100 x86: move X86_FEATURE_CONSTANT_TSC into early cpu feature detection sets CONSTANT_TSC for intel cpus - but it is already set in init_intel(). don't need to set that two times in early_init_intel() and init_intel(). this patch removes the init_intel() one. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index e3cb3ea96ca1..164200257b68 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -866,9 +866,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -- cgit v1.2.3 From 9716951efd98ada69c417adddc85d9bbe1d7835a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:54:01 +0100 Subject: x86: clean up cpu capabilities accesses, generic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/setup_64.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ef9e31b89b35..bd111ce8f605 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -374,7 +374,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #else c->apicid = (ebx >> 24) & 0xFF; #endif - if (c->x86_capability[0] & (1<<19)) + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 164200257b68..d65b73e63384 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -687,7 +687,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, (unsigned long 
*)&c->x86_capability); + clear_cpu_cap(c, 0*32+31); /* On C+ stepping K8 rep microcode works well for copy/memset */ level = cpuid_eax(1); @@ -823,7 +823,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } static void __cpuinit init_intel(struct cpuinfo_x86 *c) @@ -929,7 +929,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (c->x86_capability[0] & (1<<19)) + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ -- cgit v1.2.3 From 16282a8e25f1783f296e5116dcef810a8e68d1a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:49:57 +0100 Subject: x86: clean up cpu capabilities accesses, amd.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1a3e1bb4d758..33d38f8305ee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -68,7 +68,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (cpuid_eax(0x80000000) >= 0x80000007) { c->x86_power = cpuid_edx(0x80000007); if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } } @@ -105,9 +105,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); + clear_cpu_cap(c, 0*32+31); r = 
get_model_name(c); @@ -131,8 +131,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86_model < 6) { /* Based on AMD doc 20734R - June 2000 */ if (c->x86_model == 0) { - clear_bit(X86_FEATURE_APIC, c->x86_capability); - set_bit(X86_FEATURE_PGE, c->x86_capability); + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); } break; } @@ -208,7 +208,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86_model == 13 || c->x86_model == 9 || (c->x86_model == 8 && c->x86_mask >= 8)) - set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } @@ -231,7 +231,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) rdmsr(MSR_K7_HWCR, l, h); l &= ~0x00008000; wrmsr(MSR_K7_HWCR, l, h); - set_bit(X86_FEATURE_XMM, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_XMM); } } @@ -256,14 +256,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Use K8 tuning for Fam10h and Fam11h */ case 0x10: case 0x11: - set_bit(X86_FEATURE_K8, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_K8); break; case 6: - set_bit(X86_FEATURE_K7, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_K7); break; } if (c->x86 >= 6) - set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); display_cacheinfo(c); @@ -304,10 +304,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) - clear_bit(X86_FEATURE_MCE, c->x86_capability); + clear_cpu_cap(c, X86_FEATURE_MCE); if (cpu_has_xmm2) - set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) -- cgit v1.2.3 From e1a94a974c2aa3c0a7c1a915c805211fb6773de1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:51:22 +0100 Subject: x86: clean up cpu capabilities accesses, 
centaur.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index efe8da88da53..e0f45edd6a55 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -282,12 +282,12 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_FCR, lo, hi); lo |= (1<<1 | 1<<7); wrmsr(MSR_VIA_FCR, lo, hi); - set_bit(X86_FEATURE_CX8, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CX8); } /* Before Nehemiah, the C3's had 3dNOW! */ if (c->x86_model >= 6 && c->x86_model < 9) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_3DNOW); get_model_name(c); display_cacheinfo(c); @@ -327,7 +327,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); + clear_cpu_cap(c, 0*32+31); switch (c->x86) { case 5: @@ -337,7 +337,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; fcr_clr = DPDC; printk(KERN_NOTICE "Disabling bugged TSC.\n"); - clear_bit(X86_FEATURE_TSC, c->x86_capability); + clear_cpu_cap(c, X86_FEATURE_TSC); #ifdef CONFIG_X86_OOSTORE centaur_create_optimal_mcr(); /* @@ -418,12 +418,12 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); } /* Emulate MTRRs using Centaur's MCR. */ - set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); /* Report CX8 */ - set_bit(X86_FEATURE_CX8, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CX8); /* Set 3DNow! on Winchip 2 and above. */ if (c->x86_model >= 8) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_3DNOW); /* See if we can find out some more. 
*/ if (cpuid_eax(0x80000000) >= 0x80000005) { /* Yes, we can. */ -- cgit v1.2.3 From 4cbe668add030a35e0592a9bb292e0f2a1bcea88 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:51:32 +0100 Subject: x86: clean up cpu capabilities accesses, common.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bd111ce8f605..57a46c36fa23 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -409,7 +409,7 @@ static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) lo |= 0x200000; wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_bit(X86_FEATURE_PN, c->x86_capability); + clear_cpu_cap(c, X86_FEATURE_PN); /* Disabling the serial number may affect the cpuid level */ c->cpuid_level = cpuid_eax(0); -- cgit v1.2.3 From 1d007cd5aeea2c9283e01433dbce4c9f91dd7823 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:27 +0100 Subject: x86: clean up cpu capabilities accesses, cyrix.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index f7085bde4c28..3fd7a67bb06a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -190,12 +190,12 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); + clear_cpu_cap(c, 0*32+31); /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ - if (test_bit(1*32+24, c->x86_capability)) { - clear_bit(1*32+24, c->x86_capability); - set_bit(X86_FEATURE_CXMMX, c->x86_capability); + if 
(test_cpu_cap(c, 1*32+24)) { + clear_cpu_cap(c, 1*32+24); + set_cpu_cap(c, X86_FEATURE_CXMMX); } do_cyrix_devid(&dir0, &dir1); @@ -242,7 +242,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) } else /* 686 */ p = Cx86_cb+1; /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); /* 6x86's contain this bug */ c->coma_bug = 1; break; @@ -319,7 +319,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) (c->x86_model)++; /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; case 0xf: /* Cyrix 486 without DEVID registers */ -- cgit v1.2.3 From d0e95ebdc5cf5fe6fa29f2e0a5c6a0fe5c5aa50f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:33 +0100 Subject: x86: clean up cpu capabilities in arch/x86/kernel/cpu/intel.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c9ecf378cc41..fe9224c51d37 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -143,12 +143,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_bit(X86_FEATURE_SEP, c->x86_capability); + clear_cpu_cap(c, X86_FEATURE_SEP); /* * Names for the Pentium II/Celeron processors @@ -209,19 +209,19 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #endif if (cpu_has_xmm2) - 
set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (c->x86 == 15) { - set_bit(X86_FEATURE_P4, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_P4); } if (c->x86 == 6) - set_bit(X86_FEATURE_P3, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) - set_bit(X86_FEATURE_BTS, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) - set_bit(X86_FEATURE_PEBS, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_PEBS); } if (cpu_has_bts) -- cgit v1.2.3 From 10cd5a1e5403d79a2d53425e6a4c8612e02ba973 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:39 +0100 Subject: x86: clean up cpu capabilities accesses, transmeta.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/transmeta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index daee21d208f4..b911a2c61b8f 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -74,7 +74,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) wrmsr(0x80860004, cap_mask, uk); /* All Transmeta CPUs have a constant TSC */ - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); #ifdef CONFIG_SYSCTL /* -- cgit v1.2.3 From b5964405fbc4fd4c57e0e1f86bc9f1b3dbfa040a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 11:15:50 +0100 Subject: x86: clean up traps_32.c Before: total: 86 errors, 29 warnings, 1248 lines checked After: total: 0 errors, 17 warnings, 1281 lines checked No code changed: arch/x86/kernel/traps_32.o: text data bss dec hex filename 8711 2168 72 10951 2ac7 traps_32.o.before 8711 2168 72 10951 2ac7 traps_32.o.after (md5 sums differ because some stack offset positions changed.) 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 604 ++++++++++++++++++++++++--------------------- 1 file changed, 318 insertions(+), 286 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index b22c01e05a18..57a5704e3f6c 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -9,26 +9,28 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include +#include +#include #include +#include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #ifdef CONFIG_EISA #include @@ -43,21 +45,18 @@ #include #endif +#include +#include #include -#include -#include -#include #include +#include +#include +#include #include #include #include -#include #include -#include -#include -#include - -#include +#include #include "mach_traps.h" @@ -69,7 +68,7 @@ EXPORT_SYMBOL_GPL(used_vectors); asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ -char ignore_fpu_irq = 0; +char ignore_fpu_irq; /* * The IDT has to be page-aligned to simplify the Pentium @@ -105,12 +104,13 @@ static unsigned int code_bytes = 64; void printk_address(unsigned long address, int reliable) { #ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; + char namebuf[KSYM_NAME_LEN]; + unsigned long offset = 0; + unsigned long symsize; const char *symname; - char *modname; - char *delim = ":"; - char namebuf[128]; char reliab[4] = ""; + char *delim = ":"; + char *modname; symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); @@ -138,13 +138,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned s /* The form of the top of the frame on the stack */ struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; + struct stack_frame *next_frame; + unsigned long return_address; }; -static inline unsigned long print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data) { struct stack_frame *frame = (struct stack_frame *)bp; @@ -166,7 +167,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return bp; } -#define MSG(msg) ops->warning(data, msg) +#define MSG(msg) ops->warning(data, msg) void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, @@ -177,6 +178,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task != current) stack = (unsigned long *)task->thread.sp; @@ -186,7 +188,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm ("movl %%ebp, %0" : "=r" (bp) : ); + asm("movl %%ebp, %0" : 
"=r" (bp) :); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -196,15 +198,18 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, while (1) { struct thread_info *context; + context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); bp = print_context_stack(context, stack, bp, ops, data); - /* Should be after the line below, but somewhere - in early boot context comes out corrupted and we - can't reference it -AK */ + /* + * Should be after the line below, but somewhere + * in early boot context comes out corrupted and we + * can't reference it: + */ if (ops->stack(data, "IRQ") < 0) break; - stack = (unsigned long*)context->previous_esp; + stack = (unsigned long *)context->previous_esp; if (!stack) break; touch_nmi_watchdog(); @@ -243,15 +248,15 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, }; static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) + unsigned long *stack, unsigned long bp, char *log_lvl) { dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); printk("%s =======================\n", log_lvl); @@ -263,21 +268,22 @@ void show_trace(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, stack, bp, ""); } -static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) +static void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long 
*stack; int i; if (sp == NULL) { if (task) - sp = (unsigned long*)task->thread.sp; + sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; } stack = sp; - for(i = 0; i < kstack_depth_to_print; i++) { + for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; if (i && ((i % 8) == 0)) @@ -285,6 +291,7 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%08lx ", *stack++); } printk("\n%sCall Trace:\n", log_lvl); + show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -299,8 +306,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) */ void dump_stack(void) { - unsigned long stack; unsigned long bp = 0; + unsigned long stack; #ifdef CONFIG_FRAME_POINTER if (!bp) @@ -312,6 +319,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); + show_trace(current, NULL, &stack, bp); } @@ -323,6 +331,7 @@ void show_registers(struct pt_regs *regs) print_modules(); __show_registers(regs, 0); + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); @@ -331,10 +340,10 @@ void show_registers(struct pt_regs *regs) * time of the fault.. 
*/ if (!user_mode_vm(regs)) { - u8 *ip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; + u8 *ip; printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); @@ -361,7 +370,7 @@ void show_registers(struct pt_regs *regs) } } printk("\n"); -} +} int is_valid_bugaddr(unsigned long ip) { @@ -377,10 +386,10 @@ int is_valid_bugaddr(unsigned long ip) static int die_counter; -int __kprobes __die(const char * str, struct pt_regs * regs, long err) +int __kprobes __die(const char *str, struct pt_regs *regs, long err) { - unsigned long sp; unsigned short ss; + unsigned long sp; printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT @@ -395,8 +404,8 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) != - NOTIFY_STOP) { + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { + show_registers(regs); /* Executive summary in case the oops scrolled away */ sp = (unsigned long) (&regs->sp); @@ -408,17 +417,18 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); printk(" SS:ESP %04x:%08lx\n", ss, sp); + return 0; - } else { - return 1; } + + return 1; } /* - * This is gone through when something in the kernel has done something bad and - * is about to be terminated. 
+ * This is gone through when something in the kernel has done something bad + * and is about to be terminated: */ -void die(const char * str, struct pt_regs * regs, long err) +void die(const char *str, struct pt_regs *regs, long err) { static struct { raw_spinlock_t lock; @@ -440,8 +450,9 @@ void die(const char * str, struct pt_regs * regs, long err) die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); - } else + } else { raw_local_irq_save(flags); + } if (++die.lock_owner_depth < 3) { report_bug(regs->ip, regs); @@ -474,15 +485,16 @@ void die(const char * str, struct pt_regs * regs, long err) do_exit(SIGSEGV); } -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +static inline void +die_if_kernel(const char *str, struct pt_regs *regs, long err) { if (!user_mode_vm(regs)) die(str, regs, err); } -static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, - struct pt_regs * regs, long error_code, - siginfo_t *info) +static void __kprobes +do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, + long error_code, siginfo_t *info) { struct task_struct *tsk = current; @@ -495,111 +507,112 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, if (!user_mode(regs)) goto kernel_trap; - trap_signal: { - /* - * We want error_code and trap_no set for userspace faults and - * kernelspace faults which result in die(), but not - * kernelspace faults which are fixed up. die() gives the - * process no chance to handle the signal and notice the - * kernel fault information, so that won't result in polluting - * the information about previously queued, but not yet - * delivered, faults. See also do_general_protection below. 
- */ - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; +trap_signal: + /* + * We want error_code and trap_no set for userspace faults and + * kernelspace faults which result in die(), but not + * kernelspace faults which are fixed up. die() gives the + * process no chance to handle the signal and notice the + * kernel fault information, so that won't result in polluting + * the information about previously queued, but not yet + * delivered, faults. See also do_general_protection below. + */ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - } + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; - kernel_trap: { - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - die(str, regs, error_code); - } - return; +kernel_trap: + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + die(str, regs, error_code); } + return; - vm86_trap: { - int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr); - if (ret) goto trap_signal; - return; - } +vm86_trap: + if (handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + goto trap_signal; + return; } -#define DO_ERROR(trapnr, signr, str, name) \ -void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ +#define DO_ERROR(trapnr, signr, str, name) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ -void do_##name(struct 
pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - if (irq) \ - local_irq_enable(); \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + siginfo_t info; \ + if (irq) \ + local_irq_enable(); \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ } -#define DO_VM86_ERROR(trapnr, signr, str, name) \ -void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ +#define DO_VM86_ERROR(trapnr, signr, str, name) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ } -#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - trace_hardirqs_fixup(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ +#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + 
siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ } -DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) #ifndef CONFIG_KPROBES -DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) +DO_VM86_ERROR(3, SIGTRAP, "int3", int3) #endif -DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) -DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) +DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) -void __kprobes do_general_protection(struct pt_regs * regs, - long error_code) +void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &current->thread; + struct thread_struct *thread; + struct tss_struct *tss; + int cpu; + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); + thread = &current->thread; /* * Perform the lazy TSS's I/O bitmap copy. 
If the TSS has an @@ -616,14 +629,16 @@ void __kprobes do_general_protection(struct pt_regs * regs, * If the previously set map was extending to higher ports * than the current one, pad extra space with 0xff (no access). */ - if (thread->io_bitmap_max < tss->io_bitmap_max) + if (thread->io_bitmap_max < tss->io_bitmap_max) { memset((char *) tss->io_bitmap + thread->io_bitmap_max, 0xff, tss->io_bitmap_max - thread->io_bitmap_max); + } tss->io_bitmap_max = thread->io_bitmap_max; tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; tss->io_bitmap_owner = thread; put_cpu(); + return; } put_cpu(); @@ -636,6 +651,7 @@ void __kprobes do_general_protection(struct pt_regs * regs, current->thread.error_code = error_code; current->thread.trap_no = 13; + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && printk_ratelimit()) { printk(KERN_INFO @@ -666,21 +682,24 @@ gp_in_kernel: } static __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs * regs) +mem_parity_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " - "CPU %d.\n", reason, smp_processor_id()); - printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + printk(KERN_EMERG + "Uhhuh. 
NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + printk(KERN_EMERG + "You have some hardware problem, likely on the PCI bus.\n"); #if defined(CONFIG_EDAC) - if(edac_handler_set()) { + if (edac_handler_set()) { edac_atomic_assert_error(); return; } #endif if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + panic("NMI: Not continuing"); printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); @@ -689,7 +708,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) } static __kprobes void -io_check_error(unsigned char reason, struct pt_regs * regs) +io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -699,28 +718,35 @@ io_check_error(unsigned char reason, struct pt_regs * regs) /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & 0xf) | 8; outb(reason, 0x61); + i = 2000; - while (--i) udelay(1000); + while (--i) + udelay(1000); + reason &= ~8; outb(reason, 0x61); } static __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { #ifdef CONFIG_MCA - /* Might actually be able to figure out what the guilty party - * is. */ - if( MCA_bus ) { + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { mca_handle_nmi(); return; } #endif - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " - "CPU %d.\n", reason, smp_processor_id()); + printk(KERN_EMERG + "Uhhuh. 
NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + panic("NMI: Not continuing"); printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } @@ -729,14 +755,13 @@ static DEFINE_SPINLOCK(nmi_print_lock); void __kprobes die_nmi(struct pt_regs *regs, const char *msg) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == - NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try - * to get a message out. + * to get a message out: */ bust_spinlocks(1); printk(KERN_EMERG "%s", msg); @@ -747,9 +772,10 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) spin_unlock(&nmi_print_lock); bust_spinlocks(0); - /* If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can. - */ + /* + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ if (!user_mode_vm(regs)) { current->thread.trap_no = 2; crash_kexec(regs); @@ -758,14 +784,14 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs * regs) +static __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - /* Only the BSP gets external NMIs from the system. 
*/ + /* Only the BSP gets external NMIs from the system: */ if (!smp_processor_id()) reason = get_nmi_reason(); - + if (!(reason & 0xc0)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -778,8 +804,10 @@ static __kprobes void default_do_nmi(struct pt_regs * regs) if (nmi_watchdog_tick(regs, reason)) return; if (!do_nmi_callback(regs, smp_processor_id())) -#endif unknown_nmi_error(reason, regs); +#else + unknown_nmi_error(reason, regs); +#endif return; } @@ -791,14 +819,14 @@ static __kprobes void default_do_nmi(struct pt_regs * regs) io_check_error(reason, regs); /* * Reassert NMI in case it became active meanwhile - * as it's edge-triggered. + * as it's edge-triggered: */ reassert_nmi(); } static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs * regs, long error_code) +__kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; @@ -834,9 +862,12 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; - /* This is an interrupt gate, because kprobes wants interrupts - disabled. Normal trap handlers don't. */ + /* + * This is an interrupt gate, because kprobes wants interrupts + * disabled. Normal trap handlers don't. + */ restore_interrupts(regs); + do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); } #endif @@ -851,7 +882,7 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) * from user space. Such code must not hold kernel locks (since it * can equally take a page fault), therefore it is safe to call * force_sig_info even though that claims and releases locks. 
- * + * * Code in ./signal.c ensures that the debug control register * is restored before we deliver any signal, and therefore that * user code runs with the correct debug control register even though @@ -863,10 +894,10 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) * find every occurrence of the TF bit that could be saved away even * by user code) */ -void __kprobes do_debug(struct pt_regs * regs, long error_code) +void __kprobes do_debug(struct pt_regs *regs, long error_code) { - unsigned int condition; struct task_struct *tsk = current; + unsigned int condition; trace_hardirqs_fixup(); @@ -914,7 +945,8 @@ void __kprobes do_debug(struct pt_regs * regs, long error_code) /* Ok, finally something we can handle */ send_sigtrap(tsk, regs, error_code); - /* Disable additional traps. They'll be re-enabled when + /* + * Disable additional traps. They'll be re-enabled when * the signal is delivered. */ clear_dr7: @@ -938,9 +970,10 @@ clear_TF_reenable: */ void math_error(void __user *ip) { - struct task_struct * task; + struct task_struct *task; + unsigned short cwd; + unsigned short swd; siginfo_t info; - unsigned short cwd, swd; /* * Save the info for the exception handler and clear the error. 
@@ -966,36 +999,36 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); switch (swd & ~cwd & 0x3f) { - case 0x000: /* No unmasked exception */ - return; - default: /* Multiple exceptions */ - break; - case 0x001: /* Invalid Op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; + case 0x000: /* No unmasked exception */ + return; + default: /* Multiple exceptions */ + break; + case 0x001: /* Invalid Op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; } force_sig_info(SIGFPE, &info, task); } -void do_coprocessor_error(struct pt_regs * regs, long error_code) +void do_coprocessor_error(struct pt_regs *regs, long error_code) { ignore_fpu_irq = 1; math_error((void __user *)regs->ip); @@ -1003,9 +1036,9 @@ void do_coprocessor_error(struct pt_regs * regs, long error_code) static void simd_math_error(void __user *ip) { - struct task_struct * task; - siginfo_t info; + struct task_struct *task; unsigned short mxcsr; + siginfo_t info; /* * Save the info for the exception handler and clear the error. 
@@ -1026,82 +1059,80 @@ static void simd_math_error(void __user *ip) */ mxcsr = get_fpu_mxcsr(task); switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; } force_sig_info(SIGFPE, &info, task); } -void do_simd_coprocessor_error(struct pt_regs * regs, - long error_code) +void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { if (cpu_has_xmm) { /* Handle SIMD FPU exceptions on PIII+ processors. */ ignore_fpu_irq = 1; simd_math_error((void __user *)regs->ip); - } else { - /* - * Handle strange cache flush from user space exception - * in all other cases. This is undocumented behaviour. - */ - if (regs->flags & VM_MASK) { - handle_vm86_fault((struct kernel_vm86_regs *)regs, - error_code); - return; - } - current->thread.trap_no = 19; - current->thread.error_code = error_code; - die_if_kernel("cache flush denied", regs, error_code); - force_sig(SIGSEGV, current); + return; + } + /* + * Handle strange cache flush from user space exception + * in all other cases. This is undocumented behaviour. 
+ */ + if (regs->flags & VM_MASK) { + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); + return; } + current->thread.trap_no = 19; + current->thread.error_code = error_code; + die_if_kernel("cache flush denied", regs, error_code); + force_sig(SIGSEGV, current); } -void do_spurious_interrupt_bug(struct pt_regs * regs, - long error_code) +void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { #if 0 /* No need to warn about this any longer. */ - printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); + printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); #endif } -unsigned long patch_espfix_desc(unsigned long uesp, - unsigned long kesp) +unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) { struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; unsigned long base = (kesp - uesp) & -THREAD_SIZE; unsigned long new_kesp = kesp - base; unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + /* Set up base for espfix segment */ - desc &= 0x00f0ff0000000000ULL; - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | ((((__u64)base) << 32) & 0xff00000000000000ULL) | ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | (lim_pages & 0xffff); *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + return new_kesp; } /* - * 'math_state_restore()' saves the current math information in the + * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * * Careful.. There are problems with IBM-designed IRQ13 behaviour. 
@@ -1115,7 +1146,7 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; - clts(); /* Allow maths ops (or we recurse) */ + clts(); /* Allow maths ops (or we recurse) */ if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); @@ -1128,53 +1159,52 @@ EXPORT_SYMBOL_GPL(math_state_restore); asmlinkage void math_emulate(long arg) { - printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); - printk(KERN_EMERG "killing %s.\n",current->comm); - force_sig(SIGFPE,current); + printk(KERN_EMERG + "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n", current->comm); + force_sig(SIGFPE, current); schedule(); } #endif /* CONFIG_MATH_EMULATION */ - void __init trap_init(void) { int i; #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); - if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { + + if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) EISA_bus = 1; - } early_iounmap(p, 4); #endif #ifdef CONFIG_X86_LOCAL_APIC init_apic_mappings(); #endif - - set_trap_gate(0,&divide_error); - set_intr_gate(1,&debug); - set_intr_gate(2,&nmi); + set_trap_gate(0, &divide_error); + set_intr_gate(1, &debug); + set_intr_gate(2, &nmi); set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ - set_system_gate(4,&overflow); - set_trap_gate(5,&bounds); - set_trap_gate(6,&invalid_op); - set_trap_gate(7,&device_not_available); - set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); - set_trap_gate(9,&coprocessor_segment_overrun); - set_trap_gate(10,&invalid_TSS); - set_trap_gate(11,&segment_not_present); - set_trap_gate(12,&stack_segment); - set_trap_gate(13,&general_protection); - set_intr_gate(14,&page_fault); - set_trap_gate(15,&spurious_interrupt_bug); - set_trap_gate(16,&coprocessor_error); - set_trap_gate(17,&alignment_check); + set_system_gate(4, &overflow); + set_trap_gate(5, &bounds); + set_trap_gate(6, &invalid_op); + set_trap_gate(7, 
&device_not_available); + set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); + set_trap_gate(9, &coprocessor_segment_overrun); + set_trap_gate(10, &invalid_TSS); + set_trap_gate(11, &segment_not_present); + set_trap_gate(12, &stack_segment); + set_trap_gate(13, &general_protection); + set_intr_gate(14, &page_fault); + set_trap_gate(15, &spurious_interrupt_bug); + set_trap_gate(16, &coprocessor_error); + set_trap_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE - set_trap_gate(18,&machine_check); + set_trap_gate(18, &machine_check); #endif - set_trap_gate(19,&simd_coprocessor_error); + set_trap_gate(19, &simd_coprocessor_error); /* * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. @@ -1187,21 +1217,22 @@ void __init trap_init(void) printk("done.\n"); } if (cpu_has_xmm) { - printk(KERN_INFO "Enabling unmasked SIMD FPU exception " - "support... "); + printk(KERN_INFO + "Enabling unmasked SIMD FPU exception support... "); set_in_cr4(X86_CR4_OSXMMEXCPT); printk("done.\n"); } - set_system_gate(SYSCALL_VECTOR,&system_call); + set_system_gate(SYSCALL_VECTOR, &system_call); - /* Reserve all the builtin and the syscall vector. */ + /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); + set_bit(SYSCALL_VECTOR, used_vectors); /* - * Should be a barrier for any external CPU state. 
+ * Should be a barrier for any external CPU state: */ cpu_init(); @@ -1211,6 +1242,7 @@ void __init trap_init(void) static int __init kstack_setup(char *s) { kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 1; } __setup("kstack=", kstack_setup); -- cgit v1.2.3 From 11ae9dd48128790d3d2ece6bc916c001b4a1d147 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Feb 2008 13:23:32 +0300 Subject: x86: switch to proc_create() Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 5 +---- arch/x86/kernel/cpu/mtrr/if.c | 7 +++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d4438ef296d8..f0030a0999c7 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2217,7 +2217,6 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { */ static int __init apm_init(void) { - struct proc_dir_entry *apm_proc; struct desc_struct *gdt; int err; @@ -2322,9 +2321,7 @@ static int __init apm_init(void) set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); - apm_proc = create_proc_entry("apm", 0, NULL); - if (apm_proc) - apm_proc->proc_fops = &apm_file_ops; + proc_create("apm", 0, NULL, &apm_file_ops); kapmd_task = kthread_create(apm, NULL, "kapmd"); if (IS_ERR(kapmd_task)) { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 91e150acb46c..1960f1985e5e 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -424,11 +424,10 @@ static int __init mtrr_if_init(void) return -ENODEV; proc_root_mtrr = - create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); - if (proc_root_mtrr) { + proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops); + + if (proc_root_mtrr) proc_root_mtrr->owner = THIS_MODULE; - proc_root_mtrr->proc_fops = &mtrr_fops; - } return 0; } -- cgit v1.2.3 From e941f27a7a0f4ecac9ba8237b8a329bab4bd622f Mon Sep 17 00:00:00 
2001 From: Paolo Ciarrocchi Date: Fri, 29 Feb 2008 13:25:30 +0100 Subject: x86: coding style fixes to arch/x86/kernel/early_printk.c Before: total: 17 errors, 3 warnings, 254 lines checked After: total: 2 errors, 3 warnings, 254 lines checked paolo@paolo-desktop:/tmp/b$ md5sum * da32f5cd8f248970e4809e1005393e95 early_printk.o.after da32f5cd8f248970e4809e1005393e95 early_printk.o.before paolo@paolo-desktop:/tmp/b$ size * text data bss dec hex filename 1172 280 12 1464 5b8 early_printk.o.after 1172 280 12 1464 5b8 early_printk.o.befor Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cff84cd9987f..a8f965dd58df 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -108,12 +108,12 @@ static __init void early_serial_init(char *s) if (*s) { unsigned port; - if (!strncmp(s,"0x",2)) { + if (!strncmp(s, "0x", 2)) { early_serial_base = simple_strtoul(s, &e, 16); } else { static int bases[] = { 0x3f8, 0x2f8 }; - if (!strncmp(s,"ttyS",4)) + if (!strncmp(s, "ttyS", 4)) s += 4; port = simple_strtoul(s, &e, 10); if (port > 1 || s == e) @@ -202,9 +202,9 @@ void early_printk(const char *fmt, ...) 
int n; va_list ap; - va_start(ap,fmt); - n = vscnprintf(buf,512,fmt,ap); - early_console->write(early_console,buf,n); + va_start(ap, fmt); + n = vscnprintf(buf, 512, fmt, ap); + early_console->write(early_console, buf, n); va_end(ap); } @@ -229,15 +229,15 @@ static int __init setup_early_printk(char *buf) early_serial_init(buf); early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3) - && boot_params.screen_info.orig_video_isVGA == 1) { + && boot_params.screen_info.orig_video_isVGA == 1) { max_xpos = boot_params.screen_info.orig_video_cols; max_ypos = boot_params.screen_info.orig_video_lines; current_ypos = boot_params.screen_info.orig_y; early_console = &early_vga_console; - } else if (!strncmp(buf, "simnow", 6)) { - simnow_init(buf + 6); - early_console = &simnow_console; - keep_early = 1; + } else if (!strncmp(buf, "simnow", 6)) { + simnow_init(buf + 6); + early_console = &simnow_console; + keep_early = 1; #ifdef CONFIG_HVC_XEN } else if (!strncmp(buf, "xen", 3)) { early_console = &xenboot_console; -- cgit v1.2.3 From c9cf39ae64a6c86872e580f921afec64ab9770f8 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 29 Feb 2008 13:26:56 +0100 Subject: x86: coding style fixes to x86/kernel/early_printk.c Depends on: [PATCH 2/3] x86: coding style fixes to arch/x86/kernel/early_printk.c Remove two: ERROR: do not initialise statics to 0 or NULL paolo@paolo-desktop:/tmp/c$ size * text data bss dec hex filename 1172 280 12 1464 5b8 early_printk.o.after 1172 280 12 1464 5b8 early_printk.o.before This patch is changing the binary output: paolo@paolo-desktop:/tmp/c$ md5sum * dad9a9a881e0eeda62cc5645bd3d7cad early_printk.o.after da32f5cd8f248970e4809e1005393e95 early_printk.o.before because the two variables moved to another section. No change in functionality. 
Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a8f965dd58df..643fd861b724 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -13,7 +13,7 @@ #define VGABASE (__ISA_IO_base + 0xb8000) static int max_ypos = 25, max_xpos = 80; -static int current_ypos = 25, current_xpos = 0; +static int current_ypos = 25, current_xpos; static void early_vga_write(struct console *con, const char *str, unsigned n) { @@ -194,7 +194,7 @@ static struct console simnow_console = { /* Direct interface for emergencies */ static struct console *early_console = &early_vga_console; -static int early_console_initialized = 0; +static int early_console_initialized; void early_printk(const char *fmt, ...) { -- cgit v1.2.3 From dedd04be71cea3d5adb14c8f674e801911c89a2f Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 1 Mar 2008 17:09:12 +0100 Subject: x86: reserve end-of-conventional-memory to 1MB on 32-bit This patch adds explicit detection of the EBDA and reservation of the rom and adapter address space 0xa0000-0x100000 to the i386 kernels. Before this patch, the EBDA size was hardcoded as 4Kb. Also, the reservation of the adapter range was done by modifying the e820 map which is now not necessary any longer, and that code is removed from copy_e820_map. The amount of conventional memory and the start of the EBDA are detected by reading the BIOS data area directly. Paravirtual environments do not provide this area, so we bail out early in that case. They will just have to set up a correct memory map to start with. 
Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_32.c | 27 +++++-------------- arch/x86/kernel/setup_32.c | 64 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 80444c5c9b14..0240cd778365 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -450,38 +450,25 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) * thinkpad 560x, for example, does not cooperate with the memory * detection code.) */ -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) return -1; do { - unsigned long long start = biosmap->addr; - unsigned long long size = biosmap->size; - unsigned long long end = start + size; - unsigned long type = biosmap->type; + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; /* Overflow in 64 bits? Ignore the memory map. */ if (start > end) return -1; - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. 
- */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } add_memory_region(start, size, type); - } while (biosmap++,--nr_map); + } while (biosmap++, --nr_map); + return 0; } diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2b3e5d45176b..14e293edd23f 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -385,15 +385,60 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } +#define BIOS_EBDA_SEGMENT 0x40E +#define BIOS_LOWMEM_KILOBYTES 0x413 + /* - * workaround for Dell systems that neglect to reserve EBDA + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. */ static void __init reserve_ebda_region(void) { - unsigned int addr; - addr = get_bios_ebda(); - if (addr) - reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT); + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. 
*/ +#ifdef CONFIG_PARAVIRT + if ((boot_params.hdr.version >= 0x207) && + (boot_params.hdr.hardware_subarch != 0)) { + return; + } +#endif + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); + ebda_addr <<= 4; + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); } #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -617,16 +662,9 @@ void __init setup_bootmem_allocator(void) */ reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); - /* reserve EBDA region, it's a 4K region */ + /* reserve EBDA region */ reserve_ebda_region(); - /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent - PCI prefetch into it (errata #56). Usually the page is reserved anyways, - unless you have no PS/2 mouse plugged in. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 6) - reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT); - #ifdef CONFIG_SMP /* * But first pinch a few for the stack/trampoline stuff -- cgit v1.2.3 From f6eb62b6924b99ec7da97fb6f554685a9ad6dce4 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 25 Feb 2008 19:07:51 +0100 Subject: x86: reserve_early end-of-conventional-memory to 1MB, 64-bit Explicitly reserve_early the whole address range from the end of conventional memory as reported by the bios data area up to the 1Mb mark. 
Regard the info retrieved from the BIOS data area with a bit of paranoia, though, because some BIOSes forget to register the EBDA correctly. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 38f32e798a99..b684552347df 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -49,33 +49,42 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define EBDA_ADDR_POINTER 0x40E +#define BIOS_EBDA_SEGMENT 0x40E +#define BIOS_LOWMEM_KILOBYTES 0x413 +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. + */ static __init void reserve_ebda(void) { - unsigned ebda_addr, ebda_size; + unsigned int lowmem, ebda_addr; - /* - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E - */ - ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); ebda_addr <<= 4; - if (!ebda_addr) - return; + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; - ebda_size = *(unsigned short *)__va(ebda_addr); + /* Fixup: bios does not report an EBDA at all.
*/ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; - /* Round EBDA up to pages */ - if (ebda_size == 0) - ebda_size = 1; - ebda_size <<= 10; - ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); - if (ebda_size > 64*1024) - ebda_size = 64*1024; + /* Paranoia: should never happen, but... */ + if (lowmem >= 0x100000) + lowmem = 0xa0000; - reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA"); + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early(lowmem, 0x100000, "BIOS reserved"); } void __init x86_64_start_kernel(char * real_mode_data) -- cgit v1.2.3 From 320a6b2efceccb652befca0b1c9a92d6e4256ef6 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 1 Mar 2008 17:12:43 +0100 Subject: x86: reserve end-of-conventional-memory to 1MB, 64-bit This patch is an add-on to the 64-bit ebda patch. It makes the functions reserve_ebda_region (renamed from reserve_ebda) and copy_e820_map equal to the 32-bit versions of the previous patch. Changes: Use u64 and u32 for local variables in copy_e820_map. The amount of conventional memory and the start of the EBDA are detected by reading the BIOS data area directly. Paravirtual environments do not provide this area, so we bail out early in that case. They will just have to set up a correct memory map to start with. Add a safety net for zeroed out BIOS data area. 
Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 8 ++++---- arch/x86/kernel/head64.c | 28 +++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 8b914a833ac6..4a0953857cb2 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -621,10 +621,10 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) return -1; do { - unsigned long start = biosmap->addr; - unsigned long size = biosmap->size; - unsigned long end = start + size; - unsigned long type = biosmap->type; + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; /* Overflow in 64 bits? Ignore the memory map. */ if (start > end) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b684552347df..269a6b481fe6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -55,12 +55,30 @@ static void __init copy_bootdata(char *real_mode_data) /* * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. */ -static __init void reserve_ebda(void) +static void __init reserve_ebda_region(void) { unsigned int lowmem, ebda_addr; + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. 
We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ +#ifdef CONFIG_PARAVIRT + if ((boot_params.hdr.version >= 0x207) && + (boot_params.hdr.hardware_subarch != 0)) { + return; + } +#endif + /* end of low (conventional) memory */ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); lowmem <<= 10; @@ -80,8 +98,8 @@ static __init void reserve_ebda(void) lowmem = 0x9f000; /* Paranoia: should never happen, but... */ - if (lowmem >= 0x100000) - lowmem = 0xa0000; + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; /* reserve all memory between lowmem and the 1MB mark */ reserve_early(lowmem, 0x100000, "BIOS reserved"); @@ -140,7 +158,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } - reserve_ebda(); + reserve_ebda_region(); /* * At this point everything still needed from the boot loader -- cgit v1.2.3 From 3c2047cd32b1a8c782d7efab72707e7daa251625 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 4 Mar 2008 23:07:50 +1100 Subject: x86: if we cannot calibrate the TSC, we panic. The current tsc_init() clears the TSC feature bit if the TSC khz cannot be calculated, causing us to panic in arch/x86/kernel/cpu/bugs.c check_config(). We should simply mark it unstable. Frankly, someone should take an axe to this code. mark_tsc_unstable() not only marks it unstable, but sets tsc_enabled to 0, which seems redundant but is actually important here because means it won't be used by sched_clock() either. Perhaps a tristate enum "UNUSABLE, UNSTABLE, OK" would be clearer, and separate mark_tsc_unstable() and mark_tsc_broken() functions? 
Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_32.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index c2241e04ea5f..68657d8526fb 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -392,13 +392,15 @@ void __init tsc_init(void) int cpu; if (!cpu_has_tsc) - goto out_no_tsc; + return; cpu_khz = calculate_cpu_khz(); tsc_khz = cpu_khz; - if (!cpu_khz) - goto out_no_tsc; + if (!cpu_khz) { + mark_tsc_unstable("could not calculate TSC khz"); + return; + } printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, @@ -431,9 +433,4 @@ void __init tsc_init(void) tsc_enabled = 1; clocksource_register(&clocksource_tsc); - - return; - -out_no_tsc: - setup_clear_cpu_cap(X86_FEATURE_TSC); } -- cgit v1.2.3 From 2fde61fdb00c2337efc56cfbb05bde8a42864e65 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 4 Mar 2008 19:57:42 +0100 Subject: x86: reserve end-of-conventional-memory to 1MB, 32-bit, use paravirt_enabled Jeremy Fitzhardinge pointed out that looking at the boot_params struct to determine if the system is running in a paravirtual environment is not reliable for the Xen case, currently. He also points out that there already exists a function to determine if the system is running in a paravirtual environment. So let's use that instead. This gets rid of the preprocessor test too. Signed-off-by: Alexander van Heukelum Acked-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_32.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 14e293edd23f..fd639d9f79b6 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -64,6 +64,7 @@ #include #include #include +#include /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* @@ -408,12 +409,8 @@ static void __init reserve_ebda_region(void) /* that area is absent. We'll just have to assume */ /* that the paravirt case can handle memory setup */ /* correctly, without our help. */ -#ifdef CONFIG_PARAVIRT - if ((boot_params.hdr.version >= 0x207) && - (boot_params.hdr.hardware_subarch != 0)) { + if (paravirt_enabled()) return; - } -#endif /* end of low (conventional) memory */ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); -- cgit v1.2.3 From ecd94c0809eb0ff50b628fa061c531a6fbf2fbbc Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 4 Mar 2008 20:12:28 +0100 Subject: x86: reserve end-of-conventional-memory to 1MB, 64-bit, use paravirt_enabled Jeremy Fitzhardinge pointed out that looking at the boot_params struct to determine if the system is running in a paravirtual environment is not reliable for the Xen case, currently. He also points out that there already exists a function to determine if the system is running in a paravirtual environment. So let's use that instead. This gets rid of the preprocessor test too. Signed-off-by: Alexander van Heukelum Acked-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 269a6b481fe6..48be76cda93b 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -72,12 +72,8 @@ static void __init reserve_ebda_region(void) /* that area is absent. We'll just have to assume */ /* that the paravirt case can handle memory setup */ /* correctly, without our help. */ -#ifdef CONFIG_PARAVIRT - if ((boot_params.hdr.version >= 0x207) && - (boot_params.hdr.hardware_subarch != 0)) { + if (paravirt_enabled()) return; - } -#endif /* end of low (conventional) memory */ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); -- cgit v1.2.3 From c76cb36846da6d5d6fb2951968869faa4fd1001d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:33 -0300 Subject: x86: move smp_ops extern declaration to common header the smp_ops symbol is temporarily defined in smp_64.c, but it will soon be unified Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 2fd74b06db67..80dba12b56af 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -528,3 +528,5 @@ asmlinkage void smp_call_function_interrupt(void) } } +struct smp_ops smp_ops; +EXPORT_SYMBOL_GPL(smp_ops); -- cgit v1.2.3 From 8678969e60d80527d96d2af0011e72c87c9c1fe5 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:34 -0300 Subject: x86: merge smp_send_reschedule function definition is moved to common header, x86_64 version is now called native_smp_send_reschedule Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git 
a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 80dba12b56af..fd1816123496 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -290,8 +290,9 @@ void flush_tlb_all(void) * anything. Worst case is that we lose a reschedule ... */ -void smp_send_reschedule(int cpu) +static void native_smp_send_reschedule(int cpu) { + WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } @@ -528,5 +529,7 @@ asmlinkage void smp_call_function_interrupt(void) } } -struct smp_ops smp_ops; +struct smp_ops smp_ops = { + .smp_send_reschedule = native_smp_send_reschedule, +}; EXPORT_SYMBOL_GPL(smp_ops); -- cgit v1.2.3 From 64b1a21e0924dca7ea3b7cf4287fa719c8ba7fc5 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:35 -0300 Subject: x86: unify smp_call_function_mask definition is moved to common header, x86_64 function name now is native_smp_call_function_mask Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index fd1816123496..225b765db5a2 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -386,9 +386,9 @@ static int __smp_call_function_mask(cpumask_t mask, * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. 
*/ -int smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) +int native_smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { int ret; @@ -531,5 +531,6 @@ asmlinkage void smp_call_function_interrupt(void) struct smp_ops smp_ops = { .smp_send_reschedule = native_smp_send_reschedule, + .smp_call_function_mask = native_smp_call_function_mask, }; EXPORT_SYMBOL_GPL(smp_ops); -- cgit v1.2.3 From 71d195492a6e0b22135a7156af1b41c0f99a116b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:36 -0300 Subject: x86: unify __cpu_up. function definition is moved to common header. x86_64 version is now called native_cpu_up Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot_64.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 225b765db5a2..7cc20a3c6c19 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -532,5 +532,6 @@ asmlinkage void smp_call_function_interrupt(void) struct smp_ops smp_ops = { .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, + .cpu_up = native_cpu_up, }; EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 0880f2c388a9..e381fe7792c4 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -929,7 +929,7 @@ void __init smp_prepare_boot_cpu(void) /* * Entry point to boot a CPU. */ -int __cpuinit __cpu_up(unsigned int cpu) +int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = cpu_present_to_apicid(cpu); unsigned long flags; -- cgit v1.2.3 From 1e3fac83da056f26bcb96e13967c157de55bf2ef Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:37 -0300 Subject: x86: unify prepare_boot_cpu definition is moved to common header. 
x86_64 version is now called native_prepare_boot_cpu Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot_64.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 7cc20a3c6c19..05116c1ddb6f 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -530,6 +530,7 @@ asmlinkage void smp_call_function_interrupt(void) } struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, .cpu_up = native_cpu_up, diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index e381fe7792c4..47e654cdc92d 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -918,7 +918,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* * Early setup to make printk work. */ -void __init smp_prepare_boot_cpu(void) +void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); /* already set me in cpu_online_map in boot_cpu_init() */ -- cgit v1.2.3 From 7557da67208f6ed3a1073594b7597bf20c9eb63a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:38 -0300 Subject: x86: unify smp_prepare_cpus definition is moved to common header. 
x86_64 version is now called native_smp_prepare_cpus Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot_64.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 05116c1ddb6f..c520374be1a8 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -531,6 +531,7 @@ asmlinkage void smp_call_function_interrupt(void) struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, .cpu_up = native_cpu_up, diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 47e654cdc92d..b106983050b0 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -867,7 +867,7 @@ static void __init smp_cpu_index_default(void) * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. */ -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init native_smp_prepare_cpus(unsigned int max_cpus) { nmi_watchdog_default(); smp_cpu_index_default(); -- cgit v1.2.3 From c559764923dacef301116a248695856e6eb96e48 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:39 -0300 Subject: x86: unify smp_cpus_done definition is moved to common header. 
x86_64 version is now called native_smp_cpus_done Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 2 ++ arch/x86/kernel/smpboot_64.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index c520374be1a8..275101ab4b6d 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -532,6 +532,8 @@ asmlinkage void smp_call_function_interrupt(void) struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, + .smp_cpus_done = native_smp_cpus_done, + .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, .cpu_up = native_cpu_up, diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index b106983050b0..fd0d3a93b995 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -987,7 +987,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) /* * Finish the SMP boot. */ -void __init smp_cpus_done(unsigned int max_cpus) +void __init native_smp_cpus_done(unsigned int max_cpus) { smp_cleanup_boot(); setup_ioapic_dest(); -- cgit v1.2.3 From 7b1292e2371e3ae2ac69fbb899d539ddc7b53a27 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:41 -0300 Subject: x86: use disabled_cpus in i386 this patch allows a cpu to be marked as present but disabled in i386, just as x86_64 currently does. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index f349e68e45a0..b2aded3fbfec 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -70,6 +70,8 @@ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; + /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -108,8 +110,10 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) int ver, apicid; physid_mask_t phys_cpu; - if (!(m->mpc_cpuflag & CPU_ENABLED)) + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; return; + } apicid = mpc_apic_id(m, translation_table[mpc_record]); -- cgit v1.2.3 From 68a1c3f8cd893f5c3c1396fec5be7d8acac4fc93 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:42 -0300 Subject: x86: move prefill_possible_map to common file this patch moves prefill_possible_map() to smpboot.c. Right now it is x86_64-specific, but nothing intrinsically prevents it from being used by i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/smpboot.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_64.c | 51 ------------------------------------------ 3 files changed, 54 insertions(+), 52 deletions(-) create mode 100644 arch/x86/kernel/smpboot.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index df10327182d4..4c68bfc6df1d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,7 +46,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
+obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smpboot.o tsc_sync.o obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c new file mode 100644 index 000000000000..bffe10861390 --- /dev/null +++ b/arch/x86/kernel/smpboot.c @@ -0,0 +1,53 @@ +#include +#include + +#ifdef CONFIG_HOTPLUG_CPU + +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; +} +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + * We do this because additional CPUs waste a lot of memory. 
+ * -AK + */ +__init void prefill_possible_map(void) +{ + int i; + int possible; + + if (additional_cpus == -1) { + if (disabled_cpus > 0) + additional_cpus = disabled_cpus; + else + additional_cpus = 0; + } + possible = num_processors + additional_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, max_t(int, possible - num_processors, 0)); + + for (i = 0; i < possible; i++) + cpu_set(i, cpu_possible_map); +} +#endif + diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index fd0d3a93b995..953b0ff72b65 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -749,51 +749,6 @@ static __init void disable_smp(void) cpu_set(0, per_cpu(cpu_core_map, 0)); } -#ifdef CONFIG_HOTPLUG_CPU - -int additional_cpus __initdata = -1; - -/* - * cpu_possible_map should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and dont expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_map on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. 
- * -AK - */ -__init void prefill_possible_map(void) -{ - int i; - int possible; - - if (additional_cpus == -1) { - if (disabled_cpus > 0) - additional_cpus = disabled_cpus; - else - additional_cpus = 0; - } - possible = num_processors + additional_cpus; - if (possible > NR_CPUS) - possible = NR_CPUS; - - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", - possible, - max_t(int, possible - num_processors, 0)); - - for (i = 0; i < possible; i++) - cpu_set(i, cpu_possible_map); -} -#endif - /* * Various sanity checks. */ @@ -1087,12 +1042,6 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } -static __init int setup_additional_cpus(char *s) -{ - return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; -} -early_param("additional_cpus", setup_additional_cpus); - #else /* ... !CONFIG_HOTPLUG_CPU */ int __cpu_disable(void) -- cgit v1.2.3 From 7930e53422d3d06ea873199f66c288806d37cc94 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:43 -0300 Subject: x86: remove export for smp_call_function_mask. with this removal, exports for both i386 and x86_64, regarding the "smp_call_function" series are now the same. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 275101ab4b6d..a434f6c55f83 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -400,7 +400,6 @@ int native_smp_call_function_mask(cpumask_t mask, spin_unlock(&call_lock); return ret; } -EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU -- cgit v1.2.3 From 3428f3d6caa3bc2adde050a2771a2821eb46f901 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:44 -0300 Subject: x86: remove irqs disabled warning. 
there's already a warning in the topmost function Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index a434f6c55f83..b040224927ca 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -420,9 +420,6 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, /* prevent preemption and reschedule on another processor */ int ret, me = get_cpu(); - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - if (cpu == me) { local_irq_disable(); func(info); -- cgit v1.2.3 From e32640a2cd530e1259a06e34a72b0cdb73738ce2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:45 -0300 Subject: x86: create smpcommon.c This patch creates smpcommon.c with functions that are equal between architectures. The i386-only init_gdt is ifdef'd. Note that smpcommon.o figures twice in the Makefile: this is because sub-architectures like voyager that does not use the normal smp_$(BITS) files also have to access them Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/smp_64.c | 56 ---------------------------- arch/x86/kernel/smpcommon.c | 83 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpcommon_32.c | 81 ----------------------------------------- 4 files changed, 85 insertions(+), 139 deletions(-) create mode 100644 arch/x86/kernel/smpcommon.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c68bfc6df1d..018d04d880db 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -47,8 +47,8 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smpboot.o tsc_sync.o -obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o 
-obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o +obj-$(CONFIG_X86_32_SMP) += smpcommon.o +obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index b040224927ca..1d8b863fa357 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -401,62 +401,6 @@ int native_smp_call_function_mask(cpumask_t mask, return ret; } -/* - * smp_call_function_single - Run a function on a specific CPU - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Currently unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Retrurns 0 on success, else a negative status code. - * - * Does not return until the remote CPU is nearly ready to execute - * or is or has executed. - */ - -int smp_call_function_single (int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - /* prevent preemption and reschedule on another processor */ - int ret, me = get_cpu(); - - if (cpu == me) { - local_irq_disable(); - func(info); - local_irq_enable(); - put_cpu(); - return 0; - } - - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); - - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); - -/* - * smp_call_function - run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: currently unused. - * @wait: If true, wait (atomically) until function has completed on other - * CPUs. - * - * Returns 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute func or are or have executed. 
- * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - * Actually there are a few legal cases, like panic. - */ -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); -} -EXPORT_SYMBOL(smp_call_function); - static void stop_this_cpu(void *dummy) { local_irq_disable(); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c new file mode 100644 index 000000000000..3449064d141a --- /dev/null +++ b/arch/x86/kernel/smpcommon.c @@ -0,0 +1,83 @@ +/* + * SMP stuff which is common to all sub-architectures. + */ +#include +#include + +#ifdef CONFIG_X86_32 +DEFINE_PER_CPU(unsigned long, this_cpu_off); +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + +/* Initialize the CPU's GDT. This is either the boot CPU doing itself + (still using the master per-cpu area), or a CPU doing it for a + secondary which will soon come up. */ +__cpuinit void init_gdt(int cpu) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + pack_descriptor(&gdt[GDT_ENTRY_PERCPU], + __per_cpu_offset[cpu], 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + + gdt[GDT_ENTRY_PERCPU].s = 1; + + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; + per_cpu(cpu_number, cpu) = cpu; +} +#endif + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
+ */ +int smp_call_function(void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + return smp_call_function_mask(cpu_online_map, func, info, wait); +} +EXPORT_SYMBOL(smp_call_function); + +/** + * smp_call_function_single - Run a function on a specific CPU + * @cpu: The target CPU. Cannot be the calling CPU. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + */ +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int ret; + int me = get_cpu(); + if (cpu == me) { + local_irq_disable(); + func(info); + local_irq_enable(); + put_cpu(); + return 0; + } + + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + + put_cpu(); + return ret; +} +EXPORT_SYMBOL(smp_call_function_single); diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c index 8bc38af29aef..8b137891791f 100644 --- a/arch/x86/kernel/smpcommon_32.c +++ b/arch/x86/kernel/smpcommon_32.c @@ -1,82 +1 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -DEFINE_PER_CPU(unsigned long, this_cpu_off); -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. 
*/ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - - pack_descriptor(&gdt[GDT_ENTRY_PERCPU], - __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - - gdt[GDT_ENTRY_PERCPU].s = 1; - - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; - per_cpu(cpu_number, cpu) = cpu; -} - - -/** - * smp_call_function(): Run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -int smp_call_function(void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); -} -EXPORT_SYMBOL(smp_call_function); - -/** - * smp_call_function_single - Run a function on a specific CPU - * @cpu: The target CPU. Cannot be the calling CPU. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. 
- */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - /* prevent preemption and reschedule on another processor */ - int ret; - int me = get_cpu(); - if (cpu == me) { - local_irq_disable(); - func(info); - local_irq_enable(); - put_cpu(); - return 0; - } - - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); - - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); -- cgit v1.2.3 From 3a36d1e435af79ec3bc5ead871e5b22d5558ebf3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:46 -0300 Subject: x86: provide __smp_call_function This function is used in smp_send_stop(). It's like smp_call_function_mask, but always go to all online cpus, and does not take any locks. It is added to x86_64, but will soon be unified in a common file Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 1d8b863fa357..aa2edb7f3a51 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -322,6 +322,38 @@ void unlock_ipi_call_lock(void) spin_unlock_irq(&call_lock); } +static void __smp_call_function(void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = num_online_cpus() - 1; + + if (!cpus) + return; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + + /* * this function sends a 'generic call 
function' IPI to all other CPU * of the system defined in the mask. @@ -424,7 +456,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); + __smp_call_function(stop_this_cpu, NULL, 0, 0); if (!nolock) spin_unlock(&call_lock); disable_local_APIC(); -- cgit v1.2.3 From 2513926c286ca1d0d189c206966011bdd4080354 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:47 -0300 Subject: x86: change x86_64 smp_call_function_mask to look alike i386 the two versions (the inner version, and the outer version, that takes the locks) of smp_call_function_mask are made into one. With the changes, i386 and x86_64 versions look exactly the same. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_32.c | 2 +- arch/x86/kernel/smp_64.c | 57 ++++++++++++++---------------------------------- 2 files changed, 17 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index dc0cde9d16fb..e4a6b669a0b8 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -583,7 +583,7 @@ native_smp_call_function_mask(cpumask_t mask, atomic_set(&data.finished, 0); call_data = &data; - mb(); + wmb(); /* Send a message to other CPUs */ if (cpus_equal(mask, allbutself)) diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index aa2edb7f3a51..e4494e829dfa 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -354,26 +354,30 @@ static void __smp_call_function(void (*func) (void *info), void *info, } -/* - * this function sends a 'generic call function' IPI to all other CPU - * of the system defined in the mask. 
- */ -static int __smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) +int native_smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; cpumask_t allbutself; int cpus; + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); cpus_and(mask, mask, allbutself); cpus = cpus_weight(mask); - if (!cpus) + if (!cpus) { + spin_unlock(&call_lock); return 0; + } data.func = func; data.info = info; @@ -395,43 +399,14 @@ static int __smp_call_function_mask(cpumask_t mask, while (atomic_read(&data.started) != cpus) cpu_relax(); - if (!wait) - return 0; + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); - while (atomic_read(&data.finished) != cpus) - cpu_relax(); + spin_unlock(&call_lock); return 0; } -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. 
- */ -int native_smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - int ret; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - spin_lock(&call_lock); - ret = __smp_call_function_mask(mask, func, info, wait); - spin_unlock(&call_lock); - return ret; -} static void stop_this_cpu(void *dummy) { -- cgit v1.2.3 From 3be5b49e8f1002bc562a2b4670093e4ebf27b4e9 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:49 -0300 Subject: x86: make stop_this_cpu looks exactly equal in both arches with the hlt_works change, it is possible to have i386 and x86_64 stop_this_cpu() looking exactly the same. They can, after that, be merged. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_32.c | 2 +- arch/x86/kernel/smp_64.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index e4a6b669a0b8..cde3a0ecd716 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -611,7 +611,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_local_APIC(); - if (cpu_data(smp_processor_id()).hlt_works_ok) + if (hlt_works(smp_processor_id())) for(;;) halt(); for (;;); } diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index e4494e829dfa..4e1e2bce969c 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -416,8 +416,9 @@ static void stop_this_cpu(void *dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_local_APIC(); - for (;;) - halt(); + if (hlt_works(smp_processor_id())) + for (;;) halt(); + for (;;); } void smp_send_stop(void) -- cgit v1.2.3 From 321183c145a37e6d31cc55e0f69a226f9006e621 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:50 -0300 Subject: x86: add reboot_force test to native_smp_send_stop This can be safely added to i386. 
After that, functions look exactly the same for both arches Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index cde3a0ecd716..8be3e091dcd0 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * Some notes on x86 processor bugs affecting SMP operation: @@ -622,10 +623,14 @@ static void stop_this_cpu (void * dummy) static void native_smp_send_stop(void) { - /* Don't deadlock on the call lock in panic */ - int nolock = !spin_trylock(&call_lock); + int nolock; unsigned long flags; + if (reboot_force) + return; + + /* Don't deadlock on the call lock in panic */ + nolock = !spin_trylock(&call_lock); local_irq_save(flags); __smp_call_function(stop_this_cpu, NULL, 0, 0); if (!nolock) -- cgit v1.2.3 From 377d698426b8c685fb6d48fe89694fe4ce3aa1f8 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:51 -0300 Subject: x86: unify smp_send_stop function definition is moved to common header. 
x86_64 version is now called native_smp_send_stop Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 4e1e2bce969c..ad11ef0c3fae 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -421,7 +421,7 @@ static void stop_this_cpu(void *dummy) for (;;); } -void smp_send_stop(void) +void native_smp_send_stop(void) { int nolock; unsigned long flags; @@ -482,6 +482,7 @@ struct smp_ops smp_ops = { .smp_prepare_cpus = native_smp_prepare_cpus, .smp_cpus_done = native_smp_cpus_done, + .smp_send_stop = native_smp_send_stop, .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, .cpu_up = native_cpu_up, -- cgit v1.2.3 From f9e47a126be2eaabf04a1a5c71ca7b23a473d0d8 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:52 -0300 Subject: x86: create smp.c this patch moves all the functions and data structures that look like exactly the same from smp_{32,64}.c to smp.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 +- arch/x86/kernel/smp.c | 253 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smp_32.c | 223 ----------------------------------------- arch/x86/kernel/smp_64.c | 205 -------------------------------------- 4 files changed, 255 insertions(+), 429 deletions(-) create mode 100644 arch/x86/kernel/smp.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 018d04d880db..0a4b088bab5d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,7 +46,8 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smpboot.o tsc_sync.o +obj-$(CONFIG_X86_SMP) += 
smp_$(BITS).o smpboot_$(BITS).o smp.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c new file mode 100644 index 000000000000..b662300a88f3 --- /dev/null +++ b/arch/x86/kernel/smp.c @@ -0,0 +1,253 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef CONFIG_X86_32 +#include +#include +#else +#include +#endif + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +static void native_smp_send_reschedule(int cpu) +{ + WARN_ON(cpu_is_offline(cpu)); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. 
+ */ +static DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + +static struct call_data_struct *call_data; + +static void __smp_call_function(void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = num_online_cpus() - 1; + + if (!cpus) + return; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + + +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
+ */ +static int +native_smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + struct call_data_struct data; + cpumask_t allbutself; + int cpus; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + wmb(); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + spin_unlock(&call_lock); + + return 0; +} + +static void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + disable_local_APIC(); + if (hlt_works(smp_processor_id())) + for (;;) halt(); + for (;;); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ + +static void native_smp_send_stop(void) +{ + int nolock; + unsigned long flags; + + if (reboot_force) + return; + + /* Don't deadlock on the call lock in panic */ + nolock = !spin_trylock(&call_lock); + local_irq_save(flags); + __smp_call_function(stop_this_cpu, NULL, 0, 0); + if (!nolock) + spin_unlock(&call_lock); + disable_local_APIC(); + local_irq_restore(flags); +} + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. 
+ */ +void smp_reschedule_interrupt(struct pt_regs *regs) +{ + ack_APIC_irq(); +#ifdef CONFIG_X86_32 + __get_cpu_var(irq_stat).irq_resched_count++; +#else + add_pda(irq_resched_count, 1); +#endif +} + +void smp_call_function_interrupt(struct pt_regs *regs) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + ack_APIC_irq(); + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); +#ifdef CONFIG_X86_32 + __get_cpu_var(irq_stat).irq_call_count++; +#else + add_pda(irq_call_count, 1); +#endif + irq_exit(); + + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } +} + +struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .cpu_up = native_cpu_up, + .smp_cpus_done = native_smp_cpus_done, + + .smp_send_stop = native_smp_send_stop, + .smp_send_reschedule = native_smp_send_reschedule, + .smp_call_function_mask = native_smp_call_function_mask, +}; +EXPORT_SYMBOL_GPL(smp_ops); + diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 8be3e091dcd0..61e546e85733 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -466,217 +466,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static void native_smp_send_reschedule(int cpu) -{ - WARN_ON(cpu_is_offline(cpu)); - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. 
- */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -void lock_ipi_call_lock(void) -{ - spin_lock_irq(&call_lock); -} - -void unlock_ipi_call_lock(void) -{ - spin_unlock_irq(&call_lock); -} - -static struct call_data_struct *call_data; - -static void __smp_call_function(void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus() - 1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - mb(); - - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - - -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. 
- */ -static int -native_smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - struct call_data_struct data; - cpumask_t allbutself; - int cpus; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - /* Holding any lock stops cpus from going down. */ - spin_lock(&call_lock); - - allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); - - if (!cpus) { - spin_unlock(&call_lock); - return 0; - } - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - - /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); - spin_unlock(&call_lock); - - return 0; -} - -static void stop_this_cpu (void * dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - if (hlt_works(smp_processor_id())) - for(;;) halt(); - for (;;); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ - -static void native_smp_send_stop(void) -{ - int nolock; - unsigned long flags; - - if (reboot_force) - return; - - /* Don't deadlock on the call lock in panic */ - nolock = !spin_trylock(&call_lock); - local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); - if (!nolock) - spin_unlock(&call_lock); - disable_local_APIC(); - local_irq_restore(flags); -} - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. 
- */ -void smp_reschedule_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - __get_cpu_var(irq_stat).irq_resched_count++; -} - -void smp_call_function_interrupt(struct pt_regs *regs) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - ack_APIC_irq(); - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - irq_enter(); - (*func)(info); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); - - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } -} - static int convert_apicid_to_cpu(int apic_id) { int i; @@ -703,15 +492,3 @@ int safe_smp_processor_id(void) return cpuid >= 0 ? cpuid : 0; } - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .cpu_up = native_cpu_up, - .smp_cpus_done = native_smp_cpus_done, - - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, - .smp_call_function_mask = native_smp_call_function_mask, -}; -EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index ad11ef0c3fae..d28e8685709d 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -283,208 +283,3 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } - -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ - -static void native_smp_send_reschedule(int cpu) -{ - WARN_ON(cpu_is_offline(cpu)); - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. 
- */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static struct call_data_struct * call_data; - -void lock_ipi_call_lock(void) -{ - spin_lock_irq(&call_lock); -} - -void unlock_ipi_call_lock(void) -{ - spin_unlock_irq(&call_lock); -} - -static void __smp_call_function(void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus() - 1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - mb(); - - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - - -int native_smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - struct call_data_struct data; - cpumask_t allbutself; - int cpus; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - /* Holding any lock stops cpus from going down. 
*/ - spin_lock(&call_lock); - - allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); - - if (!cpus) { - spin_unlock(&call_lock); - return 0; - } - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - - /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); - - spin_unlock(&call_lock); - - return 0; -} - -static void stop_this_cpu(void *dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - if (hlt_works(smp_processor_id())) - for (;;) halt(); - for (;;); -} - -void native_smp_send_stop(void) -{ - int nolock; - unsigned long flags; - - if (reboot_force) - return; - - /* Don't deadlock on the call lock in panic */ - nolock = !spin_trylock(&call_lock); - local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); - if (!nolock) - spin_unlock(&call_lock); - disable_local_APIC(); - local_irq_restore(flags); -} - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. 
- */ -asmlinkage void smp_reschedule_interrupt(void) -{ - ack_APIC_irq(); - add_pda(irq_resched_count, 1); -} - -asmlinkage void smp_call_function_interrupt(void) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - ack_APIC_irq(); - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - exit_idle(); - irq_enter(); - (*func)(info); - add_pda(irq_call_count, 1); - irq_exit(); - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } -} - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .smp_cpus_done = native_smp_cpus_done, - - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, - .smp_call_function_mask = native_smp_call_function_mask, - .cpu_up = native_cpu_up, -}; -EXPORT_SYMBOL_GPL(smp_ops); -- cgit v1.2.3 From 8202350367ac11d571f6dd4c21c2027a4d235276 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:53 -0300 Subject: x86: create ipi.c This patch moves all ipi and apic related functions from smp_32.c to ipi.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/ipi.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smp_32.c | 153 ---------------------------------------- 3 files changed, 179 insertions(+), 154 deletions(-) create mode 100644 arch/x86/kernel/ipi.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0a4b088bab5d..e3b01f96c565 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp_$(BITS).o 
smpboot_$(BITS).o smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c new file mode 100644 index 000000000000..c0df7b89ca23 --- /dev/null +++ b/arch/x86/kernel/ipi.c @@ -0,0 +1,178 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +#include +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline int __prepare_ICR(unsigned int shortcut, int vector) +{ + unsigned int icr = shortcut | APIC_DEST_LOGICAL; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; +} + +static inline int __prepare_ICR2(unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +void __send_IPI_shortcut(unsigned int shortcut, int vector) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. 
+ */ + apic_write_around(APIC_ICR, cfg); +} + +void send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector); +} + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +static inline void __send_IPI_dest_field(unsigned long mask, int vector) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); +} + +/* + * This is only used on smaller machines. + */ +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +{ + unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); + __send_IPI_dest_field(mask, vector); + local_irq_restore(flags); +} + +void send_IPI_mask_sequence(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. 
This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_possible_cpu(query_cpu) { + if (cpu_isset(query_cpu, mask)) { + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), + vector); + } + } + local_irq_restore(flags); +} + +/* must come after the send_IPI functions above for inlining */ +#include +static int convert_apicid_to_cpu(int apic_id) +{ + int i; + + for_each_possible_cpu(i) { + if (per_cpu(x86_cpu_to_apicid, i) == apic_id) + return i; + } + return -1; +} + +int safe_smp_processor_id(void) +{ + int apicid, cpuid; + + if (!boot_cpu_has(X86_FEATURE_APIC)) + return 0; + + apicid = hard_smp_processor_id(); + if (apicid == BAD_APICID) + return 0; + + cpuid = convert_apicid_to_cpu(apicid); + + return cpuid >= 0 ? cpuid : 0; +} +#endif diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 61e546e85733..d80623aba9c5 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -107,132 +107,6 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR (unsigned int shortcut, int vector) -{ - unsigned int icr = shortcut | APIC_DEST_LOGICAL; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2 (unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -void __send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. 
- */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} - -void send_IPI_self(int vector) -{ - __send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -static inline void __send_IPI_dest_field(unsigned long mask, int vector) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} - -/* - * This is only used on smaller machines. - */ -void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) -{ - unsigned long mask = cpus_addr(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); - __send_IPI_dest_field(mask, vector); - local_irq_restore(flags); -} - -void send_IPI_mask_sequence(cpumask_t mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. 
This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_possible_cpu(query_cpu) { - if (cpu_isset(query_cpu, mask)) { - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), - vector); - } - } - local_irq_restore(flags); -} - #include /* must come after the send_IPI functions above for inlining */ /* @@ -465,30 +339,3 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } - -static int convert_apicid_to_cpu(int apic_id) -{ - int i; - - for_each_possible_cpu(i) { - if (per_cpu(x86_cpu_to_apicid, i) == apic_id) - return i; - } - return -1; -} - -int safe_smp_processor_id(void) -{ - int apicid, cpuid; - - if (!boot_cpu_has(X86_FEATURE_APIC)) - return 0; - - apicid = hard_smp_processor_id(); - if (apicid == BAD_APICID) - return 0; - - cpuid = convert_apicid_to_cpu(apicid); - - return cpuid >= 0 ? cpuid : 0; -} -- cgit v1.2.3 From c048fdfe6178e082be918d4062c86d9764979112 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:54 -0300 Subject: x86: create tlb files this patch creates tlb_32.c and tlb_64.c, with tlb-related functions that used to live in smp*.c files. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/smp_32.c | 235 ---------------------------------------- arch/x86/kernel/smp_64.c | 275 ----------------------------------------------- arch/x86/kernel/tlb_32.c | 243 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/tlb_64.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 517 insertions(+), 511 deletions(-) create mode 100644 arch/x86/kernel/tlb_32.c create mode 100644 arch/x86/kernel/tlb_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e3b01f96c565..362ab6a9d5b2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index d80623aba9c5..d8fdec5f19bc 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -104,238 +104,3 @@ * or are signal timing bugs worked around in hardware and there's * about nothing of note with C stepping upwards. */ - -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; - -#include /* must come after the send_IPI functions above for inlining */ - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). 
- * - * Optimizations Manfred Spraul - */ - -static cpumask_t flush_cpumask; -static struct mm_struct * flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -void leave_mm(int cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. 
even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - */ - -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned long cpu; - - cpu = get_cpu(); - - if (!cpu_isset(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); - smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); - __get_cpu_var(irq_stat).irq_tlb_count++; -} - -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) -{ - cpumask_t cpumask = *cpumaskp; - - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * AK: x86-64 has a faster method that could be ported. 
- */ - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpus_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - cpu_relax(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm (struct mm_struct * mm) -{ - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if(current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} -EXPORT_SYMBOL(flush_tlb_page); - -static void do_flush_tlb_all(void* info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); -} diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index d28e8685709d..26448fff0abd 100644 --- a/arch/x86/kernel/smp_64.c 
+++ b/arch/x86/kernel/smp_64.c @@ -8,278 +8,3 @@ * This code is released under the GNU General Public License version 2 or * later. */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - * - * More scalable flush, from Andi Kleen - * - * To avoid global state use 8 different call vectors. - * Each CPU uses a specific vector to trigger flushes on other - * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. - * - * With more than 8 CPUs they are hashed to the 8 available - * vectors. The limited global vector space forces us to this right now. - * In future when interrupts are split into per CPU domains this could be - * fixed, at the cost of triggering multiple IPIs in some cases. - */ - -union smp_flush_state { - struct { - cpumask_t flush_cpumask; - struct mm_struct *flush_mm; - unsigned long flush_va; - spinlock_t tlbstate_lock; - }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; - -/* State is put into the per CPU data section, but padded - to a full cache line because other CPUs can access it and we don't - want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. 
- */ -void leave_mm(int cpu) -{ - if (read_pda(mmu_state) == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu mmu_state to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu mmu_state to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu mmu_state is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. 
- */ - -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) -{ - int cpu; - int sender; - union smp_flush_state *f; - - cpu = smp_processor_id(); - /* - * orig_rax contains the negated interrupt vector. - * Use that to determine where the sender put the data. - */ - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); - - if (!cpu_isset(cpu, f->flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { - if (f->flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(f->flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); - add_pda(irq_tlb_count, 1); -} - -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) -{ - int sender; - union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - /* Caller has disabled preemption */ - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); - - /* - * Could avoid this lock when - * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is - * probably not worth checking this for a cache-hot lock. - */ - spin_lock(&f->tlbstate_lock); - - f->flush_mm = mm; - f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); - - /* - * We have to send the IPI only to - * CPUs affected. 
- */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); - - while (!cpus_empty(f->flush_cpumask)) - cpu_relax(); - - f->flush_mm = NULL; - f->flush_va = 0; - spin_unlock(&f->tlbstate_lock); -} - -int __cpuinit init_smp_flush(void) -{ - int i; - - for_each_cpu_mask(i, cpu_possible_map) { - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); - } - return 0; -} -core_initcall(init_smp_flush); - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm (struct mm_struct * mm) -{ - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if(current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} - -static void do_flush_tlb_all(void* info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); -} diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c new file mode 100644 index 000000000000..9bb2363851af --- /dev/null +++ b/arch/x86/kernel/tlb_32.c @@ -0,0 +1,243 @@ 
+#include +#include +#include + +#include + +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) + ____cacheline_aligned = { &init_mm, 0, }; + +/* must come after the send_IPI functions above for inlining */ +#include + +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + */ + +static cpumask_t flush_cpumask; +static struct mm_struct *flush_mm; +static unsigned long flush_va; +static DEFINE_SPINLOCK(tlbstate_lock); + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. + * + * We need to reload %cr3 since the page tables may be going + * away from under us.. + */ +void leave_mm(int cpu) +{ + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} +EXPORT_SYMBOL_GPL(leave_mm); + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superfluous + * tlb flush. + * 1a2) set cpu_tlbstate to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu_tlbstate[].active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu_tlbstate[].active_mm is correct, cpu0 already handles + * flush ipis. 
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu_tlbstate is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + */ + +void smp_invalidate_interrupt(struct pt_regs *regs) +{ + unsigned long cpu; + + cpu = get_cpu(); + + if (!cpu_isset(cpu, flush_cpumask)) + goto out; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { + if (flush_va == TLB_FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); + } else + leave_mm(cpu); + } + ack_APIC_irq(); + smp_mb__before_clear_bit(); + cpu_clear(cpu, flush_cpumask); + smp_mb__after_clear_bit(); +out: + put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; +} + +void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, + unsigned long va) +{ + cpumask_t cpumask = *cpumaskp; + + /* + * A couple of (to be removed) sanity checks: + * + * - current CPU must not be in mask + * - mask must exist :) + */ + BUG_ON(cpus_empty(cpumask)); + BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(!mm); + +#ifdef 
CONFIG_HOTPLUG_CPU + /* If a CPU which we ran on has gone down, OK. */ + cpus_and(cpumask, cpumask, cpu_online_map); + if (unlikely(cpus_empty(cpumask))) + return; +#endif + + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. + * AK: x86-64 has a faster method that could be ported. + */ + spin_lock(&tlbstate_lock); + + flush_mm = mm; + flush_va = va; + cpus_or(flush_cpumask, cpumask, flush_cpumask); + /* + * We have to send the IPI only to + * CPUs affected. + */ + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + + while (!cpus_empty(flush_cpumask)) + /* nothing. lockup detection does not belong here */ + cpu_relax(); + + flush_mm = NULL; + flush_va = 0; + spin_unlock(&tlbstate_lock); +} + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + local_flush_tlb(); + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + preempt_enable(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, va); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +static void do_flush_tlb_all(void *info) +{ + unsigned 
long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) + leave_mm(cpu); +} + +void flush_tlb_all(void) +{ + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); +} + diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c new file mode 100644 index 000000000000..615d84817758 --- /dev/null +++ b/arch/x86/kernel/tlb_64.c @@ -0,0 +1,273 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + * + * More scalable flush, from Andi Kleen + * + * To avoid global state use 8 different call vectors. + * Each CPU uses a specific vector to trigger flushes on other + * CPUs. Depending on the received vector the target CPUs look into + * the right per cpu variable for the flush data. + * + * With more than 8 CPUs they are hashed to the 8 available + * vectors. The limited global vector space forces us to this right now. + * In future when interrupts are split into per CPU domains this could be + * fixed, at the cost of triggering multiple IPIs in some cases. + */ + +union smp_flush_state { + struct { + cpumask_t flush_cpumask; + struct mm_struct *flush_mm; + unsigned long flush_va; + spinlock_t tlbstate_lock; + }; + char pad[SMP_CACHE_BYTES]; +} ____cacheline_aligned; + +/* State is put into the per CPU data section, but padded + to a full cache line because other CPUs can access it and we don't + want false sharing in the per cpu data segment. */ +static DEFINE_PER_CPU(union smp_flush_state, flush_state); + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. 
+ */ +void leave_mm(int cpu) +{ + if (read_pda(mmu_state) == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} +EXPORT_SYMBOL_GPL(leave_mm); + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superfluous + * tlb flush. + * 1a2) set cpu mmu_state to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu mmu_state to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu mmu_state is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. 
+ */ + +asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +{ + int cpu; + int sender; + union smp_flush_state *f; + + cpu = smp_processor_id(); + /* + * orig_rax contains the negated interrupt vector. + * Use that to determine where the sender put the data. + */ + sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); + + if (!cpu_isset(cpu, f->flush_cpumask)) + goto out; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (f->flush_mm == read_pda(active_mm)) { + if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_va == TLB_FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(f->flush_va); + } else + leave_mm(cpu); + } +out: + ack_APIC_irq(); + cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); +} + +void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, + unsigned long va) +{ + int sender; + union smp_flush_state *f; + cpumask_t cpumask = *cpumaskp; + + /* Caller has disabled preemption */ + sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; + f = &per_cpu(flush_state, sender); + + /* + * Could avoid this lock when + * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is + * probably not worth checking this for a cache-hot lock. + */ + spin_lock(&f->tlbstate_lock); + + f->flush_mm = mm; + f->flush_va = va; + cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + + /* + * We have to send the IPI only to + * CPUs affected. 
+ */ + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + + while (!cpus_empty(f->flush_cpumask)) + cpu_relax(); + + f->flush_mm = NULL; + f->flush_va = 0; + spin_unlock(&f->tlbstate_lock); +} + +int __cpuinit init_smp_flush(void) +{ + int i; + + for_each_cpu_mask(i, cpu_possible_map) { + spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + } + return 0; +} +core_initcall(init_smp_flush); + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + local_flush_tlb(); + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + preempt_enable(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, va); + + preempt_enable(); +} + +static void do_flush_tlb_all(void *info) +{ + unsigned long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (read_pda(mmu_state) == TLBSTATE_LAZY) + leave_mm(cpu); +} + +void flush_tlb_all(void) +{ + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); +} -- cgit v1.2.3 From 0941ecb55fbfd2d8bcc62dfd2fcaba1b35f2f196 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:55 -0300 Subject: x86: get rid of smp_32.c and 
smp_64.c This patch merges the copyright notices, and valuable comments that were left back on smp_{32,64}.c. With that, files are empty, and are deleted Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/smp.c | 91 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smp_32.c | 106 ----------------------------------------------- arch/x86/kernel/smp_64.c | 10 ----- 4 files changed, 93 insertions(+), 118 deletions(-) delete mode 100644 arch/x86/kernel/smp_32.c delete mode 100644 arch/x86/kernel/smp_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 362ab6a9d5b2..c436e747f502 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,10 +46,10 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smp.o +obj-$(CONFIG_X86_SMP) += smpboot_$(BITS).o smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += smpboot_64.o tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index b662300a88f3..88c1e518a203 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -1,3 +1,16 @@ +/* + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * + * i386 and x86_64 integration by Glauber Costa + * + * This code is released under the GNU General Public License version 2 or + * later. 
+ */ + #include #include @@ -19,6 +32,84 @@ #else #include #endif +/* + * Some notes on x86 processor bugs affecting SMP operation: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP errata are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP errata are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP errata are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. + * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deassering IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + * + * Pentium + * There is a marginal case where REP MOVS on 100MHz SMP + * machines with B stepping processors can fail. XXX should provide + * an L1cache=Writethrough or L1cache=off option. + * + * B stepping CPUs may hang. There are hardware work arounds + * for this. We warn about it in case your board doesn't have the work + * arounds. Basically that's so I can tell anyone with a B stepping + * CPU and SMP problems "tough". + * + * Specific items [From Pentium Processor Specification Update] + * + * 1AP. Linux doesn't use remote read + * 2AP. Linux doesn't trust APIC errors + * 3AP. We work around this + * 4AP. 
Linux never generated 3 interrupts of the same priority + * to cause a lost local interrupt. + * 5AP. Remote read is never used + * 6AP. not affected - worked around in hardware + * 7AP. not affected - worked around in hardware + * 8AP. worked around in hardware - we get explicit CS errors if not + * 9AP. only 'noapic' mode affected. Might generate spurious + * interrupts, we log only the first one and count the + * rest silently. + * 10AP. not affected - worked around in hardware + * 11AP. Linux reads the APIC between writes to avoid this, as per + * the documentation. Make sure you preserve this as it affects + * the C stepping chips too. + * 12AP. not affected - worked around in hardware + * 13AP. not affected - worked around in hardware + * 14AP. we always deassert INIT during bootup + * 15AP. not affected - worked around in hardware + * 16AP. not affected - worked around in hardware + * 17AP. not affected - worked around in hardware + * 18AP. not affected - worked around in hardware + * 19AP. not affected - worked around in BIOS + * + * If this sounds worrying believe me these bugs are either ___RARE___, + * or are signal timing bugs worked around in hardware and there's + * about nothing of note with C stepping upwards. + */ /* * this function sends a 'reschedule' IPI to another CPU. diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c deleted file mode 100644 index d8fdec5f19bc..000000000000 --- a/arch/x86/kernel/smp_32.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Intel SMP support routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar - * - * This code is released under the GNU General Public License version 2 or - * later. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * Some notes on x86 processor bugs affecting SMP operation: - * - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. 
- * The Linux implications for SMP are handled as follows: - * - * Pentium III / [Xeon] - * None of the E1AP-E3AP errata are visible to the user. - * - * E1AP. see PII A1AP - * E2AP. see PII A2AP - * E3AP. see PII A3AP - * - * Pentium II / [Xeon] - * None of the A1AP-A3AP errata are visible to the user. - * - * A1AP. see PPro 1AP - * A2AP. see PPro 2AP - * A3AP. see PPro 7AP - * - * Pentium Pro - * None of 1AP-9AP errata are visible to the normal user, - * except occasional delivery of 'spurious interrupt' as trap #15. - * This is very rare and a non-problem. - * - * 1AP. Linux maps APIC as non-cacheable - * 2AP. worked around in hardware - * 3AP. fixed in C0 and above steppings microcode update. - * Linux does not use excessive STARTUP_IPIs. - * 4AP. worked around in hardware - * 5AP. symmetric IO mode (normal Linux operation) not affected. - * 'noapic' mode has vector 0xf filled out properly. - * 6AP. 'noapic' mode might be affected - fixed in later steppings - * 7AP. We do not assume writes to the LVT deassering IRQs - * 8AP. We do not enable low power mode (deep sleep) during MP bootup - * 9AP. We do not use mixed mode - * - * Pentium - * There is a marginal case where REP MOVS on 100MHz SMP - * machines with B stepping processors can fail. XXX should provide - * an L1cache=Writethrough or L1cache=off option. - * - * B stepping CPUs may hang. There are hardware work arounds - * for this. We warn about it in case your board doesn't have the work - * arounds. Basically that's so I can tell anyone with a B stepping - * CPU and SMP problems "tough". - * - * Specific items [From Pentium Processor Specification Update] - * - * 1AP. Linux doesn't use remote read - * 2AP. Linux doesn't trust APIC errors - * 3AP. We work around this - * 4AP. Linux never generated 3 interrupts of the same priority - * to cause a lost local interrupt. - * 5AP. Remote read is never used - * 6AP. not affected - worked around in hardware - * 7AP. 
not affected - worked around in hardware - * 8AP. worked around in hardware - we get explicit CS errors if not - * 9AP. only 'noapic' mode affected. Might generate spurious - * interrupts, we log only the first one and count the - * rest silently. - * 10AP. not affected - worked around in hardware - * 11AP. Linux reads the APIC between writes to avoid this, as per - * the documentation. Make sure you preserve this as it affects - * the C stepping chips too. - * 12AP. not affected - worked around in hardware - * 13AP. not affected - worked around in hardware - * 14AP. we always deassert INIT during bootup - * 15AP. not affected - worked around in hardware - * 16AP. not affected - worked around in hardware - * 17AP. not affected - worked around in hardware - * 18AP. not affected - worked around in hardware - * 19AP. not affected - worked around in BIOS - * - * If this sounds worrying believe me these bugs are either ___RARE___, - * or are signal timing bugs worked around in hardware and there's - * about nothing of note with C stepping upwards. - */ diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c deleted file mode 100644 index 26448fff0abd..000000000000 --- a/arch/x86/kernel/smp_64.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Intel SMP support routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar - * (c) 2002,2003 Andi Kleen, SuSE Labs. - * - * This code is released under the GNU General Public License version 2 or - * later. 
- */ -- cgit v1.2.3 From 5382e89670399f9db8a58b3c6f850fa4a94f6cca Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:57 -0300 Subject: x86: adjust types in smpcommon_32.c so they can have the same type as x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 579b9b740c7c..5a446f079b33 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -67,7 +67,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; +DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -92,10 +92,10 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); /* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = +u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; -DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; -- cgit v1.2.3 From a355352b97901d987f54ea7c7d7161eb51a3799c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:58 -0300 Subject: x86: move equal types to common file move definitions that are now equal in type from smpboot_{32,64}.c to smpboot.c cpu_callin_map is put temporarily in smp_64.h (already exists in smp_32.h), and will soon be merged. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 28 ++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 27 --------------------------- arch/x86/kernel/smpboot_64.c | 33 --------------------------------- 3 files changed, 28 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bffe10861390..40a3b56952ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,6 +1,34 @@ #include #include +#include +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); + +/* Last level cache ID of each logical CPU */ +DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; + +/* bitmap of online cpus */ +cpumask_t cpu_online_map __read_mostly; +EXPORT_SYMBOL(cpu_online_map); + +cpumask_t cpu_callin_map; +cpumask_t cpu_callout_map; +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + +/* representing HT siblings of each logical CPU */ +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); + +/* representing HT and core siblings of each logical CPU */ +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); + +/* Per CPU bogomips and other parameters */ +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); #ifdef CONFIG_HOTPLUG_CPU int additional_cpus __initdata = -1; diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5a446f079b33..0fbc98163b4e 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -62,35 +62,8 @@ /* Set if we find a B stepping CPU */ static int __cpuinitdata smp_b_stepping; -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; - -/* representing HT siblings of each logical CPU */ 
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); -EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); - -/* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_t, cpu_core_map); -EXPORT_PER_CPU_SYMBOL(cpu_core_map); - -/* bitmap of online cpus */ -cpumask_t cpu_online_map __read_mostly; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_callin_map; -cpumask_t cpu_callout_map; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; -/* Per CPU bogomips and other parameters */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - /* which logical CPU number maps to which CPU (physical APIC ID) */ u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 953b0ff72b65..c51279f05316 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -60,42 +60,9 @@ #include #include -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; - -/* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map __read_mostly; - -EXPORT_SYMBOL(cpu_online_map); - -/* - * Private maps to synchronize booting between AP and BP. - * Probably not needed anymore, but it makes for easier debugging. 
-AK - */ -cpumask_t cpu_callin_map; -cpumask_t cpu_callout_map; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); - -/* Per CPU bogomips and other parameters */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - /* Set when the idlers are all forked */ int smp_threads_ready; -/* representing HT siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); -EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); - -/* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_t, cpu_core_map); -EXPORT_PER_CPU_SYMBOL(cpu_core_map); - /* * Trampoline 80x86 program as an array. */ -- cgit v1.2.3 From 1452207689b3c0dd2ffed40735289a3a4a8c0c7c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:12:59 -0300 Subject: x86: make set_cpu_sibling_map nonstatic And move its extern definition to smp.h, the common header Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c51279f05316..1e8f00a1d624 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -228,7 +228,7 @@ cpumask_t cpu_coregroup_map(int cpu) /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -static inline void set_cpu_sibling_map(int cpu) +void __cpuinit set_cpu_sibling_map(int cpu) { int i; struct cpuinfo_x86 *c = &cpu_data(cpu); -- cgit v1.2.3 From 61d5989973cc52b0ef0f781e870dfe5da6d5023e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:00 -0300 Subject: x86: make remove_siblinginfo non-static this is done to match i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git 
a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 1e8f00a1d624..20f1c7df86a3 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -918,7 +918,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_HOTPLUG_CPU -static void remove_siblinginfo(int cpu) +void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); -- cgit v1.2.3 From 768d95051bdaf60b4eb89b42c133b14627f478f2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:02 -0300 Subject: x86: move sibling functions to common file set_cpu_sibling_map() and remove_sibling_info() are equal between architectures, and are now moved to common file Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 88 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 88 ------------------------------------------- arch/x86/kernel/smpboot_64.c | 89 -------------------------------------------- 3 files changed, 88 insertions(+), 177 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 40a3b56952ef..d774520a6b48 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -29,7 +29,95 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); + +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + +void __cpuinit set_cpu_sibling_map(int cpu) +{ + int i; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + cpu_set(cpu, cpu_sibling_setup_map); + + if (smp_num_siblings > 1) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, 
per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); + } + } + } else { + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); + } + + cpu_set(cpu, c->llc_shared_map); + + if (current_cpu_data.x86_max_cores == 1) { + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); + c->booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); + } + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) + c->booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; + } + } +} + #ifdef CONFIG_HOTPLUG_CPU +void remove_siblinginfo(int cpu) +{ + int sibling; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); + /*/ + * last thread sibling in this cpu core going down + */ + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) + cpu_data(sibling).booted_cores--; + } + + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); + c->phys_proc_id = 0; + c->cpu_core_id = 0; + cpu_clear(cpu, cpu_sibling_setup_map); +} int additional_cpus __initdata = -1; diff --git 
a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 0fbc98163b4e..322f46674d42 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -274,71 +274,6 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } -/* representing cpus for which sibling maps can be computed */ -static cpumask_t cpu_sibling_setup_map; - -void __cpuinit set_cpu_sibling_map(int cpu) -{ - int i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - cpu_set(cpu, cpu_sibling_setup_map); - - if (smp_num_siblings > 1) { - for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c->phys_proc_id == cpu_data(i).phys_proc_id && - c->cpu_core_id == cpu_data(i).cpu_core_id) { - cpu_set(i, per_cpu(cpu_sibling_map, cpu)); - cpu_set(cpu, per_cpu(cpu_sibling_map, i)); - cpu_set(i, per_cpu(cpu_core_map, cpu)); - cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c->llc_shared_map); - cpu_set(cpu, cpu_data(i).llc_shared_map); - } - } - } else { - cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); - } - - cpu_set(cpu, c->llc_shared_map); - - if (current_cpu_data.x86_max_cores == 1) { - per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c->booted_cores = 1; - return; - } - - for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && - per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpu_set(i, c->llc_shared_map); - cpu_set(cpu, cpu_data(i).llc_shared_map); - } - if (c->phys_proc_id == cpu_data(i).phys_proc_id) { - cpu_set(i, per_cpu(cpu_core_map, cpu)); - cpu_set(cpu, per_cpu(cpu_core_map, i)); - /* - * Does this new cpu bringup a new core? 
- */ - if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { - /* - * for each core in package, increment - * the booted_cores for this new cpu - */ - if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c->booted_cores++; - /* - * increment the core count for all - * the other cpus in this package - */ - if (i != cpu) - cpu_data(i).booted_cores++; - } else if (i != cpu && !c->booted_cores) - c->booted_cores = cpu_data(i).booted_cores; - } - } -} - /* * Activate a secondary processor. */ @@ -1120,29 +1055,6 @@ void __init native_smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU -void remove_siblinginfo(int cpu) -{ - int sibling; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { - cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); - /*/ - * last thread sibling in this cpu core going down - */ - if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - cpu_data(sibling).booted_cores--; - } - - for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) - cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - cpus_clear(per_cpu(cpu_core_map, cpu)); - c->phys_proc_id = 0; - c->cpu_core_id = 0; - cpu_clear(cpu, cpu_sibling_setup_map); -} - int __cpu_disable(void) { cpumask_t map = cpu_online_map; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 20f1c7df86a3..329f9c53a335 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -225,71 +225,6 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } -/* representing cpus for which sibling maps can be computed */ -static cpumask_t cpu_sibling_setup_map; - -void __cpuinit set_cpu_sibling_map(int cpu) -{ - int i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - cpu_set(cpu, cpu_sibling_setup_map); - - if (smp_num_siblings > 1) { - for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c->phys_proc_id == cpu_data(i).phys_proc_id && - c->cpu_core_id == cpu_data(i).cpu_core_id) { 
- cpu_set(i, per_cpu(cpu_sibling_map, cpu)); - cpu_set(cpu, per_cpu(cpu_sibling_map, i)); - cpu_set(i, per_cpu(cpu_core_map, cpu)); - cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c->llc_shared_map); - cpu_set(cpu, cpu_data(i).llc_shared_map); - } - } - } else { - cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); - } - - cpu_set(cpu, c->llc_shared_map); - - if (current_cpu_data.x86_max_cores == 1) { - per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c->booted_cores = 1; - return; - } - - for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && - per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpu_set(i, c->llc_shared_map); - cpu_set(cpu, cpu_data(i).llc_shared_map); - } - if (c->phys_proc_id == cpu_data(i).phys_proc_id) { - cpu_set(i, per_cpu(cpu_core_map, cpu)); - cpu_set(cpu, per_cpu(cpu_core_map, i)); - /* - * Does this new cpu bringup a new core? - */ - if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { - /* - * for each core in package, increment - * the booted_cores for this new cpu - */ - if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c->booted_cores++; - /* - * increment the core count for all - * the other cpus in this package - */ - if (i != cpu) - cpu_data(i).booted_cores++; - } else if (i != cpu && !c->booted_cores) - c->booted_cores = cpu_data(i).booted_cores; - } - } -} - /* * Setup code on secondary processor (after comming out of the trampoline) */ @@ -917,30 +852,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU - -void remove_siblinginfo(int cpu) -{ - int sibling; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { - cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); - /* - * last thread sibling in this cpu core going down - */ - if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - cpu_data(sibling).booted_cores--; - } - - for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) - 
cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - cpus_clear(per_cpu(cpu_core_map, cpu)); - c->phys_proc_id = 0; - c->cpu_core_id = 0; - cpu_clear(cpu, cpu_sibling_setup_map); -} - static void __ref remove_cpu_from_maps(void) { int cpu = smp_processor_id(); -- cgit v1.2.3 From 70708a18e834fd709a4f497bb419ec84d1eb3511 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:03 -0300 Subject: x86: move cpu_coregroup_map to common file it is equal between architectures Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 16 ++++++++++++++++ arch/x86/kernel/smpboot_32.c | 14 -------------- arch/x86/kernel/smpboot_64.c | 14 -------------- 3 files changed, 16 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d774520a6b48..644e60969f90 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,6 +1,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -95,6 +96,21 @@ void __cpuinit set_cpu_sibling_map(int cpu) } } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + /* + * For perf, we return last level cache shared map. 
+ * And for power savings, we return cpu_core_map + */ + if (sched_mc_power_savings || sched_smt_power_savings) + return per_cpu(cpu_core_map, cpu); + else + return c->llc_shared_map; +} + + #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 322f46674d42..a58ca7f18013 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -260,20 +260,6 @@ static void __cpuinit smp_callin(void) static int cpucount; -/* maps the cpu to the sched domain representing multi-core */ -cpumask_t cpu_coregroup_map(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if (sched_mc_power_savings || sched_smt_power_savings) - return per_cpu(cpu_core_map, cpu); - else - return c->llc_shared_map; -} - /* * Activate a secondary processor. */ diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 329f9c53a335..1a592400d94f 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -211,20 +211,6 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } -/* maps the cpu to the sched domain representing multi-core */ -cpumask_t cpu_coregroup_map(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if (sched_mc_power_savings || sched_smt_power_savings) - return per_cpu(cpu_core_map, cpu); - else - return c->llc_shared_map; -} - /* * Setup code on secondary processor (after comming out of the trampoline) */ -- cgit v1.2.3 From fc25da9ec6c910976b76c70f7604a838679f75b2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:04 -0300 Subject: x86: remove vector_lock around cpu_online_map This lock does not protect cpu_online_map, so its length can be shortened, and in some cases, removed. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 1a592400d94f..ca3a3c5b64fe 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -261,9 +261,9 @@ void __cpuinit start_secondary(void) /* * Allow the master to continue. */ + spin_unlock(&vector_lock); cpu_set(smp_processor_id(), cpu_online_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - spin_unlock(&vector_lock); unlock_ipi_call_lock(); @@ -879,10 +879,8 @@ int __cpu_disable(void) local_irq_disable(); remove_siblinginfo(cpu); - spin_lock(&vector_lock); /* It's now safe to remove this processor from the online map */ cpu_clear(cpu, cpu_online_map); - spin_unlock(&vector_lock); remove_cpu_from_maps(); fixup_irqs(cpu_online_map); return 0; -- cgit v1.2.3 From 045f9d22029e94d6609d46f8ee07c63f4693dfb3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:05 -0300 Subject: x86: use remove_from_maps in cpu_disable it is already used in x86_64. 
In i386, it only removes from cpu_online_map Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 7 ++++++- arch/x86/kernel/smpboot_64.c | 8 +++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index a58ca7f18013..4939b3a01b24 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -1041,6 +1041,11 @@ void __init native_smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU +static void __ref remove_cpu_from_maps(int cpu) +{ + cpu_clear(cpu, cpu_online_map); +} + int __cpu_disable(void) { cpumask_t map = cpu_online_map; @@ -1066,7 +1071,7 @@ int __cpu_disable(void) remove_siblinginfo(cpu); - cpu_clear(cpu, map); + remove_cpu_from_maps(cpu); fixup_irqs(map); /* It's now safe to remove this processor from the online map */ cpu_clear(cpu, cpu_online_map); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index ca3a3c5b64fe..6509d3c1b3df 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -838,10 +838,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU -static void __ref remove_cpu_from_maps(void) +static void __ref remove_cpu_from_maps(int cpu) { - int cpu = smp_processor_id(); - + cpu_clear(cpu, cpu_online_map); cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ @@ -880,8 +879,7 @@ int __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ - cpu_clear(cpu, cpu_online_map); - remove_cpu_from_maps(); + remove_cpu_from_maps(cpu); fixup_irqs(cpu_online_map); return 0; } -- cgit v1.2.3 From e9a6cb96fafa4d4df2033ab6cf9c817f6f47e052 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:06 -0300 Subject: x86: do not clear cpu_online_map it was already 
cleared two lines above, and so, this removal is bogus Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 4939b3a01b24..3d21c663aa76 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -1073,8 +1073,7 @@ int __cpu_disable(void) remove_cpu_from_maps(cpu); fixup_irqs(map); - /* It's now safe to remove this processor from the online map */ - cpu_clear(cpu, cpu_online_map); + return 0; } -- cgit v1.2.3 From 69c18c15d39c4622c6e2f97e5db4d8c9c43adaaa Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:07 -0300 Subject: x86: merge __cpu_disable and cpu_die They are now equal, and are moved to a common file Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 67 ---------------------------------- arch/x86/kernel/smpboot_64.c | 79 ---------------------------------------- 3 files changed, 85 insertions(+), 146 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 644e60969f90..c35cd319d1ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2,6 +2,13 @@ #include #include #include +#include + +#include +#include +#include +#include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -181,5 +188,83 @@ __init void prefill_possible_map(void) for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); } + +static void __ref remove_cpu_from_maps(int cpu) +{ + cpu_clear(cpu, cpu_online_map); +#ifdef CONFIG_X86_64 + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + /* was set by cpu_init() */ + clear_bit(cpu, (unsigned long *)&cpu_initialized); + clear_node_cpumask(cpu); +#endif +} + +int 
__cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); + clear_local_APIC(); + + /* + * HACK: + * Allow any queued timer interrupts to get serviced + * This is only a temporary solution until we cleanup + * fixup_irqs as we do for IA64. + */ + local_irq_enable(); + mdelay(1); + + local_irq_disable(); + remove_siblinginfo(cpu); + + /* It's now safe to remove this processor from the online map */ + remove_cpu_from_maps(cpu); + fixup_irqs(cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. */ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk(KERN_INFO "CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); + return; + } + msleep(100); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* ... 
!CONFIG_HOTPLUG_CPU */ +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} #endif diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 3d21c663aa76..00b1b59cd560 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -1040,73 +1040,6 @@ void __init native_smp_prepare_boot_cpu(void) __get_cpu_var(cpu_state) = CPU_ONLINE; } -#ifdef CONFIG_HOTPLUG_CPU -static void __ref remove_cpu_from_maps(int cpu) -{ - cpu_clear(cpu, cpu_online_map); -} - -int __cpu_disable(void) -{ - cpumask_t map = cpu_online_map; - int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); - clear_local_APIC(); - /* Allow any queued timer interrupts to get serviced */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); - - remove_siblinginfo(cpu); - - remove_cpu_from_maps(cpu); - fixup_irqs(map); - - return 0; -} - -void __cpu_die(unsigned int cpu) -{ - /* We don't do anything here: idle task is faking death itself. */ - unsigned int i; - - for (i = 0; i < 10; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - printk ("CPU %d is now offline\n", cpu); - if (1 == num_online_cpus()) - alternatives_smp_switch(0); - return; - } - msleep(100); - } - printk(KERN_ERR "CPU %u didn't die...\n", cpu); -} -#else /* ... 
!CONFIG_HOTPLUG_CPU */ -int __cpu_disable(void) -{ - return -ENOSYS; -} - -void __cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - int __cpuinit native_cpu_up(unsigned int cpu) { unsigned long flags; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 6509d3c1b3df..0c67e5ae9c9d 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -836,82 +836,3 @@ void __init native_smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); check_nmi_watchdog(); } - -#ifdef CONFIG_HOTPLUG_CPU -static void __ref remove_cpu_from_maps(int cpu) -{ - cpu_clear(cpu, cpu_online_map); - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ - clear_node_cpumask(cpu); -} - -int __cpu_disable(void) -{ - int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); - clear_local_APIC(); - - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - - local_irq_disable(); - remove_siblinginfo(cpu); - - /* It's now safe to remove this processor from the online map */ - remove_cpu_from_maps(cpu); - fixup_irqs(cpu_online_map); - return 0; -} - -void __cpu_die(unsigned int cpu) -{ - /* We don't do anything here: idle task is faking death itself. 
*/ - unsigned int i; - - for (i = 0; i < 10; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - printk ("CPU %d is now offline\n", cpu); - if (1 == num_online_cpus()) - alternatives_smp_switch(0); - return; - } - msleep(100); - } - printk(KERN_ERR "CPU %u didn't die...\n", cpu); -} - -#else /* ... !CONFIG_HOTPLUG_CPU */ - -int __cpu_disable(void) -{ - return -ENOSYS; -} - -void __cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 89b08200ad8bc8fb860da218c4f3bcc292bf286c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:08 -0300 Subject: x86: make x86_64 accept the max_cpus parameter The parameter passing parsing is done in the common smpboot.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 7 +++++++ arch/x86/kernel/smpboot.c | 12 ++++++++++++ arch/x86/kernel/smpboot_32.c | 13 ------------- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 72ab1403fed7..2a1f7881c75b 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -32,6 +32,7 @@ /* Have we found an MP table */ int smp_found_config; +unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -115,6 +116,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) return; } + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." 
+ " Processor ignored.\n", maxcpus); + return; + } + num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c35cd319d1ed..34c31178041b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -268,3 +268,15 @@ void __cpu_die(unsigned int cpu) } #endif +/* + * If the BIOS enumerates physical processors before logical, + * maxcpus=N at enumeration-time can be used to disable HT. + */ +static int __init parse_maxcpus(char *arg) +{ + extern unsigned int maxcpus; + + maxcpus = simple_strtoul(arg, NULL, 0); + return 0; +} +early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 00b1b59cd560..3236e843a9ad 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -1113,16 +1113,3 @@ void __init smp_intr_init(void) /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } - -/* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. 
- */ -static int __init parse_maxcpus(char *arg) -{ - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("maxcpus", parse_maxcpus); -- cgit v1.2.3 From 420688293927a590d092ec76ef97c2565ae21aff Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:09 -0300 Subject: x86: move trampoline arrays extern definition to smp.h In here, they can serve both architectures Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 ------ arch/x86/kernel/smpboot_64.c | 7 ------- 2 files changed, 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 3236e843a9ad..a21f25418b3e 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -73,12 +73,6 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; -/* - * Trampoline 80x86 program as an array. - */ - -extern const unsigned char trampoline_data []; -extern const unsigned char trampoline_end []; static unsigned char *trampoline_base; static void map_cpu_to_logical_apicid(void); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 0c67e5ae9c9d..2cc1b8b0601c 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -63,13 +63,6 @@ /* Set when the idlers are all forked */ int smp_threads_ready; -/* - * Trampoline 80x86 program as an array. - */ - -extern const unsigned char trampoline_data[]; -extern const unsigned char trampoline_end[]; - /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -- cgit v1.2.3 From 91718e8d13c23bfe0aa6fa6b730c5c33ee9771bf Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:12 -0300 Subject: x86: unify setup_trampoline setup_trampoline() looks very similar between architectures, and this patch unifies them. The i386 version allocates bootmem memory, while the x86_64 version uses a fixed address. 
In this patch, we initialize the global trampoline_base to the x86_64 version, and i386 allocation can later override it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 29 ----------------------------- arch/x86/kernel/smpboot_64.c | 14 -------------- 3 files changed, 33 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 34c31178041b..b13b9d55f9ce 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -38,6 +39,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -117,6 +121,35 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ + +unsigned long __cpuinit setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, + trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} + +#ifdef CONFIG_X86_32 +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +void __init smp_alloc_memory(void) +{ + trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); + /* + * Has to be in very low memory so we can execute + * real-mode AP code. 
+ */ + if (__pa(trampoline_base) >= 0x9F000) + BUG(); +} +#endif #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index a21f25418b3e..ee826594aa03 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -73,40 +73,11 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; -static unsigned char *trampoline_base; - static void map_cpu_to_logical_apicid(void); /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __cpuinit setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} - -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 2cc1b8b0601c..9f4935e70e72 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -85,20 +85,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) #endif - -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. 
- */ - -static unsigned long __cpuinit setup_trampoline(void) -{ - void *tramp = __va(SMP_TRAMPOLINE_BASE); - memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(tramp); -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU -- cgit v1.2.3 From e90009bcc1137c51d677262417f16c00ad2ce9a9 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:13 -0300 Subject: x86: use wait_for_init_deassert in x86_64 wraps the busy loop for wait_for_init_deasserted() in a function, so smp_callin in x86_64 looks like more i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 9f4935e70e72..4f6d9768648f 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -100,6 +100,13 @@ static void __cpuinit smp_store_cpu_info(int id) print_cpu_info(c); } +static inline void wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + static atomic_t init_deasserted __cpuinitdata; /* @@ -117,8 +124,7 @@ void __cpuinit smp_callin(void) * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. */ - while (!atomic_read(&init_deasserted)) - cpu_relax(); + wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) 
-- cgit v1.2.3 From e104383fbf26570968cbf060955f67cd5378300a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 3 Mar 2008 14:13:14 -0300 Subject: x86: use cpu_relax instead of rep_nop This is done for smpboot_32.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index ee826594aa03..2dd95bae2b96 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -185,7 +185,7 @@ static void __cpuinit smp_callin(void) */ if (cpu_isset(cpuid, cpu_callout_map)) break; - rep_nop(); + cpu_relax(); } if (!time_before(jiffies, timeout)) { @@ -242,7 +242,7 @@ static void __cpuinit start_secondary(void *unused) preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) - rep_nop(); + cpu_relax(); /* * Check TSC synchronization with the BP: */ -- cgit v1.2.3 From f668964ea1485c64cc9ab0721679fe9cd90cc406 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Mar 2008 15:37:32 +0100 Subject: x86: clean up i387.c minor coding style cleanups. 
Before: total: 0 errors, 3 warnings, 479 lines checked After: total: 0 errors, 1 warnings, 483 lines checked No code changed: arch/x86/kernel/i387.o: text data bss dec hex filename 2379 4 8 2391 957 i387.o.before 2379 4 8 2391 957 i387.o.after md5: e1434553a3b4ff1f52ad97a68b1fad8a i387.o.before.asm e1434553a3b4ff1f52ad97a68b1fad8a i387.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 88 ++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d2e39e69aaf8..8f8102d967b3 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -5,45 +5,41 @@ * General FPU state handling cleanups * Gareth Hughes , May 2000 */ - -#include #include #include +#include + +#include #include -#include #include -#include -#include -#include #include +#include +#include +#include #ifdef CONFIG_X86_64 - -#include -#include - +# include +# include #else - -#define save_i387_ia32 save_i387 -#define restore_i387_ia32 restore_i387 - -#define _fpstate_ia32 _fpstate -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - +# define save_i387_ia32 save_i387 +# define restore_i387_ia32 restore_i387 +# define _fpstate_ia32 _fpstate +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct #endif #ifdef CONFIG_MATH_EMULATION -#define HAVE_HWFP (boot_cpu_data.hard_math) +# define HAVE_HWFP (boot_cpu_data.hard_math) #else -#define HAVE_HWFP 1 +# define HAVE_HWFP 1 #endif -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; void mxcsr_feature_mask_init(void) { unsigned long mask = 0; + clts(); if (cpu_has_fxsr) { memset(¤t->thread.i387.fxsave, 0, @@ -69,10 +65,11 @@ void __cpuinit fpu_init(void) if (offsetof(struct task_struct, thread.i387.fxsave) & 15) __bad_fxsave_alignment(); + 
set_in_cr4(X86_CR4_OSFXSR); set_in_cr4(X86_CR4_OSXMMEXCPT); - write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */ + write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ mxcsr_feature_mask_init(); /* clean state in init */ @@ -178,6 +175,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; } @@ -232,8 +230,8 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) * FXSR floating point environment conversions. */ -static void convert_from_fxsr(struct user_i387_ia32_struct *env, - struct task_struct *tsk) +static void +convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; @@ -252,10 +250,11 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env, * should be actually ds/cs at fpu exception time, but * that information is not available in 64bit mode. 
*/ - asm("mov %%ds,%0" : "=r" (env->fos)); - asm("mov %%cs,%0" : "=r" (env->fcs)); + asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos)); + asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs)); } else { struct pt_regs *regs = task_pt_regs(tsk); + env->fos = 0xffff0000 | tsk->thread.ds; env->fcs = regs->cs; } @@ -309,9 +308,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, init_fpu(target); - if (!cpu_has_fxsr) + if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.i387.fsave, 0, -1); + } if (kbuf && pos == 0 && count == sizeof(env)) { convert_from_fxsr(kbuf, target); @@ -319,6 +319,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, } convert_from_fxsr(&env, target); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); } @@ -335,9 +336,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, init_fpu(target); set_stopped_child_used_math(target); - if (!cpu_has_fxsr) + if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.i387.fsave, 0, -1); + } if (pos > 0 || count < sizeof(env)) convert_from_fxsr(&env, target); @@ -392,28 +394,28 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) { if (!used_math()) return 0; - - /* This will cause a "finit" to be triggered by the next + /* + * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. */ clear_used_math(); - if (HAVE_HWFP) { - if (cpu_has_fxsr) { - return save_i387_fxsave(buf); - } else { - return save_i387_fsave(buf); - } - } else { + if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf) ? 
-1 : 1; } + + if (cpu_has_fxsr) + return save_i387_fxsave(buf); + else + return save_i387_fsave(buf); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + clear_fpu(tsk); return __copy_from_user(&tsk->thread.i387.fsave, buf, sizeof(struct i387_fsave_struct)); @@ -421,9 +423,10 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) { - int err; struct task_struct *tsk = current; struct user_i387_ia32_struct env; + int err; + clear_fpu(tsk); err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); @@ -432,6 +435,7 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err || __copy_from_user(&env, buf, sizeof(env))) return 1; convert_to_fxsr(tsk, &env); + return 0; } @@ -440,17 +444,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) int err; if (HAVE_HWFP) { - if (cpu_has_fxsr) { + if (cpu_has_fxsr) err = restore_i387_fxsave(buf); - } else { + else err = restore_i387_fsave(buf); - } } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf) != 0; } set_used_math(); + return err; } @@ -463,8 +467,8 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) */ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) { - int fpvalid; struct task_struct *tsk = current; + int fpvalid; fpvalid = !!used_math(); if (fpvalid) -- cgit v1.2.3 From c1db29dbc761e9a464b417df7d4dbbae7df81f4c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 4 Mar 2008 16:47:00 -0800 Subject: x86: arch/x86/kernel/cpu/feature_names.c - use angle brackets for include Signed-off-by: Joe Perches Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 
ee975ac6bbcb..e43ad4ad4cba 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -4,7 +4,7 @@ * This file must not contain any executable code. */ -#include "asm/cpufeature.h" +#include <asm/cpufeature.h> /* * These flag bits must match the definitions in <asm/cpufeature.h>. -- cgit v1.2.3 From a7113170214b569d24e413326a56c4cc5cc1a152 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2008 10:24:04 +0100 Subject: x86: remove DEBUG_SIG Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 12 ------------ arch/x86/kernel/signal_64.c | 25 ------------------------- 2 files changed, 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index add9c6e9c44d..f4ec6a092951 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -26,8 +26,6 @@ #include #include "sigframe.h" -#define DEBUG_SIG 0 - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ @@ -412,11 +410,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; give_sigsegv: @@ -505,11 +498,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; give_sigsegv: diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 043294582f41..827179c5b32a 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -28,8 +28,6 @@ #include #include "sigframe.h" -#define DEBUG_SIG 0 - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #define __FIX_EFLAGS (X86_EFLAGS_AC
| X86_EFLAGS_OF | \ @@ -142,10 +140,6 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; -#if DEBUG_SIG - printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax); -#endif - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) goto badframe; @@ -274,10 +268,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; -#if DEBUG_SIG - printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax); -#endif - /* Set up registers for signal handler */ regs->di = sig; /* In case the signal handler was declared without prototypes */ @@ -302,10 +292,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif return 0; @@ -353,12 +339,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, { int ret; -#if DEBUG_SIG - printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n", - current->pid, sig, - regs->ip, regs->sp, regs); -#endif - /* Are we from a system call? */ if (current_syscall(regs) >= 0) { /* If so, check system call restarting.. */ @@ -491,11 +471,6 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#if DEBUG_SIG - printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n", - thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current)); -#endif - /* Pending single-step? 
*/ if (thread_info_flags & _TIF_SINGLESTEP) { regs->flags |= X86_EFLAGS_TF; -- cgit v1.2.3 From 7e907f48980d6668f99206ba0dded40dca2d086f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2008 10:33:08 +0100 Subject: x86: clean up arch/x86/kernel/signal_32.c Before: total: 21 errors, 6 warnings, 665 lines checked After: total: 0 errors, 3 warnings, 685 lines checked No code changed: arch/x86/kernel/signal_32.o: text data bss dec hex filename 5333 0 4 5337 14d9 signal_32.o.before 5333 0 4 5337 14d9 signal_32.o.after md5: c279e98012a2808e90cfa2a7787e42a4 signal_32.o.before.asm c279e98012a2808e90cfa2a7787e42a4 signal_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 180 +++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 79 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index f4ec6a092951..3da3ffa39e9a 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -4,26 +4,29 @@ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes */ +#include -#include -#include -#include +#include +#include +#include #include +#include #include +#include +#include #include +#include #include -#include -#include -#include -#include -#include #include -#include +#include +#include + #include #include #include #include #include + #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -55,10 +58,11 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask) current->state = TASK_INTERRUPTIBLE; schedule(); set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } -asmlinkage int +asmlinkage int sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { @@ -67,10 +71,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, if (act) { old_sigset_t mask; + if (!access_ok(VERIFY_READ, 
act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); @@ -83,6 +89,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } @@ -90,10 +97,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, return ret; } -asmlinkage int -sys_sigaltstack(unsigned long bx) +asmlinkage int sys_sigaltstack(unsigned long bx) { - /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */ + /* + * This is needed to make gcc realize it doesn't own the + * "struct pt_regs" + */ struct pt_regs *regs = (struct pt_regs *)&bx; const stack_t __user *uss = (const stack_t __user *)bx; stack_t __user *uoss = (stack_t __user *)regs->cx; @@ -129,7 +138,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, #define GET_SEG(seg) \ { unsigned short tmp; \ err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg,tmp); } + loadsegment(seg, tmp); } GET_SEG(gs); COPY_SEG(fs); @@ -139,16 +148,19 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, COPY(dx); COPY(cx); COPY(ip); COPY_SEG_STRICT(cs); COPY_SEG_STRICT(ss); - + { unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->flags = (regs->flags & ~FIX_EFLAGS) | + (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ } { - struct _fpstate __user * buf; + struct _fpstate __user *buf; + err |= __get_user(buf, &sc->fpstate); if (buf) { if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) @@ -156,6 +168,7 @@ restore_sigcontext(struct 
pt_regs *regs, struct sigcontext __user *sc, err |= restore_i387(buf); } else { struct task_struct *me = current; + if (used_math()) { clear_fpu(me); clear_used_math(); @@ -172,15 +185,17 @@ badframe: asmlinkage unsigned long sys_sigreturn(unsigned long __unused) { - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8); - sigset_t set; + struct sigframe __user *frame; + struct pt_regs *regs; unsigned long ax; + sigset_t set; + + regs = (struct pt_regs *) &__unused; + frame = (struct sigframe __user *)(regs->sp - 8); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask) - || (_NSIG_WORDS > 1 + if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 && __copy_from_user(&set.sig[1], &frame->extramask, sizeof(frame->extramask)))) goto badframe; @@ -190,7 +205,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) current->blocked = set; recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - + if (restore_sigcontext(regs, &frame->sc, &ax)) goto badframe; return ax; @@ -207,8 +222,9 @@ badframe: } force_sig(SIGSEGV, current); + return 0; -} +} asmlinkage int sys_rt_sigreturn(unsigned long __unused) { @@ -228,7 +244,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) current->blocked = set; recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -240,12 +256,11 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) badframe: force_sig(SIGSEGV, current); return 0; -} +} /* * Set up a signal frame.
*/ - static int setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) @@ -276,9 +291,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, tmp = save_i387(fpstate); if (tmp < 0) - err = 1; + err = 1; else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); /* non-iBCS2 extensions.. */ err |= __put_user(mask, &sc->oldmask); @@ -291,7 +306,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) { unsigned long sp; @@ -309,32 +324,37 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) if (ka->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(sp) == 0) sp = current->sas_ss_sp + current->sas_ss_size; - } - - /* This is the legacy signal stack switching. */ - else if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; + } else { + /* This is the legacy signal stack switching. */ + if ((regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; } sp -= frame_size; - /* Align the stack pointer according to the i386 ABI, - * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + /* + * Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. + */ sp = ((sp + 4) & -16ul) - 4; + return (void __user *) sp; } -/* These symbols are defined with the addresses in the vsyscall page. - See vsyscall-sigreturn.S. */ +/* + * These symbols are defined with the addresses in the vsyscall page. + * See vsyscall-sigreturn.S. 
+ */ extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; -static int setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int +setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) { - void __user *restorer; struct sigframe __user *frame; + void __user *restorer; int err = 0; int usig; @@ -373,9 +393,9 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up to return from userspace. */ err |= __put_user(restorer, &frame->pretcode); - + /* - * This is popl %eax ; movl $,%eax ; int $0x80 + * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80 * * WE DO NOT USE IT ANY MORE! It's only left here for historical * reasons and because gdb uses it as a signature to notice @@ -389,9 +409,9 @@ static int setup_frame(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - regs->ax = (unsigned long) sig; + regs->sp = (unsigned long)frame; + regs->ip = (unsigned long)ka->sa.sa_handler; + regs->ax = (unsigned long)sig; regs->dx = 0; regs->cx = 0; @@ -418,10 +438,10 @@ give_sigsegv: } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) + sigset_t *set, struct pt_regs *regs) { - void __user *restorer; struct rt_sigframe __user *frame; + void __user *restorer; int err = 0; int usig; @@ -451,7 +471,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); + regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; @@ -461,9 +481,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if 
(ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); - + /* - * This is movl $,%ax ; int $0x80 + * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 * * WE DO NOT USE IT ANY MORE! It's only left here for historical * reasons and because gdb uses it as a signature to notice @@ -477,11 +497,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - regs->ax = (unsigned long) usig; - regs->dx = (unsigned long) &frame->info; - regs->cx = (unsigned long) &frame->uc; + regs->sp = (unsigned long)frame; + regs->ip = (unsigned long)ka->sa.sa_handler; + regs->ax = (unsigned long)usig; + regs->dx = (unsigned long)&frame->info; + regs->cx = (unsigned long)&frame->uc; regs->ds = __USER_DS; regs->es = __USER_DS; @@ -506,9 +526,8 @@ give_sigsegv: } /* - * OK, we're invoking a handler - */ - + * OK, we're invoking a handler: + */ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) @@ -551,16 +570,17 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else ret = setup_frame(sig, ka, oldset, regs); - if (ret == 0) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + if (ret) + return ret; - return ret; + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + return 0; } /* @@ -592,7 +612,8 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /*
Re-enable any watchpoints before delivering the + /* + * Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. @@ -600,16 +621,17 @@ static void do_signal(struct pt_regs *regs) if (current->thread.debugreg7) set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ + /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { - /* a signal was successfully delivered; the saved + /* + * a signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag */ + * clear the TIF_RESTORE_SIGMASK flag + */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); } - return; } @@ -645,8 +667,8 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the TIF_WORK_MASK flags */ -void do_notify_resume(struct pt_regs *regs, void *unused, - __u32 thread_info_flags) +void +do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { /* Pending single-step? 
*/ if (thread_info_flags & _TIF_SINGLESTEP) { @@ -660,6 +682,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & _TIF_HRTICK_RESCHED) hrtick_resched(); - + clear_thread_flag(TIF_IRET); } -- cgit v1.2.3 From eee6dd15723639f9270e4c561a0c82e8e18bd587 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2008 10:39:07 +0100 Subject: x86: move extern declaration to vdso.h Before: total: 0 errors, 3 warnings, 685 lines checked After: total: 0 errors, 1 warnings, 678 lines checked No code changed: arch/x86/kernel/signal_32.o: text data bss dec hex filename 5333 0 4 5337 14d9 signal_32.o.before 5333 0 4 5337 14d9 signal_32.o.after md5: c279e98012a2808e90cfa2a7787e42a4 signal_32.o.before.asm c279e98012a2808e90cfa2a7787e42a4 signal_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 3da3ffa39e9a..ba168e5743be 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -342,13 +342,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) return (void __user *) sp; } -/* - * These symbols are defined with the addresses in the vsyscall page. - * See vsyscall-sigreturn.S. 
- */ -extern void __user __kernel_sigreturn; -extern void __user __kernel_rt_sigreturn; - static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) -- cgit v1.2.3 From 97b44ae6cd8117212d41bedc433b5571ee3b79d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2008 10:43:17 +0100 Subject: x86: add KERN_INFO to show_unhandled_signals printout Before: total: 0 errors, 1 warnings, 678 lines checked After: total: 0 errors, 0 warnings, 678 lines checked No code changed: arch/x86/kernel/signal_32.o: text data bss dec hex filename 5333 0 4 5337 14d9 signal_32.o.before 5336 0 4 5340 14dc signal_32.o.after md5: c279e98012a2808e90cfa2a7787e42a4 signal_32.o.before.asm c279e98012a2808e90cfa2a7787e42a4 signal_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index ba168e5743be..aa1b6a0a22e4 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -212,13 +212,13 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) badframe: if (show_unhandled_signals && printk_ratelimit()) { - printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx" - " sp:%lx oeax:%lx", + printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" + "%p ip:%lx sp:%lx oeax:%lx", task_pid_nr(current) > 1 ? 
KERN_INFO : KERN_EMERG, current->comm, task_pid_nr(current), frame, regs->ip, regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); - printk("\n"); + printk(KERN_CONT "\n"); } force_sig(SIGSEGV, current); -- cgit v1.2.3 From a7062211865efb53cda253d6e33a106f0fe20ebe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 6 Mar 2008 01:11:11 -0800 Subject: x86: fix amd_detect_cmp for system with apicid lifting, boot cpu apicid will be 4 got: CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line) CPU: L2 Cache: 512K (64 bytes/line) CPU 0/4 -> Node 0 CPU: Physical Processor ID: 1 CPU: Processor Core ID: 0 so try to offset apicid back before get phys_proc_id with bits shift. then we can get correct socket ID also remove cpu_data(0) reference. because cpu_data(0) is only ready after smp_prepare_cpus with the assignment from boot_cpu_data to current_cpu_data aka cpu_data(0). and check_bugs()==>identify_cpu(&boot_cpu_data) runs well before smp_prepare_cpus. So just use boot_cpu_id instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index d65b73e63384..f303c70dd688 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -564,7 +564,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); /* Convert the APIC ID into the socket ID */ - c->phys_proc_id = phys_pkg_id(bits); + c->phys_proc_id = (c->apicid - boot_cpu_id) >> bits; #ifdef CONFIG_NUMA node = c->phys_proc_id; @@ -581,7 +581,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) If that doesn't result in a usable node fall back to the path for the previous case.
*/ - int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); + int ht_nodeid = apicid - boot_cpu_id; if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) -- cgit v1.2.3 From 282bfe21cf0e2af9eac052c89bcc0a5ace80352f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 6 Mar 2008 01:13:34 -0800 Subject: x86: show apicid for cpu in proc Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 15043a335ef1..e8e58c09625d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -59,6 +59,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + seq_printf(m, "apicid\t\t: %d\n", c->apicid); } #endif } -- cgit v1.2.3 From 77bf90ed66116a1fc0e2f0554ecac75a54290cc0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 3 Mar 2008 11:37:23 -0800 Subject: x86: replace remaining __FUNCTION__ occurances __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 8 ++++---- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/srat_32.c | 10 +++++----- arch/x86/kernel/summit_32.c | 12 ++++++------ 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5fed98ca0e1f..e2d30b8e08a2 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -205,7 +205,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); for (a = start; 
a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); @@ -217,7 +217,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); DPRINTK("%s: vsyscall fixup: %p => %p\n", - __FUNCTION__, a->instr, instr); + __func__, a->instr, instr); } #endif memcpy(insnbuf, a->replacement, a->replacementlen); @@ -307,7 +307,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, smp->text = text; smp->text_end = text_end; DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __FUNCTION__, smp->locks, smp->locks_end, + __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); spin_lock_irqsave(&smp_alt, flags); @@ -332,7 +332,7 @@ void alternatives_smp_module_del(struct module *mod) continue; list_del(&item->next); spin_unlock_irqrestore(&smp_alt, flags); - DPRINTK("%s: %s\n", __FUNCTION__, item->name); + DPRINTK("%s: %s\n", __func__, item->name); kfree(item); return; } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index cef054b09d27..6ea67b76a214 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs) if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __FUNCTION__, irq); + __func__, irq); BUG(); } diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index b72e61359c36..70e4a374b4e8 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c @@ -277,14 +277,14 @@ int __init get_memcfg_from_srat(void) rsdp_address = acpi_os_get_root_pointer(); if (!rsdp_address) { printk("%s: System description tables not found\n", - __FUNCTION__); + __func__); goto out_err; } - printk("%s: assigning address to rsdp\n", __FUNCTION__); + printk("%s: assigning address to rsdp\n", __func__); rsdp = (struct acpi_table_rsdp 
*)(u32)rsdp_address; if (!rsdp) { - printk("%s: Didn't find ACPI root!\n", __FUNCTION__); + printk("%s: Didn't find ACPI root!\n", __func__); goto out_err; } @@ -292,7 +292,7 @@ int __init get_memcfg_from_srat(void) rsdp->oem_id); if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) { - printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__); + printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__); goto out_err; } @@ -302,7 +302,7 @@ int __init get_memcfg_from_srat(void) if (!rsdt) { printk(KERN_WARNING "%s: ACPI: Invalid root system description tables (RSDT)\n", - __FUNCTION__); + __func__); goto out_err; } diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index c7b579db843d..30f04c3e68e4 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -47,7 +47,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_rio_dev) { - printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__); + printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); return last_bus; } @@ -58,7 +58,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_scal_dev) { - printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__); + printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); return last_bus; } @@ -88,7 +88,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) num_buses = 9; break; default: - printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__); + printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); return last_bus; } @@ -103,13 +103,13 @@ static int __init build_detail_arrays(void) int i, scal_detail_size, rio_detail_size; if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { - printk(KERN_WARNING "%s: MAX_NUMNODES too low! 
Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); + printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); return 0; } switch (rio_table_hdr->version) { default: - printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version); + printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); return 0; case 2: scal_detail_size = 11; @@ -154,7 +154,7 @@ void __init setup_summit(void) offset = *((unsigned short *)(ptr + offset)); } if (!rio_table_hdr) { - printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__); + printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); return; } -- cgit v1.2.3 From e587cadd8f47e202a30712e2906a65a0606d5865 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 6 Mar 2008 08:48:49 -0500 Subject: x86: enhance DEBUG_RODATA support - alternatives Fix a memcpy that should be a text_poke (in apply_alternatives). Use kernel_wp_save/kernel_wp_restore in text_poke to support DEBUG_RODATA correctly and so the CPU HOTPLUG special case can be removed. Add text_poke_early, for alternatives and paravirt boot-time and module load time patching. Changelog: - Fix text_set and text_poke alignment check (mixed up bitwise and and or) - Remove text_set - Export add_nops, so it can be used by others. - Document text_poke_early. - Remove clflush, since it breaks some VIA architectures and is not strictly necessary. - Add kerneldoc to text_poke and text_poke_early. - Create a second vmap instead of using the WP bit to support Xen and VMI. - Move local_irq disable within text_poke and text_poke_early to be able to be sleepable in these functions. Signed-off-by: Mathieu Desnoyers CC: Andi Kleen CC: pageexec@freemail.hu CC: H. 
Peter Anvin CC: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 88 ++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e2d30b8e08a2..0c92ad4d257a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #define MAX_PATCH_LEN (255-1) @@ -177,7 +179,7 @@ static const unsigned char*const * find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void add_nops(void *insns, unsigned int len) +void add_nops(void *insns, unsigned int len) { const unsigned char *const *noptable = find_nop_table(); @@ -190,6 +192,7 @@ static void add_nops(void *insns, unsigned int len) len -= noplen; } } +EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; @@ -223,7 +226,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) memcpy(insnbuf, a->replacement, a->replacementlen); add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); - text_poke(instr, insnbuf, a->instrlen); + text_poke_early(instr, insnbuf, a->instrlen); } } @@ -284,7 +287,6 @@ void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end) { struct smp_alt_module *smp; - unsigned long flags; if (noreplace_smp) return; @@ -310,39 +312,37 @@ void alternatives_smp_module_add(struct module *mod, char *name, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - spin_lock_irqsave(&smp_alt, flags); + spin_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); if (boot_cpu_has(X86_FEATURE_UP)) alternatives_smp_unlock(smp->locks, smp->locks_end, smp->text, smp->text_end); - 
spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); } void alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - unsigned long flags; if (smp_alt_once || noreplace_smp) return; - spin_lock_irqsave(&smp_alt, flags); + spin_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); DPRINTK("%s: %s\n", __func__, item->name); kfree(item); return; } - spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); } void alternatives_smp_switch(int smp) { struct smp_alt_module *mod; - unsigned long flags; #ifdef CONFIG_LOCKDEP /* @@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp) return; BUG_ON(!smp && (num_online_cpus() > 1)); - spin_lock_irqsave(&smp_alt, flags); + spin_lock(&smp_alt); /* * Avoid unnecessary switches because it forces JIT based VMs to @@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp) mod->text, mod->text_end); } smp_mode = smp; - spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); } #endif @@ -411,7 +411,7 @@ void apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ add_nops(insnbuf + used, p->len - used); - text_poke(p->instr, insnbuf, p->len); + text_poke_early(p->instr, insnbuf, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -420,8 +420,6 @@ extern struct paravirt_patch_site __start_parainstructions[], void __init alternative_instructions(void) { - unsigned long flags; - /* The patching is not fully atomic, so try to avoid local interruptions that might execute the to be patched code. Other CPUs are not running. 
*/ @@ -430,7 +428,6 @@ void __init alternative_instructions(void) stop_mce(); #endif - local_irq_save(flags); apply_alternatives(__alt_instructions, __alt_instructions_end); /* switch to patch-once-at-boottime-only mode and free the @@ -462,7 +459,6 @@ void __init alternative_instructions(void) } #endif apply_paravirt(__parainstructions, __parainstructions_end); - local_irq_restore(flags); if (smp_alt_once) free_init_pages("SMP alternatives", @@ -475,18 +471,64 @@ void __init alternative_instructions(void) #endif } -/* - * Warning: +/** + * text_poke_early - Update instructions on a live kernel at boot time + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * * When you use this code to patch more than one byte of an instruction * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these instructions. - * And on the local CPU you need to be protected again NMI or MCE handlers - * seeing an inconsistent instruction while you patch. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. */ -void __kprobes text_poke(void *addr, unsigned char *opcode, int len) +void *text_poke_early(void *addr, const void *opcode, size_t len) { + unsigned long flags; + local_irq_save(flags); memcpy(addr, opcode, len); + local_irq_restore(flags); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but + that causes hangs on some VIA CPUs. */ + return addr; +} + +/** + * text_poke - Update instructions on a live kernel + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * Only atomic text poke/set should be allowed when not doing early patching. 
+ * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. + */ +void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +{ + unsigned long flags; + char *vaddr; + int nr_pages = 2; + + BUG_ON(len > sizeof(long)); + BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) + - ((long)addr & ~(sizeof(long) - 1))); + { + struct page *pages[2] = { virt_to_page(addr), + virt_to_page(addr + PAGE_SIZE) }; + if (!pages[1]) + nr_pages = 1; + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + BUG_ON(!vaddr); + local_irq_save(flags); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + local_irq_restore(flags); + vunmap(vaddr); + } sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ + return addr; } -- cgit v1.2.3 From 01aaea1afbcdb7c49fe4a567ebe3e295db9f720d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 6 Mar 2008 13:46:39 -0800 Subject: x86: introduce initial apicid Store initial_apicid from early identify; it could be different from phys_proc_id later. Also print it out in /proc/cpuinfo.
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++------- arch/x86/kernel/cpu/proc.c | 3 +++ arch/x86/kernel/setup_64.c | 11 ++++++----- 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 57a46c36fa23..0dd87b8d6707 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -369,10 +369,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; + c->initial_apicid = (ebx >> 24) & 0xFF; #ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; #else - c->apicid = (ebx >> 24) & 0xFF; + c->apicid = c->initial_apicid; #endif if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; @@ -395,9 +397,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); } -#ifdef CONFIG_X86_HT - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; -#endif } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -554,7 +553,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); @@ -565,7 +564,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); if (c->x86_max_cores > 1) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e8e58c09625d..0978a4a39418 100644 --- 
a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -19,6 +19,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + seq_printf(m, "apicid\t\t: %d\n", c->apicid); + seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); } #endif } @@ -60,6 +62,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->apicid); + seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); } #endif } diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f303c70dd688..13fe525bf065 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -562,9 +562,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); - /* Convert the APIC ID into the socket ID */ - c->phys_proc_id = (c->apicid - boot_cpu_id) >> bits; + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; #ifdef CONFIG_NUMA node = c->phys_proc_id; @@ -581,7 +581,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) If that doesn't result in a usable node fall back to the path for the previous case. 
*/ - int ht_nodeid = apicid - boot_cpu_id; + int ht_nodeid = c->initial_apicid; if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) @@ -936,8 +936,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 = 4; } + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; #ifdef CONFIG_SMP - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; + c->phys_proc_id = c->initial_apicid; #endif /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); -- cgit v1.2.3 From 537d916066f66de18dbca79adf82933cd12d2a36 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 7 Mar 2008 19:26:26 +0100 Subject: x86: coding style fixes to arch/x86/kernel/setup_32.c Fix: ERROR: do not initialise externals to 0 or NULL Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index fd639d9f79b6..d4ad6e8ae886 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -228,7 +228,7 @@ static inline void copy_edd(void) } #endif -int __initdata user_defined_memmap = 0; +int __initdata user_defined_memmap; /* * "mem=nopentium" disables the 4MB page tables. 
-- cgit v1.2.3 From e40c0fe6b0b5dd16aec3c0dad311d36b19d78fd9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 9 Mar 2008 12:35:00 -0700 Subject: x86: cleanup duplicate includes Signed-off-by: Joe Perches arch/x86/kernel/reboot.c | 1 - include/asm-x86/elf.h | 5 ++--- include/asm-x86/posix_types.h | 8 +------- include/asm-x86/processor.h | 3 +-- include/asm-x86/unistd.h | 8 +------- 5 files changed, 5 insertions(+), 20 deletions(-) Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 484c4a80d38a..66cd4afc1e57 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,5 +1,4 @@ #include -#include #include #include #include -- cgit v1.2.3 From f694010185c429629ad5a65245da08103e611852 Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Mon, 10 Mar 2008 17:44:03 +0530 Subject: x86: Don't send RESCHEDULE_VECTOR to offlined cpus In the x86 native_smp_send_reschedule_function(), don't send the IPI if the cpu has gone offline already. Warn nevertheless!! 
Signed-off-by: Gautham R Shenoy Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 88c1e518a203..16c52aaaca35 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,7 +118,10 @@ */ static void native_smp_send_reschedule(int cpu) { - WARN_ON(cpu_is_offline(cpu)); + if (unlikely(cpu_is_offline(cpu))) { + WARN_ON(1); + return; + } send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } -- cgit v1.2.3 From 5b0e508415a83989fe704b4718a1a214bc333ca7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Mar 2008 13:11:17 +0000 Subject: x86: prevent unconditional writes to DebugCtl MSR Otherwise, enabling (or better, subsequent disabling) of single stepping would cause a kernel oops on CPUs not having this MSR. The patch could have added a conditional to the MSR write in user_disable_single_step(), but centralizing the updates seems safer and (looking forward) more manageable.
Signed-off-by: Jan Beulich Cc: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 4 ++-- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/step.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 34a591283f5d..1e3de7db9ad5 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -410,13 +410,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, static void __kprobes clear_btf(void) { if (test_thread_flag(TIF_DEBUGCTLMSR)) - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + update_debugctlmsr(0); } static void __kprobes restore_btf(void) { if (test_thread_flag(TIF_DEBUGCTLMSR)) - wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr); + update_debugctlmsr(current->thread.debugctlmsr); } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9230ce060d09..ec05fb750dfc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -564,12 +564,12 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + update_debugctlmsr(0); wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); } if (next->debugctlmsr != debugctl) - wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); + update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { set_debugreg(next->debugreg0, 0); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1ffce14cff6e..4f40272474dd 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -563,12 +563,12 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, /* we clear debugctl to make sure DS * is not in use 
when we change it */ debugctl = 0; - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + update_debugctlmsr(0); wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); } if (next->debugctlmsr != debugctl) - wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); + update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { loaddebug(next, 0); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 071ff4798236..92c20fee6781 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -148,7 +148,7 @@ static void write_debugctlmsr(struct task_struct *child, unsigned long val) if (child != current) return; - wrmsrl(MSR_IA32_DEBUGCTLMSR, val); + update_debugctlmsr(val); } /* -- cgit v1.2.3 From 6079d2d5d11122eb52721f0f3c828952a490e6c1 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Tue, 11 Mar 2008 19:45:48 +0300 Subject: x86: move quad_local_to_mp_bus_id to numa.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index b2aded3fbfec..2abc9392799a 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -45,7 +45,6 @@ int apic_version [MAX_APICS]; int mp_bus_id_to_type [MAX_MP_BUSSES]; int mp_bus_id_to_node [MAX_MP_BUSSES]; int mp_bus_id_to_local [MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; static int mp_current_pci_id; -- cgit v1.2.3 From 8643f9d02a7bb9db74634b4c062d8e70ce7c59b9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:06 -0800 Subject: x86: get boot_cpu_id as early for k8_scan_nodes When acpi=off or there is no SRAT defined, apicid_to_node is got from K8 Northbridge PCI configuration space in k8_scan_nodes() in arch/x86_64/mm/k8toplogy.c. The problem is that it assumes bsp apic id is 0 at that point. 
For a four socket system with Quad core cpus installed, all cpus' apic ids are offset by 4, and the bsp apic id is 4. For an eight socket system with dual core cpus installed, all cpus' apic ids are offset by 2, and the bsp apic id is 2. We need to get boot_cpu_id --- the bsp apic id --- before k8_scan_nodes is called. So create early_acpi_boot_init and early_get_smp_config to get boot_cpu_id. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 24 ++++++++++++ arch/x86/kernel/mpparse_64.c | 89 ++++++++++++++++++++++++++++++++------------ 2 files changed, 90 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index f6eb01d8923a..8a475793f736 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -861,6 +861,30 @@ static int __init detect_init_APIC(void) return 0; } +void __init early_init_lapic_mapping(void) +{ + unsigned long apic_phys; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + apic_phys = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken).
+ */ + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); +} + /** * init_apic_mappings - initialize APIC mappings */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 2a1f7881c75b..529b1c22077e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -224,8 +224,7 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) /* * Read/parse the MPC */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { char str[16]; int count=sizeof(*mpc); @@ -266,6 +265,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; + if (early) + return 1; + /* * Now process the configuration blocks. */ @@ -477,27 +479,38 @@ static struct intel_mp_floating *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ -void __init get_smp_config (void) +static void __init __get_smp_config(unsigned early) { struct intel_mp_floating *mpf = mpf_found; + if (acpi_lapic && early) + return; /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. - */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); - return; - } - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. 
+ */ + if (acpi_lapic && acpi_ioapic) { + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " + "information\n"); + return; + } else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->mpf_specification); /* * Now see if we need to read further. */ if (mpf->mpf_feature1 != 0) { + if (early) { + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + return; + } printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); construct_default_ISA_mptable(mpf->mpf_feature1); @@ -508,12 +521,15 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); return; } + + if (early) + return; /* * If there are no explicit MP IRQ entries, then we are * broken. We set up most of the low 16 IO-APIC pins to @@ -535,13 +551,25 @@ void __init get_smp_config (void) } else BUG(); - printk(KERN_INFO "Processors: %d\n", num_processors); + if (!early) + printk(KERN_INFO "Processors: %d\n", num_processors); /* * Only use the first configuration found. 
*/ } -static int __init smp_scan_config (unsigned long base, unsigned long length) +void __init early_get_smp_config(void) +{ + __get_smp_config(1); +} + +void __init get_smp_config(void) +{ + __get_smp_config(0); +} + +static int __init smp_scan_config(unsigned long base, unsigned long length, + unsigned reserve) { extern void __bad_mpf_size(void); unsigned int *bp = phys_to_virt(base); @@ -560,10 +588,15 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) || (mpf->mpf_specification == 4)) ) { smp_found_config = 1; + mpf_found = mpf; + + if (!reserve) + return 1; + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE); - mpf_found = mpf; + reserve_bootmem_generic(mpf->mpf_physptr, + PAGE_SIZE); return 1; } bp += 4; @@ -572,7 +605,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) return 0; } -void __init find_smp_config(void) +static void __init __find_smp_config(unsigned reserve) { unsigned int address; @@ -584,9 +617,9 @@ void __init find_smp_config(void) * 2) Scan the top 1K of base RAM * 3) Scan the 64K of bios */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) + if (smp_scan_config(0x0, 0x400, reserve) || + smp_scan_config(639*0x400, 0x400, reserve) || + smp_scan_config(0xF0000, 0x10000, reserve)) return; /* * If it is an SMP machine we should know now. 
@@ -603,13 +636,23 @@ void __init find_smp_config(void) address = *(unsigned short *)phys_to_virt(0x40E); address <<= 4; - if (smp_scan_config(address, 0x1000)) + if (smp_scan_config(address, 0x1000, reserve)) return; /* If we have come this far, we did not find an MP table */ printk(KERN_INFO "No mptable found.\n"); } +void __init early_find_smp_config(void) +{ + __find_smp_config(0); +} + +void __init find_smp_config(void) +{ + __find_smp_config(1); +} + /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ -- cgit v1.2.3 From a6333c3ccbdc0ae001cff6ee1d3633942ef763f4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:09 +0300 Subject: x86: add mp_bus_not_pci bitmap to mpparse_32.c Signed-off: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 2abc9392799a..c13092db578e 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -43,6 +43,7 @@ unsigned int __cpuinitdata maxcpus = NR_CPUS; */ int apic_version [MAX_APICS]; int mp_bus_id_to_type [MAX_MP_BUSSES]; +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_node [MAX_MP_BUSSES]; int mp_bus_id_to_local [MAX_MP_BUSSES]; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; @@ -240,12 +241,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m) } #endif + set_bit(m->mpc_busid, mp_bus_not_pci); if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { mpc_oem_pci_bus(m, translation_table[mpc_record]); + clear_bit(m->mpc_busid, mp_bus_not_pci); mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; mp_current_pci_id++; @@ -984,6 +987,7 @@ void __init mp_config_acpi_legacy_irqs (void) * Fabricate the legacy ISA bus (bus #31). */ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; + set_bit(MP_ISA_BUS, mp_bus_not_pci); Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); /* -- cgit v1.2.3 From d27e2b8e2ac34b62aca95d3cd7efe9708b718fde Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:18 +0300 Subject: x86: use not_pci bitmap #1 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 4ca548632c8d..b5680780fa95 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -810,10 +810,7 @@ static int __init find_isa_irq_pin(int irq, int type) for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].mpc_srcbus; - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && + if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) -- cgit v1.2.3 From 73b2961bfa003518bb9cdd3c4003fad4d474ec13 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:24 
+0300 Subject: x86: use not_pci bitmap #2 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index b5680780fa95..f7f3f3144fda 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -826,10 +826,7 @@ static int __init find_isa_irq_apic(int irq, int type) for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].mpc_srcbus; - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && + if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) break; -- cgit v1.2.3 From 47cab822d6b25d8e130da00edef91ec3eef0efab Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:30 +0300 Subject: x86: use not_pci bitmap #3 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index f7f3f3144fda..efe8322edfba 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -866,7 +866,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) mp_irqs[i].mpc_dstapic == MP_APIC_ALL) break; - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && + if (!test_bit(lbus, mp_bus_not_pci) && !mp_irqs[i].mpc_irqtype && (bus == lbus) && (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { -- cgit v1.2.3 From 6728801dce13f83ee7e1778b137ceebab61b71c4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:36 +0300 Subject: x86: use not_pci bitmap #4 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 48 
++++++++++---------------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index efe8322edfba..2a609edd59f0 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -929,19 +929,19 @@ static int EISA_ELCR(unsigned int irq) return 0; } +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + /* EISA interrupts are always polarity zero and can be edge or level * trigger depending on the ELCR value. If an interrupt is listed as * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) -#define default_EISA_polarity(idx) (0) - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, * when listed as conforming in the MP table. */ @@ -953,7 +953,7 @@ static int EISA_ELCR(unsigned int irq) * when listed as conforming in the MP table. */ #define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) (0) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) static int MPBIOS_polarity(int idx) { @@ -967,35 +967,9 @@ static int MPBIOS_polarity(int idx) { case 0: /* conforms, ie. 
bus-type dependent polarity */ { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - polarity = default_ISA_polarity(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - polarity = default_EISA_polarity(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - polarity = default_PCI_polarity(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - polarity = default_MCA_polarity(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } + polarity = test_bit(bus, mp_bus_not_pci)? + default_ISA_polarity(idx): + default_PCI_polarity(idx); break; } case 1: /* high active */ -- cgit v1.2.3 From 9c0076cb34a0ea968413d9a1ccb6c7c850d3a1ee Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:43 +0300 Subject: x86: use not_pci bitmap #5 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 2a609edd59f0..e400f77eb0e0 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1010,11 +1010,14 @@ static int MPBIOS_trigger(int idx) { case 0: /* conforms, ie. bus-type dependent */ { + trigger = test_bit(bus, mp_bus_not_pci)? 
+ default_ISA_trigger(idx): + default_PCI_trigger(idx); switch (mp_bus_id_to_type[bus]) { case MP_BUS_ISA: /* ISA pin */ { - trigger = default_ISA_trigger(idx); + /* set before the switch */ break; } case MP_BUS_EISA: /* EISA pin */ @@ -1024,7 +1027,7 @@ static int MPBIOS_trigger(int idx) } case MP_BUS_PCI: /* PCI pin */ { - trigger = default_PCI_trigger(idx); + /* set before the switch */ break; } case MP_BUS_MCA: /* MCA pin */ -- cgit v1.2.3 From 643befed1090af5f0000297ce11fa23e2777f42b Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:49 +0300 Subject: x86: use not_pci bitmap #6 Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 47 ++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e400f77eb0e0..97ac6104d63a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1091,39 +1091,22 @@ static int pin_2_irq(int idx, int apic, int pin) if (mp_irqs[idx].mpc_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - case MP_BUS_EISA: - case MP_BUS_MCA: - { - irq = mp_irqs[idx].mpc_srcbusirq; - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + if (test_bit(bus, mp_bus_not_pci)) + irq = mp_irqs[idx].mpc_srcbusirq; + else { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; - break; - } - default: - { - printk(KERN_ERR "unknown bus type %d.\n",bus); - irq = 0; - break; - } + /* + * For MPS mode, so far 
only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } /* -- cgit v1.2.3 From 9e0a2de2684372f16130b753efdbf226a997efb0 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:54:56 +0300 Subject: x86: rearrange bus_type parse Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index c13092db578e..13adcc40a00e 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -242,16 +242,16 @@ static void __init MP_bus_info (struct mpc_config_bus *m) #endif set_bit(m->mpc_busid, mp_bus_not_pci); - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { + if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { mpc_oem_pci_bus(m, translation_table[mpc_record]); clear_bit(m->mpc_busid, mp_bus_not_pci); mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; mp_current_pci_id++; + } else if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; } else { -- cgit v1.2.3 From c0a282c251181aa423d4831719613b8286b5b839 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 20 Mar 2008 14:55:02 +0300 Subject: x86: make mp_bus_id_to_type optional [ mingo@elte.hu: fix boot regression. 
] Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 4 ++++ arch/x86/kernel/mpparse_32.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 97ac6104d63a..0d70acd3b134 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -915,6 +915,7 @@ void __init setup_ioapic_dest(void) } #endif +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR */ @@ -928,6 +929,7 @@ static int EISA_ELCR(unsigned int irq) "Broken MPtable reports ISA irq %d\n", irq); return 0; } +#endif /* ISA interrupts are always polarity zero edge triggered, * when listed as conforming in the MP table. */ @@ -1013,6 +1015,7 @@ static int MPBIOS_trigger(int idx) trigger = test_bit(bus, mp_bus_not_pci)? default_ISA_trigger(idx): default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) switch (mp_bus_id_to_type[bus]) { case MP_BUS_ISA: /* ISA pin */ @@ -1042,6 +1045,7 @@ static int MPBIOS_trigger(int idx) break; } } +#endif break; } case 1: /* edge */ diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 13adcc40a00e..8795122db9bc 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -42,7 +42,9 @@ unsigned int __cpuinitdata maxcpus = NR_CPUS; * MP-table. 
*/ int apic_version [MAX_APICS]; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) int mp_bus_id_to_type [MAX_MP_BUSSES]; +#endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_node [MAX_MP_BUSSES]; int mp_bus_id_to_local [MAX_MP_BUSSES]; @@ -245,9 +247,10 @@ static void __init MP_bus_info (struct mpc_config_bus *m) if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { mpc_oem_pci_bus(m, translation_table[mpc_record]); clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; mp_current_pci_id++; +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { @@ -256,6 +259,7 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; } else { printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); +#endif } } @@ -983,10 +987,12 @@ void __init mp_config_acpi_legacy_irqs (void) int i = 0; int ioapic = -1; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* * Fabricate the legacy ISA bus (bus #31). 
*/ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif set_bit(MP_ISA_BUS, mp_bus_not_pci); Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); -- cgit v1.2.3 From e129cb490e842753b43af7aae136935fc0928dc8 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Tue, 11 Mar 2008 22:55:42 +0300 Subject: x86: move mp_bus_id_to_local to numa.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 8795122db9bc..ae385b427841 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -47,7 +47,6 @@ int mp_bus_id_to_type [MAX_MP_BUSSES]; #endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_node [MAX_MP_BUSSES]; -int mp_bus_id_to_local [MAX_MP_BUSSES]; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; static int mp_current_pci_id; -- cgit v1.2.3 From 037cab07e9515149fecc2274775807f06ea6b036 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Tue, 11 Mar 2008 22:55:48 +0300 Subject: x86: move mp_bus_id_to_node to numa.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 1 - arch/x86/kernel/summit_32.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index ae385b427841..1b1fd6e920e6 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -46,7 +46,6 @@ int apic_version [MAX_APICS]; int mp_bus_id_to_type [MAX_MP_BUSSES]; #endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_node [MAX_MP_BUSSES]; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; static int mp_current_pci_id; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 30f04c3e68e4..6878a9c2df5d 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -35,6 +35,8 @@ static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; + static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) { int twister = 0, node = 0; -- cgit v1.2.3 From b552da8740222c35bcd83c9be7b27185bfb6d53c Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 19 Mar 2008 15:58:11 +0100 Subject: x86 iommu: add more documentation Fix coding style in pci-dma_64.c and add stubs for documentation. I hope someone fills the rest, I understand maybe off and soft... Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma_64.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 375cb2bc45be..ada5a0604992 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -232,32 +232,32 @@ static __init int iommu_setup(char *p) return -EINVAL; while (*p) { - if (!strncmp(p,"off",3)) + if (!strncmp(p, "off", 3)) no_iommu = 1; /* gart_parse_options has more force support */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) force_iommu = 1; - if (!strncmp(p,"noforce",7)) { + if (!strncmp(p, "noforce", 7)) { iommu_merge = 0; force_iommu = 0; } - if (!strncmp(p, "biomerge",8)) { + if (!strncmp(p, "biomerge", 8)) { iommu_bio_merge = 4096; iommu_merge = 1; force_iommu = 1; } - if (!strncmp(p, "panic",5)) + if (!strncmp(p, "panic", 5)) panic_on_overflow = 1; - if (!strncmp(p, "nopanic",7)) + if (!strncmp(p, "nopanic", 7)) panic_on_overflow = 0; - if 
(!strncmp(p, "merge",5)) { + if (!strncmp(p, "merge", 5)) { iommu_merge = 1; force_iommu = 1; } - if (!strncmp(p, "nomerge",7)) + if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "forcesac",8)) + if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; @@ -265,7 +265,7 @@ static __init int iommu_setup(char *p) forbid_dac = -1; #ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft",4)) + if (!strncmp(p, "soft", 4)) swiotlb = 1; #endif -- cgit v1.2.3 From 26c6b5ea5575a5a4886dc45f889e7b783641f2de Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:24:56 -0300 Subject: x86: change var types in __inquire_remote_apic change some variables' types in __inquire_remote_apic to match x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 2dd95bae2b96..bea2d328a4f0 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -368,10 +368,10 @@ static void unmap_cpu_to_logical_apicid(int cpu) static inline void __inquire_remote_apic(int apicid) { - int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; int timeout; - unsigned long status; + u32 status; printk("Inquiring remote APIC #%d...\n", apicid); -- cgit v1.2.3 From ba10485c2f5a9482d93b095960cbe7d865d59e04 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:24:57 -0300 Subject: x86: add loglevel to printks Add loglevel facilities to printks in __inquire_remote_apic. the levels are the ones to match x86_64 ones. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index bea2d328a4f0..8676eec2d00f 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -373,17 +373,18 @@ static inline void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk("Inquiring remote APIC #%d...\n", apicid); + printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); + printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); /* * Wait for idle. */ status = safe_apic_wait_icr_idle(); if (status) - printk("a previous APIC delivery may have failed\n"); + printk(KERN_CONT + "a previous APIC delivery may have failed\n"); apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); @@ -397,10 +398,10 @@ static inline void __inquire_remote_apic(int apicid) switch (status) { case APIC_ICR_RR_VALID: status = apic_read(APIC_RRR); - printk("%lx\n", status); + printk(KERN_CONT "%08x\n", status); break; default: - printk("failed\n"); + printk(KERN_CONT "failed\n"); } } } -- cgit v1.2.3 From 1af8a0c1b3b85b1007f49917ea6a351b7aeb7562 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:24:58 -0300 Subject: x86: use apic_*_around instead of apic_write in x86_64 This patch replaces apic_read() for apic_read_around() and apic_write for apic_write_around() in smpboot_64.c We do it to have a common usage between x86_64 and i386. 
In the former, it will always simply expand to apic_write Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 4f6d9768648f..57ebe6c04305 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -281,8 +281,8 @@ static void inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { @@ -315,12 +315,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta /* * Turn INIT on target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* * Send IPI */ - apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); @@ -331,10 +331,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Send IPI */ - apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -353,6 +353,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta for (j = 1; j <= num_starts; j++) { Dprintk("Sending STARTUP #%d.\n",j); + apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); 
apic_read(APIC_ESR); Dprintk("After apic_write.\n"); @@ -362,11 +363,11 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta */ /* Target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Boot on the stack */ /* Kick the second */ - apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); + apic_write_around(APIC_ICR, APIC_DM_STARTUP | (start_rip>>12)); /* * Give the other CPU some time to accept the IPI. @@ -386,6 +387,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta * Due to the Pentium erratum 3AP. */ if (maxlvt > 3) { + apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); } accept_status = (apic_read(APIC_ESR) & 0xEF); -- cgit v1.2.3 From d0173aeac4f7fa90a63319b817bd207bdc0ac87e Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:24:59 -0300 Subject: x86: use start_ipi_hook in x86_64 It is used to match i386. The definition for the non-paravirt case is moved to smp.h instead of smp_32.h Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 57ebe6c04305..13ab1123d1d2 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -344,6 +344,14 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta num_starts = 2; + /* + * Paravirt / VMI wants a startup IPI hook here to set up the + * target processor state. + */ + startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, + (unsigned long) init_rsp); + + /* * Run STARTUP IPI loop. 
*/ -- cgit v1.2.3 From 82389871b89467b4478e02d13be2f776e0138b5b Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:00 -0300 Subject: x86: add an smp_apply_quirks to smpboot_32.c The split of smp_store_cpu_info in a quirks-only part will ease integration with x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 8676eec2d00f..e05006416d8c 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -78,19 +78,8 @@ static void map_cpu_to_logical_apicid(void); /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -void __cpuinit smp_store_cpu_info(int id) +static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - if (id!=0) - identify_secondary_cpu(c); /* * Mask B, Pentium, but not Pentium MMX */ @@ -138,6 +127,23 @@ void __cpuinit smp_store_cpu_info(int id) valid_k7: ; + +} + +/* + * The bootstrap kernel entry code has set these up. 
Save them for + * a given CPU + */ + +void __cpuinit smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = &cpu_data(id); + + *c = boot_cpu_data; + c->cpu_index = id; + if (id != 0) + identify_secondary_cpu(c); + smp_apply_quirks(c); } static atomic_t init_deasserted; -- cgit v1.2.3 From 4f3ab1959a63a2686524f17665d799fac28eb271 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:01 -0300 Subject: x86: decouple call to print_cpu_info from smp_store_cpu_info This will ease integration with i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 13ab1123d1d2..1da28c6c1f5f 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -97,7 +97,6 @@ static void __cpuinit smp_store_cpu_info(int id) *c = boot_cpu_data; c->cpu_index = id; identify_cpu(c); - print_cpu_info(c); } static inline void wait_for_init_deassert(atomic_t *deassert) @@ -568,6 +567,8 @@ do_rest: if (cpu_isset(cpu, cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("CPU has booted.\n"); + printk(KERN_INFO "CPU%d: ", cpu); + print_cpu_info(&cpu_data(cpu)); } else { boot_error = 1; if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) @@ -751,6 +752,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) */ setup_boot_clock(); + printk(KERN_INFO "CPU%d: ", 0); + print_cpu_info(&cpu_data(0)); } /* -- cgit v1.2.3 From 7a636af66404766df60acff88c475df8e8d79347 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:02 -0300 Subject: x86: provide specialized identification routines for x86_64 provide two specialized identify_secondary_cpu() and identify_boot_cpu() routines for x86_64. 
Although not strictly needed, they are functionally correct, and will ease integration with i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 13fe525bf065..990724143c43 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -1036,14 +1036,24 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif select_idle_routine(c); - if (c != &boot_cpu_data) - mtrr_ap_init(); #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif } +void __cpuinit identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + mtrr_ap_init(); +} + static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); -- cgit v1.2.3 From 5745abfe86841a97fbe9e3f1e4f881a01b0c5f5b Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:03 -0300 Subject: x86: use identify_boot_cpu Call this function instead of identify_cpu in bugs_64.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/bugs_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c index 8f520f93ffd4..60207e999a04 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/bugs_64.c @@ -12,7 +12,7 @@ void __init check_bugs(void) { - identify_cpu(&boot_cpu_data); + identify_boot_cpu(); #if !defined(CONFIG_SMP) printk("CPU: "); print_cpu_info(&boot_cpu_data); -- cgit v1.2.3 From f7401f7fe653f90f8f80a241840b9b499779e87d Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:04 -0300 Subject: x86: call identify_secondary_cpu in smp_store_cpu_info Call it conditionally for 
secondary cpus. This behaviour matches i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 1da28c6c1f5f..f84e30da521a 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -96,7 +96,8 @@ static void __cpuinit smp_store_cpu_info(int id) *c = boot_cpu_data; c->cpu_index = id; - identify_cpu(c); + if (id != 0) + identify_secondary_cpu(c); } static inline void wait_for_init_deassert(atomic_t *deassert) -- cgit v1.2.3 From 1d89a7f072d4f76f0538edfb474d527066ee7838 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:05 -0300 Subject: x86: merge smp_store_cpu_info now that it is the same between arches, put it into smpboot.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 77 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 71 +--------------------------------------- arch/x86/kernel/smpboot_64.c | 15 --------- 3 files changed, 78 insertions(+), 85 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b13b9d55f9ce..a157a5245923 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -45,6 +45,83 @@ unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; +#ifdef CONFIG_X86_32 +/* Set if we find a B stepping CPU */ +int __cpuinitdata smp_b_stepping; +#endif + +static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs 
+ */ + smp_b_stepping = 1; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + + if (num_possible_cpus() == 1) + goto valid_k7; + + /* Athlon 660/661 is valid. */ + if ((c->x86_model == 6) && ((c->x86_mask == 0) || + (c->x86_mask == 1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model == 7) && (c->x86_mask == 0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability + * bit. It's worth noting that the A5 stepping (662) of some + * Athlon XP's have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ + if (((c->x86_model == 6) && (c->x86_mask >= 2)) || + ((c->x86_model == 7) && (c->x86_mask >= 1)) || + (c->x86_model > 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, not a certified SMP capable AMD system. */ + add_taint(TAINT_UNSAFE_SMP); + } + +valid_k7: + ; +#endif +} + +/* + * The bootstrap kernel entry code has set these up. Save them for + * a given CPU + */ + +void __cpuinit smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = &cpu_data(id); + + *c = boot_cpu_data; + c->cpu_index = id; + if (id != 0) + identify_secondary_cpu(c); + smp_apply_quirks(c); +} + + void __cpuinit set_cpu_sibling_map(int cpu) { int i; diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index e05006416d8c..0bfb31e13540 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -59,8 +59,7 @@ #include #include -/* Set if we find a B stepping CPU */ -static int __cpuinitdata smp_b_stepping; +extern int smp_b_stepping; static cpumask_t smp_commenced_mask; @@ -78,74 +77,6 @@ static void map_cpu_to_logical_apicid(void); /* State of each CPU. 
*/ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) -{ - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - - if (num_possible_cpus() == 1) - goto valid_k7; - - /* Athlon 660/661 is valid. */ - if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model==7) && (c->x86_mask==0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. - * It's worth noting that the A5 stepping (662) of some Athlon XP's - * have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. - */ - if (((c->x86_model==6) && (c->x86_mask>=2)) || - ((c->x86_model==7) && (c->x86_mask>=1)) || - (c->x86_model> 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, it's not a certified SMP capable AMD system. */ - add_taint(TAINT_UNSAFE_SMP); - } - -valid_k7: - ; - -} - -/* - * The bootstrap kernel entry code has set these up. 
Save them for - * a given CPU - */ - -void __cpuinit smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - if (id != 0) - identify_secondary_cpu(c); - smp_apply_quirks(c); -} - static atomic_t init_deasserted; static void __cpuinit smp_callin(void) diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index f84e30da521a..c213345ca2f5 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -85,21 +85,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) #endif -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -static void __cpuinit smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - if (id != 0) - identify_secondary_cpu(c); -} - static inline void wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) -- cgit v1.2.3 From 3b22ec7b13cb31e0d87fbc0aabe14caaaad309e8 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:06 -0300 Subject: x86: always enable irqs when entering idle This matches x86_64 behaviour, which is a superior one IMHO Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ec05fb750dfc..08c41ed5e805 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -127,6 +127,7 @@ void default_idle(void) local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { + local_irq_enable(); /* loop is done by the caller */ cpu_relax(); } @@ -142,6 +143,7 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle(void) { + local_irq_enable(); cpu_relax(); } @@ -248,8 +250,11 @@ void 
mwait_idle_with_hints(unsigned long ax, unsigned long cx) __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) - __mwait(ax, cx); - } + __sti_mwait(ax, cx); + else + local_irq_enable(); + } else + local_irq_enable(); } /* Default MONITOR/MWAIT with no hints, used for default C1 state */ -- cgit v1.2.3 From 3ff8171feecfcdee46be7d6e92259debe46ac87f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:07 -0300 Subject: x86: don't call local_irq_enable before entering idle the call to idle is guaranteed to do it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 0bfb31e13540..6c16165abf11 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -214,9 +214,6 @@ static void __cpuinit start_secondary(void *unused) unlock_ipi_call_lock(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - wmb(); cpu_idle(); } -- cgit v1.2.3 From 8f15b82e22779fd44baeb78515c9c154b407eff0 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:08 -0300 Subject: x86: move setup_secondary_clock a little bit down in the function This is done so we call setup_secondary_clock() in the same place x86_64 does. A separate patch for this is apparently not needed.
But clock initialization is such a delicate thing, that it's safer to do this way Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 6c16165abf11..4e5416eb42b0 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -185,7 +185,6 @@ static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); - setup_secondary_clock(); if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); enable_NMI_through_LVT0(); @@ -214,6 +213,8 @@ static void __cpuinit start_secondary(void *unused) unlock_ipi_call_lock(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + setup_secondary_clock(); + wmb(); cpu_idle(); } -- cgit v1.2.3 From 5733f627498121870b081b7ab73ffba01348f8cd Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:09 -0300 Subject: x86: move state update out of ipi_lock it does not need to be inside lock. Do the way i386 does. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c213345ca2f5..cfcfd2c2062b 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -233,10 +233,10 @@ void __cpuinit start_secondary(void) */ spin_unlock(&vector_lock); cpu_set(smp_processor_id(), cpu_online_map); - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - unlock_ipi_call_lock(); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + setup_secondary_clock(); cpu_idle(); -- cgit v1.2.3 From 148a30f8cf2ac72b7ea6e5c8971fad8d80ec3879 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:11 -0300 Subject: x86: use APIC_INTEGRATED tests in x86_64 This patch does not change the behaviour of x86_64, since APIC_INTEGRATED is always defined as (1). But the code now matches exactly i386 version (well, this part of the code, at least) Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index cfcfd2c2062b..d7b59d6c6963 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -295,6 +295,15 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; + /* + * Be paranoid about clearing APIC errors. 
+ */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + Dprintk("Asserting INIT.\n"); /* @@ -327,7 +336,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta mb(); atomic_set(&init_deasserted, 1); - num_starts = 2; + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; /* * Paravirt / VMI wants a startup IPI hook here to set up the -- cgit v1.2.3 From fa8004d8e0c2ba21a44bdc95c44c6c5267a991ec Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:12 -0300 Subject: x86: add barriers statement goal is to have i386 and x86_64 closer, so we add barriers to match Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 4 ++++ arch/x86/kernel/smpboot_64.c | 1 + 2 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 4e5416eb42b0..a232f4d1f7a5 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -180,6 +180,9 @@ static void __cpuinit start_secondary(void *unused) smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) cpu_relax(); + + /* otherwise gcc will move up smp_processor_id before the cpu_init */ + barrier(); /* * Check TSC synchronization with the BP: */ @@ -432,6 +435,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); + mb(); atomic_set(&init_deasserted, 1); /* diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index d7b59d6c6963..a9cc91127b91 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -239,6 +239,7 @@ void __cpuinit start_secondary(void) setup_secondary_clock(); + wmb(); cpu_idle(); } -- cgit v1.2.3 From 4780b261e2b71ca72804b26479d794839e68d9ab Mon Sep 17 
00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:13 -0300 Subject: x86: isolate sanity checking Isolate all sanity checking in a smp_sanity_check() function as x86_64 does. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 57 ++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index a232f4d1f7a5..b44a743d0ea9 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -735,10 +735,6 @@ exit: } #endif -/* - * Cycle through the processors sending APIC IPIs to boot each. - */ - static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; @@ -746,26 +742,8 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -static void __init smp_boot_cpus(unsigned int max_cpus) +static int __init smp_sanity_check(unsigned max_cpus) { - int apicid, cpu, bit, kicked; - unsigned long bogosum = 0; - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - printk("CPU%d: ", 0); - print_cpu_info(&cpu_data(0)); - - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - boot_cpu_logical_apicid = logical_smp_processor_id(); - per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; - - current_thread_info()->cpu = 0; - - set_cpu_sibling_map(0); - /* * If we couldn't find an SMP configuration at boot time, * get out of here now! 
@@ -780,7 +758,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); - return; + return -1; } /* @@ -806,7 +784,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); - return; + return -1; } verify_local_APIC(); @@ -828,9 +806,36 @@ static void __init smp_boot_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); - return; + return -1; } + return 0; +} + + +/* + * Cycle through the processors sending APIC IPIs to boot each. + */ +static void __init smp_boot_cpus(unsigned int max_cpus) +{ + int apicid, cpu, bit, kicked; + unsigned long bogosum = 0; + + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + printk(KERN_INFO "CPU%d: ", 0); + print_cpu_info(&cpu_data(0)); + + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_logical_apicid = logical_smp_processor_id(); + per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; + + current_thread_info()->cpu = 0; + + set_cpu_sibling_map(0); + smp_sanity_check(max_cpus); connect_bsp_APIC(); setup_local_APIC(); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From b675f11127291cdb6a090ece289e4f9a0b1d609e Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:14 -0300 Subject: x86: isolate logic to disable smp Put it in a disable_smp() function, as x86_64 does Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index b44a743d0ea9..8144aa3a1edf 100644 --- a/arch/x86/kernel/smpboot_32.c +++ 
b/arch/x86/kernel/smpboot_32.c @@ -742,6 +742,15 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif +static void __init disable_smp(void) +{ + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); +} + static int __init smp_sanity_check(unsigned max_cpus) { /* @@ -750,14 +759,10 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); + disable_smp(); if (APIC_init_uniprocessor()) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); - map_cpu_to_logical_apicid(); - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); return -1; } @@ -779,11 +784,6 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); printk(KERN_ERR "... forcing use of dummy APIC emulation. 
(tell your hw vendor)\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - map_cpu_to_logical_apicid(); - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); return -1; } @@ -801,11 +801,6 @@ static int __init smp_sanity_check(unsigned max_cpus) connect_bsp_APIC(); setup_local_APIC(); } - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - map_cpu_to_logical_apicid(); - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); return -1; } return 0; @@ -835,7 +830,12 @@ static void __init smp_boot_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); - smp_sanity_check(max_cpus); + if (smp_sanity_check(max_cpus) < 0) { + printk(KERN_INFO "SMP disabled\n"); + disable_smp(); + return; + } + connect_bsp_APIC(); setup_local_APIC(); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From f915d7f46b84192a19647c8e6b111a7e518875cb Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:15 -0300 Subject: x86: do tests before do_boot_cpu in i386 Do tests before do_boot_cpu in native_cpu_up for i386. Tests are a little bit broader than originally, and are the same as x86_64. Test for smp_callin is not applicable right now and is deferred. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 8144aa3a1edf..147af81f70ea 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -711,10 +711,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) int apicid, ret; apicid = per_cpu(x86_cpu_to_apicid, cpu); - if (apicid == BAD_APICID) { - ret = -ENODEV; - goto exit; - } info.complete = &done; info.apicid = apicid; @@ -952,10 +948,22 @@ void __init native_smp_prepare_boot_cpu(void) int __cpuinit native_cpu_up(unsigned int cpu) { + int apicid = cpu_present_to_apicid(cpu); unsigned long flags; -#ifdef CONFIG_HOTPLUG_CPU int ret = 0; + WARN_ON(irqs_disabled()); + + Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); + + if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || + !physid_isset(apicid, phys_cpu_present_map)) { + printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); + return -EINVAL; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* * We do warm boot only on cpus that had booted earlier * Otherwise cold boot is all handled from smp_boot_cpus(). -- cgit v1.2.3 From f3ce4466abd6f5173db98b5cc2269c139cd1790a Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:16 -0300 Subject: x86: make __smp_prepare_cpu void We have already removed the only condition that could fail here. 
so just don't test for any return value Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 147af81f70ea..ee6f3bd70390 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -704,11 +704,11 @@ static void __cpuinit do_warm_boot_cpu(struct work_struct *work) complete(info->complete); } -static int __cpuinit __smp_prepare_cpu(int cpu) +static void __cpuinit __smp_prepare_cpu(int cpu) { DECLARE_COMPLETION_ONSTACK(done); struct warm_boot_cpu_info info; - int apicid, ret; + int apicid; apicid = per_cpu(x86_cpu_to_apicid, cpu); @@ -725,9 +725,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) wait_for_completion(&done); zap_low_mappings(); - ret = 0; -exit: - return ret; } #endif @@ -950,7 +947,6 @@ int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = cpu_present_to_apicid(cpu); unsigned long flags; - int ret = 0; WARN_ON(irqs_disabled()); @@ -971,10 +967,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) * when a cpu is taken offline from cpu_exit_clear(). 
*/ if (!cpu_isset(cpu, cpu_callin_map)) - ret = __smp_prepare_cpu(cpu); - - if (ret) - return -EIO; + __smp_prepare_cpu(cpu); #endif /* In case one didn't come up */ -- cgit v1.2.3 From 8154fa3740d2bbc64aa46e75bcccb71dd82280d3 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:17 -0300 Subject: x86: move assignment of CPU_PREPARE before do_boot_cpu Done to match x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index ee6f3bd70390..0e86ccc90d82 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -958,6 +958,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) return -EINVAL; } + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; #ifdef CONFIG_HOTPLUG_CPU /* @@ -976,7 +977,6 @@ int __cpuinit native_cpu_up(unsigned int cpu) return -EIO; } - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); -- cgit v1.2.3 From cbe879fc6c77b5751a91167654b75a39421d0f3f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:19 -0300 Subject: x86: define bios to apicid mapping This mapping already exists in x86_64, just provide it for i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 0e86ccc90d82..92a5df6190b5 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -70,6 +70,12 @@ void *x86_cpu_to_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); +u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata + = { [0 ... 
NR_CPUS-1] = BAD_APICID }; +void *x86_bios_cpu_apicid_early_ptr; +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + u8 apicid_2_node[MAX_APICID]; static void map_cpu_to_logical_apicid(void); -- cgit v1.2.3 From ccf82085ee32c9b171183d8042e8a6e2776ec628 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:20 -0300 Subject: x86: initialize map pointers in setup_32.c this will serve as a reference as to whether or not to use the per_cpu variables in mpparse. Done the same way as x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_32.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index d4ad6e8ae886..eb97bcfe0f6f 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -722,6 +722,18 @@ char * __init __attribute__((weak)) memory_setup(void) return machine_specific_memory_setup(); } +#ifdef CONFIG_NUMA +/* + * In the golden day, when everything among i386 and x86_64 will be + * integrated, this will not live here + */ +void *x86_cpu_to_node_map_early_ptr; +int x86_cpu_to_node_map_init[NR_CPUS] = { + [0 ... NR_CPUS-1] = NUMA_NO_NODE +}; +DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -855,6 +867,18 @@ void __init setup_arch(char **cmdline_p) io_delay_init(); +#ifdef CONFIG_X86_SMP + /* + * setup to use the early static init tables during kernel startup + * X86_SMP will exclude sub-arches that don't deal well with it. 
+ */ + x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; + x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; +#ifdef CONFIG_NUMA + x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; +#endif +#endif + #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(); #endif -- cgit v1.2.3 From 4fe29a85642544503cf81e9cf251ef0f4e65b162 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:23 -0300 Subject: x86: use specialized routine for setup per-cpu area We use the same routine as x86_64, moved now to setup.c. Just with a few ifdefs inside. Note that this routine uses prefill_possible_map(). It has the very nice side effect of allowing hotplugging of cpus that are marked as present but disabled by acpi bios. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/setup.c | 103 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup64.c | 77 -------------------------------- arch/x86/kernel/smpboot_32.c | 2 + 4 files changed, 106 insertions(+), 78 deletions(-) create mode 100644 arch/x86/kernel/setup.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c436e747f502..5d33509fd1c1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o +obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c new file mode 100644 index 000000000000..1179aa06cdbf --- /dev/null +++ b/arch/x86/kernel/setup.c @@ -0,0 +1,103 @@ +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +/* + * Copy data used in early init routines from the initial arrays to the + * per cpu data areas. These arrays then become expendable and the + * *_early_ptr's are zeroed indicating that the static arrays are gone. + */ +static void __init setup_per_cpu_maps(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { +#ifdef CONFIG_SMP + if (per_cpu_offset(cpu)) { +#endif + per_cpu(x86_cpu_to_apicid, cpu) = + x86_cpu_to_apicid_init[cpu]; + per_cpu(x86_bios_cpu_apicid, cpu) = + x86_bios_cpu_apicid_init[cpu]; +#ifdef CONFIG_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + x86_cpu_to_node_map_init[cpu]; +#endif +#ifdef CONFIG_SMP + } else + printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n", + cpu); +#endif + } + + /* indicate the early static arrays will soon be gone */ + x86_cpu_to_apicid_early_ptr = NULL; + x86_bios_cpu_apicid_early_ptr = NULL; +#ifdef CONFIG_NUMA + x86_cpu_to_node_map_early_ptr = NULL; +#endif +} + +#ifdef CONFIG_X86_32 +/* + * Great future not-so-futuristic plan: make i386 and x86_64 do it + * the same way + */ +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +/* + * Great future plan: + * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 
+ * Always point %gs to its beginning + */ +void __init setup_per_cpu_areas(void) +{ + int i; + unsigned long size; + +#ifdef CONFIG_HOTPLUG_CPU + prefill_possible_map(); +#endif + + /* Copy section for each CPU (we discard the original) */ + size = PERCPU_ENOUGH_ROOM; + + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", + size); + for_each_cpu_mask(i, cpu_possible_map) { + char *ptr; +#ifndef CONFIG_NEED_MULTIPLE_NODES + ptr = alloc_bootmem_pages(size); +#else + int node = early_cpu_to_node(i); + if (!node_online(node) || !NODE_DATA(node)) + ptr = alloc_bootmem_pages(size); + else + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); +#endif + if (!ptr) + panic("Cannot allocate cpu data for CPU %d\n", i); +#ifdef CONFIG_X86_64 + cpu_pda(i)->data_offset = ptr - __per_cpu_start; +#else + __per_cpu_offset[i] = ptr - __per_cpu_start; +#endif + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + } + + /* setup percpu data maps early */ + setup_per_cpu_maps(); +} + +#endif diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index e24c45677094..6b4e3262e8cb 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -85,83 +85,6 @@ static int __init nonx32_setup(char *str) } __setup("noexec32=", nonx32_setup); -/* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. 
- */ -static void __init setup_per_cpu_maps(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - if (per_cpu_offset(cpu)) { -#endif - per_cpu(x86_cpu_to_apicid, cpu) = - x86_cpu_to_apicid_init[cpu]; - per_cpu(x86_bios_cpu_apicid, cpu) = - x86_bios_cpu_apicid_init[cpu]; -#ifdef CONFIG_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - x86_cpu_to_node_map_init[cpu]; -#endif -#ifdef CONFIG_SMP - } - else - printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n", - cpu); -#endif - } - - /* indicate the early static arrays will soon be gone */ - x86_cpu_to_apicid_early_ptr = NULL; - x86_bios_cpu_apicid_early_ptr = NULL; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = NULL; -#endif -} - -/* - * Great future plan: - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. - * Always point %gs to its beginning - */ -void __init setup_per_cpu_areas(void) -{ - int i; - unsigned long size; - -#ifdef CONFIG_HOTPLUG_CPU - prefill_possible_map(); -#endif - - /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; - - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); - for_each_cpu_mask (i, cpu_possible_map) { - char *ptr; -#ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); -#else - int node = early_cpu_to_node(i); - - if (!node_online(node) || !NODE_DATA(node)) - ptr = alloc_bootmem_pages(size); - else - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); -#endif - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); - cpu_pda(i)->data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } - - /* setup percpu data maps early */ - setup_per_cpu_maps(); -} - void pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 92a5df6190b5..bf5c9e9f26c1 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -665,6 +665,7 @@ 
static int __cpuinit do_boot_cpu(int apicid, int cpu) unmap_cpu_to_logical_apicid(cpu); cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ + cpu_clear(cpu, cpu_possible_map); cpucount--; } else { per_cpu(x86_cpu_to_apicid, cpu) = apicid; @@ -743,6 +744,7 @@ EXPORT_SYMBOL(xquad_portio); static void __init disable_smp(void) { + cpu_possible_map = cpumask_of_cpu(0); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From 73bf102b1cadc53d418df02ba687769a9f916a6d Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:24 -0300 Subject: x86: fill bios cpu to apicid maps We fill the per-cpu (or array) that maps bios cpu id to apicid in mpparse_32.c, the way x86_64 does Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 1b1fd6e920e6..6ea97163701f 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -75,8 +75,6 @@ unsigned disabled_cpus __cpuinitdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - /* * Intel MP BIOS table parsing routines: */ @@ -220,7 +218,14 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) def_to_bigsmp = 1; } } - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; + /* are we being called early in kernel startup? 
*/ + if (x86_cpu_to_apicid_early_ptr) { + u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; + } else { + int cpu = num_processors - 1; + per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; + } } static void __init MP_bus_info (struct mpc_config_bus *m) -- cgit v1.2.3 From a6c422ccdb57924bd20ae408dba8e9db01d09677 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:25 -0300 Subject: x86: fill cpu to apicid and present map in mpparse This is the way x86_64 does, and complement the already present patch that does the bios cpu to apicid mapping here Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 19 +++++++++++++++++-- arch/x86/kernel/smpboot_32.c | 24 +++++++----------------- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 6ea97163701f..a0cec74b80ef 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -105,7 +105,8 @@ static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinit static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { - int ver, apicid; + int ver, apicid, cpu; + cpumask_t tmp_map; physid_mask_t phys_cpu; if (!(m->mpc_cpuflag & CPU_ENABLED)) { @@ -198,6 +199,16 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) cpu_set(num_processors, cpu_possible_map); num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. 
+ */ + cpu = 0; /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y @@ -220,12 +231,16 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) } /* are we being called early in kernel startup? */ if (x86_cpu_to_apicid_early_ptr) { + u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + + cpu_to_apicid[cpu] = m->mpc_apicid; bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } else { - int cpu = num_processors - 1; + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; } + cpu_set(cpu, cpu_present_map); } static void __init MP_bus_info (struct mpc_config_bus *m) diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index bf5c9e9f26c1..2fea910eff43 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -525,16 +525,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; -static inline int alloc_cpu_id(void) -{ - cpumask_t tmp_map; - int cpu; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - if (cpu >= NR_CPUS) - return -ENODEV; - return cpu; -} #ifdef CONFIG_HOTPLUG_CPU static struct task_struct * __cpuinitdata cpu_idle_tasks[NR_CPUS]; @@ -605,7 +595,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); - per_cpu(x86_cpu_to_apicid, cpu) = apicid; /* * This grunge runs the startup process for * the targeted processor. 
@@ -666,10 +655,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpu_clear(cpu, cpu_possible_map); + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; cpucount--; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - cpu_set(cpu, cpu_present_map); } /* mark "stuck" area as not stuck */ @@ -745,6 +732,7 @@ EXPORT_SYMBOL(xquad_portio); static void __init disable_smp(void) { cpu_possible_map = cpumask_of_cpu(0); + cpu_present_map = cpumask_of_cpu(0); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); map_cpu_to_logical_apicid(); @@ -825,7 +813,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); - per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; current_thread_info()->cpu = 0; @@ -866,8 +853,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus) continue; if (max_cpus <= cpucount+1) continue; - - if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) + /* Utterly temporary */ + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (per_cpu(x86_cpu_to_apicid, cpu) == apicid) + break; + if (do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else -- cgit v1.2.3 From 1161705bd66df0c80fa45e87190e456c02e6f145 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 19 Mar 2008 20:26:15 +0100 Subject: x86: fill cpu to apicid and present map in mpparse, fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index a0cec74b80ef..000b51b78fbd 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -75,6 +75,10 @@ unsigned disabled_cpus __cpuinitdata; /* Bitmask of physically 
existing CPUs */ physid_mask_t phys_cpu_present_map; +#ifndef CONFIG_SMP +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; +#endif + /* * Intel MP BIOS table parsing routines: */ @@ -229,6 +233,7 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) def_to_bigsmp = 1; } } +#ifdef CONFIG_SMP /* are we being called early in kernel startup? */ if (x86_cpu_to_apicid_early_ptr) { u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; @@ -240,6 +245,7 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; } +#endif cpu_set(cpu, cpu_present_map); } -- cgit v1.2.3 From e1a14d0c1391627d869c0f97bb5e2382bf36d8dc Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:26 -0300 Subject: x86: get rid of cpucount weighting a map will do. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 2fea910eff43..5c4e85cceb16 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -166,8 +166,6 @@ static void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } -static int cpucount; - /* * Activate a secondary processor. */ @@ -585,7 +583,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* start_eip had better be page-aligned! 
*/ start_eip = setup_trampoline(); - ++cpucount; alternatives_smp_switch(1); /* So we see what's up */ @@ -656,7 +653,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpu_clear(cpu, cpu_possible_map); per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - cpucount--; } /* mark "stuck" area as not stuck */ @@ -672,7 +668,6 @@ void cpu_exit_clear(void) idle_task_exit(); - cpucount --; cpu_uninit(); irq_ctx_exit(cpu); @@ -795,7 +790,6 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } - /* * Cycle through the processors sending APIC IPIs to boot each. */ @@ -851,7 +845,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (!check_apicid_present(bit)) continue; - if (max_cpus <= cpucount+1) + if (max_cpus <= cpus_weight(cpu_present_map)) continue; /* Utterly temporary */ for (cpu = 0; cpu < NR_CPUS; cpu++) @@ -878,7 +872,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) bogosum += cpu_data(cpu).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpucount+1, + cpus_weight(cpu_present_map), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); @@ -892,7 +886,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) * approved Athlon */ if (tainted & TAINT_UNSAFE_SMP) { - if (cpucount) + if (cpus_weight(cpu_present_map)) printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); else tainted &= ~TAINT_UNSAFE_SMP; -- cgit v1.2.3 From 904541e2f76bc3efe4cc9978b7adb3323ea8607e Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:27 -0300 Subject: x86: allow user to impress friends. Impressing friends is a very important thing. Do it in a separate function to make it even more explicit, and ease integration. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 20 ++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 17 ++--------------- 2 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a157a5245923..02427d1003d3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -228,6 +228,26 @@ void __init smp_alloc_memory(void) } #endif +void impress_friends(void) +{ + int cpu; + unsigned long bogosum = 0; + /* + * Allow the user to impress friends. + */ + Dprintk("Before bogomips.\n"); + for_each_possible_cpu(cpu) + if (cpu_isset(cpu, cpu_callout_map)) + bogosum += cpu_data(cpu).loops_per_jiffy; + printk(KERN_INFO + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpus_weight(cpu_present_map), + bogosum/(500000/HZ), + (bogosum/(5000/HZ))%100); + + Dprintk("Before bogocount - setting activated=1.\n"); +} + #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5c4e85cceb16..34493f8ba8ac 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -790,13 +790,13 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } +extern void impress_friends(void); /* * Cycle through the processors sending APIC IPIs to boot each. */ static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; - unsigned long bogosum = 0; /* * Setup boot CPU information @@ -863,20 +863,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) */ smpboot_restore_warm_reset_vector(); - /* - * Allow the user to impress friends. 
- */ - Dprintk("Before bogomips.\n"); - for_each_possible_cpu(cpu) - if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data(cpu).loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpus_weight(cpu_present_map), - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); - - Dprintk("Before bogocount - setting activated=1.\n"); + impress_friends(); if (smp_b_stepping) printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); -- cgit v1.2.3 From 693d4b8a6429af7f2029df20a59e22f4d752e141 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:28 -0300 Subject: x86: do smp tainting checks in a separate function It will ease integration for x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 21 +++++++++++++++++++-- arch/x86/kernel/smpboot_32.c | 20 ++------------------ 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 02427d1003d3..ddb94ef37789 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -45,10 +45,8 @@ unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -#ifdef CONFIG_X86_32 /* Set if we find a B stepping CPU */ int __cpuinitdata smp_b_stepping; -#endif static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { @@ -105,6 +103,25 @@ valid_k7: #endif } +void smp_checks(void) +{ + if (smp_b_stepping) + printk(KERN_WARNING "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + if (tainted & TAINT_UNSAFE_SMP) { + if (cpus_weight(cpu_present_map)) + printk(KERN_INFO "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + else + 
tainted &= ~TAINT_UNSAFE_SMP; + } +} + /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 34493f8ba8ac..361851cdaa97 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -59,8 +59,6 @@ #include #include -extern int smp_b_stepping; - static cpumask_t smp_commenced_mask; /* which logical CPU number maps to which CPU (physical APIC ID) */ @@ -791,6 +789,7 @@ static int __init smp_sanity_check(unsigned max_cpus) } extern void impress_friends(void); +extern void smp_checks(void); /* * Cycle through the processors sending APIC IPIs to boot each. */ @@ -865,22 +864,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) impress_friends(); - if (smp_b_stepping) - printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - if (tainted & TAINT_UNSAFE_SMP) { - if (cpus_weight(cpu_present_map)) - printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); - else - tainted &= ~TAINT_UNSAFE_SMP; - } - - Dprintk("Boot done.\n"); - + smp_checks(); /* * construct cpu_sibling_map, so that we can tell sibling CPUs * efficiently. -- cgit v1.2.3 From f68e00a32b4f5a2881c3a39d71cc2c22e92f1d99 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:29 -0300 Subject: x86: move impress_friends and smp_check to cpus_done the cpu count is changed accordingly: now, what matters is online cpus. 
Also, we add those functions for x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/smpboot_32.c | 22 ++++++++++++---------- arch/x86/kernel/smpboot_64.c | 8 ++++++++ 3 files changed, 22 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ddb94ef37789..6978f1bf6533 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -114,7 +114,7 @@ void smp_checks(void) * approved Athlon */ if (tainted & TAINT_UNSAFE_SMP) { - if (cpus_weight(cpu_present_map)) + if (num_online_cpus()) printk(KERN_INFO "WARNING: This combination of AMD" "processors is not suitable for SMP.\n"); else @@ -258,7 +258,7 @@ void impress_friends(void) bogosum += cpu_data(cpu).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpus_weight(cpu_present_map), + num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 361851cdaa97..1736404c3c36 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -788,8 +788,6 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } -extern void impress_friends(void); -extern void smp_checks(void); /* * Cycle through the processors sending APIC IPIs to boot each. */ @@ -857,14 +855,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ++kicked; } - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); - - impress_friends(); - - smp_checks(); /* * construct cpu_sibling_map, so that we can tell sibling CPUs * efficiently. @@ -959,8 +949,20 @@ int __cpuinit native_cpu_up(unsigned int cpu) return 0; } +extern void impress_friends(void); +extern void smp_checks(void); + void __init native_smp_cpus_done(unsigned int max_cpus) { + /* + * Cleanup possible dangling ends... 
+ */ + smpboot_restore_warm_reset_vector(); + + Dprintk("Boot done.\n"); + + impress_friends(); + smp_checks(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index a9cc91127b91..c3e770b0094b 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -824,12 +824,20 @@ int __cpuinit native_cpu_up(unsigned int cpu) return err; } +extern void impress_friends(void); +extern void smp_checks(void); + /* * Finish the SMP boot. */ void __init native_smp_cpus_done(unsigned int max_cpus) { smp_cleanup_boot(); + + Dprintk("Boot done.\n"); + + impress_friends(); + smp_checks(); setup_ioapic_dest(); check_nmi_watchdog(); } -- cgit v1.2.3 From 8d77010f8c93b4d41ffd71c7ad9d07fc1668cd5a Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:31 -0300 Subject: x86: include mach_wakecpu.h in smpboot_64 Do it and also fix conflicts, which automatically makes x86_64 look closer to i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c3e770b0094b..c6c993f4c415 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -60,6 +60,8 @@ #include #include +#include + /* Set when the idlers are all forked */ int smp_threads_ready; @@ -85,13 +87,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) #endif -static inline void wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); - return; -} - static atomic_t init_deasserted __cpuinitdata; /* @@ -247,7 +242,7 @@ extern volatile unsigned long init_rsp; extern void (*initial_code)(void); #ifdef APIC_DEBUG -static void inquire_remote_apic(int apicid) +static void 
__inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; -- cgit v1.2.3 From eb44d0a2a9c4d64ed89044fcf1f75e6a27c42ea7 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:32 -0300 Subject: x86: include smpboot_hooks.h in smpboot_64.c We do it and also fix conflicts, which makes x86_64 automatically closer to i386 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c6c993f4c415..b9384b3af017 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -61,6 +61,7 @@ #include #include +#include /* Set when the idlers are all forked */ int smp_threads_ready; @@ -517,14 +518,7 @@ do_rest: Dprintk("Setting warm reset code and vector.\n"); - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf; - Dprintk("3.\n"); - + smpboot_setup_warm_reset_vector(start_rip); /* * Be paranoid about clearing APIC errors. */ @@ -593,23 +587,6 @@ do_rest: cycles_t cacheflush_time; unsigned long cache_decay_ticks; -/* - * Cleanup possible dangling ends... - */ -static __cpuinit void smp_cleanup_boot(void) -{ - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - /* - * Reset trampoline flag - */ - *((volatile int *) phys_to_virt(0x467)) = 0; -} - /* * Fall back to non SMP mode after errors. 
* @@ -827,7 +804,7 @@ extern void smp_checks(void); */ void __init native_smp_cpus_done(unsigned int max_cpus) { - smp_cleanup_boot(); + smpboot_restore_warm_reset_vector(); Dprintk("Boot done.\n"); -- cgit v1.2.3 From 17c9ab1eabcc08794064a6e3232ac421664c9ce1 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:33 -0300 Subject: x86: move smp_intr_init away from smpboot_32.c We move it to apic_32.c, since it's irq related anyway, and only called from that file. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 23 +++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 21 --------------------- 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6aa93db7faa3..c32cc0feb47b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1317,6 +1317,29 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } +#ifdef CONFIG_SMP +void __init smp_intr_init(void) +{ + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. 
+ */ + set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +} +#endif + /* * Initialize APIC interrupts */ diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 1736404c3c36..87c9a75d929c 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -968,24 +968,3 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #endif zap_low_mappings(); } - -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); -} -- cgit v1.2.3 From 3cf19f31d967da2c1279142d4dbafe18f521a1bf Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:34 -0300 Subject: x86: don't set maps in native_smp_prepare_boot_cpu() By this time, they are already set in init routines Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 87c9a75d929c..bfdfe3c64d06 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -889,10 +889,7 @@ void __init native_smp_prepare_boot_cpu(void) init_gdt(cpu); switch_to_new_gdt(); - cpu_set(cpu, cpu_online_map); cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_present_map); - cpu_set(cpu, 
cpu_possible_map); __get_cpu_var(cpu_state) = CPU_ONLINE; } -- cgit v1.2.3 From e32ede19ac64b5cd896e6d28aa51d34887791ab2 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:35 -0300 Subject: x86: wipe get_nmi_reason out of nmi_64.h use mach_traps when it is supposed to be used. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_64.c | 2 ++ arch/x86/kernel/traps_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 9a4fde74bee1..11f9130ac513 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -26,6 +26,8 @@ #include #include +#include + int unknown_nmi_panic; int nmi_watchdog_enabled; int panic_on_unrecovered_nmi; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 045466681911..33292ac814f4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -33,6 +33,8 @@ #include #include +#include + #if defined(CONFIG_EDAC) #include #endif -- cgit v1.2.3 From 6d60cd5359e261cad1e519e77ca733c05c2f8025 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:36 -0300 Subject: x86: unify nmi_32.h and nmi_64.h Two more files goes away. 
nmi_64.h and nmi_32.h give birth to nmi.h Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 6a0aa7038685..167385155765 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -67,7 +67,7 @@ static __init void nmi_cpu_busy(void *data) } #endif -static int __init check_nmi_watchdog(void) +int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; int cpu; -- cgit v1.2.3 From 4626df1801dc03de42f1c155417393b91c8f5d97 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:37 -0300 Subject: x86: call check_nmi_watchdog explicitly in native_smp_cpus_done With this, remove its late_initcall marker from nmi_32.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_32.c | 2 -- arch/x86/kernel/smpboot_32.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 167385155765..9cfc094eddb0 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -129,8 +129,6 @@ int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; } -/* This needs to happen later in boot so counters are working */ -late_initcall(check_nmi_watchdog); static int __init setup_nmi_watchdog(char *str) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index bfdfe3c64d06..1f3aff4caaf7 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -963,5 +963,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif + check_nmi_watchdog(); zap_low_mappings(); } -- cgit v1.2.3 From 50e440aa5323860d9e5960143b720e461ed0c582 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:38 -0300 Subject: 
x86: call nmi_watchdog_default in i386 this does not exist, so it will be an empty macro Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 1f3aff4caaf7..a35055361b85 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -876,6 +876,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + nmi_watchdog_default(); smp_commenced_mask = cpumask_of_cpu(0); cpu_callin_map = cpumask_of_cpu(0); mb(); -- cgit v1.2.3 From e7f8b14e028f7a2f9e5c83c17164aeeeb9c61f17 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:39 -0300 Subject: x86: don't initialize sibling and core maps during preparation it is redundant, since it is already done by set_cpu_sibling_map() Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index a35055361b85..5cae17f3eb75 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -855,18 +855,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ++kicked; } - /* - * construct cpu_sibling_map, so that we can tell sibling CPUs - * efficiently. 
- */ - for_each_possible_cpu(cpu) { - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - cpus_clear(per_cpu(cpu_core_map, cpu)); - } - - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); - smpboot_setup_io_apic(); setup_boot_clock(); -- cgit v1.2.3 From 802b8133b4f78c30a2668d142d78861e27c0c6a7 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:41 -0300 Subject: x86: schedule work only if keventd is already running Only call schedule_work if keventd is already running. This is already the way x86_64 does Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5cae17f3eb75..255c6f761480 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -708,8 +708,12 @@ static void __cpuinit __smp_prepare_cpu(int cpu) clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); - schedule_work(&info.task); - wait_for_completion(&done); + if (!keventd_up() || current_is_keventd()) + info.task.func(&info.task); + else { + schedule_work(&info.task); + wait_for_completion(&done); + } zap_low_mappings(); } -- cgit v1.2.3 From d2bcbad5f3ad38a1c09861bca7e252dde7bb8259 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:42 -0300 Subject: x86: do not zap_low_mappings in __smp_prepare_cpus It was okay when cpus were cold booted before this point. But with the new state machine, they will not have arrived to the trampoline yet. 
zapping low mappings will have the bad effect of breaking it completely after paging enablement Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 255c6f761480..88ee65585d3f 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -195,11 +195,6 @@ static void __cpuinit start_secondary(void *unused) enable_NMI_through_LVT0(); enable_8259A_irq(0); } - /* - * low-memory mappings have been cleared, flush them from - * the local TLBs too. - */ - local_flush_tlb(); /* This must be done before setting cpu_online_map */ set_cpu_sibling_map(raw_smp_processor_id()); @@ -714,8 +709,6 @@ static void __cpuinit __smp_prepare_cpu(int cpu) schedule_work(&info.task); wait_for_completion(&done); } - - zap_low_mappings(); } #endif -- cgit v1.2.3 From 9713277607f9eac7d655c6854dd92bc2ce1b6f02 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:43 -0300 Subject: x86: boot cpus from cpu_up, instead of prepare_cpus After all the infrastructure work, we're now prepared to boot the cpus from cpu_up, and not from prepare_cpus. So the difference between cold boot and hotplug is effectively over, and the functions are used to the purposes they're meant to. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 48 ++------------------------------------------ 1 file changed, 2 insertions(+), 46 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 88ee65585d3f..978e13708ddb 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -670,6 +670,7 @@ void cpu_exit_clear(void) cpu_clear(cpu, smp_commenced_mask); unmap_cpu_to_logical_apicid(cpu); } +#endif struct warm_boot_cpu_info { struct completion *complete; @@ -710,7 +711,6 @@ static void __cpuinit __smp_prepare_cpu(int cpu) wait_for_completion(&done); } } -#endif static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ @@ -790,8 +790,6 @@ static int __init smp_sanity_check(unsigned max_cpus) */ static void __init smp_boot_cpus(unsigned int max_cpus) { - int apicid, cpu, bit, kicked; - /* * Setup boot CPU information */ @@ -819,39 +817,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) setup_portio_remap(); - /* - * Scan the CPU present map and fire up the other CPUs via do_boot_cpu - * - * In clustered apic mode, phys_cpu_present_map is a constructed thus: - * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the - * clustered apic ID. - */ - Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); - - kicked = 1; - for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) { - apicid = cpu_present_to_apicid(bit); - /* - * Don't even attempt to start the boot CPU! 
- */ - if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID)) - continue; - - if (!check_apicid_present(bit)) - continue; - if (max_cpus <= cpus_weight(cpu_present_map)) - continue; - /* Utterly temporary */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (per_cpu(x86_cpu_to_apicid, cpu) == apicid) - break; - if (do_boot_cpu(apicid, cpu)) - printk("CPU #%d not responding - cannot use it.\n", - apicid); - else - ++kicked; - } - smpboot_setup_io_apic(); setup_boot_clock(); @@ -895,17 +860,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) } per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; -#ifdef CONFIG_HOTPLUG_CPU - /* - * We do warm boot only on cpus that had booted earlier - * Otherwise cold boot is all handled from smp_boot_cpus(). - * cpu_callin_map is set during AP kickstart process. Its reset - * when a cpu is taken offline from cpu_exit_clear(). - */ - if (!cpu_isset(cpu, cpu_callin_map)) - __smp_prepare_cpu(cpu); -#endif + __smp_prepare_cpu(cpu); /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { -- cgit v1.2.3 From ddd10ecfa231c88382fc2f10a3120d2ad8e92381 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:44 -0300 Subject: x86: get rid of commenced mask. As we now boot cpus from cpu_up, we don't need it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 978e13708ddb..c30abed08923 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -59,8 +59,6 @@ #include #include -static cpumask_t smp_commenced_mask; - /* which logical CPU number maps to which CPU (physical APIC ID) */ u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... 
NR_CPUS-1] = BAD_APICID }; @@ -180,8 +178,6 @@ static void __cpuinit start_secondary(void *unused) cpu_init(); preempt_disable(); smp_callin(); - while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) - cpu_relax(); /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); @@ -667,7 +663,6 @@ void cpu_exit_clear(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - cpu_clear(cpu, smp_commenced_mask); unmap_cpu_to_logical_apicid(cpu); } #endif @@ -827,7 +822,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) void __init native_smp_prepare_cpus(unsigned int max_cpus) { nmi_watchdog_default(); - smp_commenced_mask = cpumask_of_cpu(0); cpu_callin_map = cpumask_of_cpu(0); mb(); smp_boot_cpus(max_cpus); @@ -869,8 +863,6 @@ int __cpuinit native_cpu_up(unsigned int cpu) return -EIO; } - /* Unleash the CPU! */ - cpu_set(cpu, smp_commenced_mask); /* * Check TSC synchronization with the AP (keep irqs disabled -- cgit v1.2.3 From 365c894c65b98da944992199ea24206f531674de Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:45 -0300 Subject: x86: use create_idle struct in do_boot_cpu Use a new worker, with help of the create_idle struct to fork the idle thread. We now have two workers, the first of them triggered by __smp_prepare_cpu. But the later is going away soon. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 86 ++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index c30abed08923..fc1eb5255f66 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -79,6 +79,24 @@ static void map_cpu_to_logical_apicid(void); /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; +/* Store all idle threads, this can be reused instead of creating +* a new thread. 
Also avoids complicated thread destroy functionality +* for idle threads. +*/ +#ifdef CONFIG_HOTPLUG_CPU +/* + * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is + * removed after init for !CONFIG_HOTPLUG_CPU. + */ +static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); +#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) +#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) +#else +struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) +#endif + static atomic_t init_deasserted; static void __cpuinit smp_callin(void) @@ -513,30 +531,21 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) extern cpumask_t cpu_initialized; -#ifdef CONFIG_HOTPLUG_CPU -static struct task_struct * __cpuinitdata cpu_idle_tasks[NR_CPUS]; -static inline struct task_struct * __cpuinit alloc_idle_task(int cpu) -{ +struct create_idle { + struct work_struct work; struct task_struct *idle; + struct completion done; + int cpu; +}; - if ((idle = cpu_idle_tasks[cpu]) != NULL) { - /* initialize thread_struct. we really want to avoid destroy - * idle tread - */ - idle->thread.sp = (unsigned long)task_pt_regs(idle); - init_idle(idle, cpu); - return idle; - } - idle = fork_idle(cpu); +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); - if (!IS_ERR(idle)) - cpu_idle_tasks[cpu] = idle; - return idle; + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); } -#else -#define alloc_idle_task(cpu) fork_idle(cpu) -#endif - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -544,11 +553,15 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. 
*/ { - struct task_struct *idle; unsigned long boot_error; int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + INIT_WORK(&c_idle.work, do_fork_idle); /* * Save current MTRR state in case it was changed since early boot @@ -556,19 +569,38 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) */ mtrr_save_state(); + c_idle.idle = get_idle_for_cpu(cpu); + /* * We can't use kernel_thread since we must avoid to * reschedule the child. */ - idle = alloc_idle_task(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); + if (c_idle.idle) { + c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); + init_idle(c_idle.idle, cpu); + goto do_rest; + } + + if (!keventd_up() || current_is_keventd()) + c_idle.work.func(&c_idle.work); + else { + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) { + printk(KERN_ERR "failed fork for CPU %d\n", cpu); + return PTR_ERR(c_idle.idle); + } + set_idle_for_cpu(cpu, c_idle.idle); +do_rest: + per_cpu(current_task, cpu) = c_idle.idle; init_gdt(cpu); - per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - idle->thread.ip = (unsigned long) start_secondary; + c_idle.idle->thread.ip = (unsigned long) start_secondary; /* start_eip had better be page-aligned! 
*/ start_eip = setup_trampoline(); @@ -577,7 +609,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* So we see what's up */ printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip); /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.sp = (void *) idle->thread.sp; + stack_start.sp = (void *) c_idle.idle->thread.sp; irq_ctx_init(cpu); -- cgit v1.2.3 From 4c07ad6950c2c7077c6d60a3ce83fdbbb553bd65 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:46 -0300 Subject: x86: don't span a new worker in __smp_prepare_cpu We can do it now that do_boot_cpu has its own worker. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index fc1eb5255f66..c03596e11db8 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -699,44 +699,18 @@ void cpu_exit_clear(void) } #endif -struct warm_boot_cpu_info { - struct completion *complete; - struct work_struct task; - int apicid; - int cpu; -}; - -static void __cpuinit do_warm_boot_cpu(struct work_struct *work) -{ - struct warm_boot_cpu_info *info = - container_of(work, struct warm_boot_cpu_info, task); - do_boot_cpu(info->apicid, info->cpu); - complete(info->complete); -} - static void __cpuinit __smp_prepare_cpu(int cpu) { - DECLARE_COMPLETION_ONSTACK(done); - struct warm_boot_cpu_info info; int apicid; apicid = per_cpu(x86_cpu_to_apicid, cpu); - info.complete = &done; - info.apicid = apicid; - info.cpu = cpu; - INIT_WORK(&info.task, do_warm_boot_cpu); - /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); - if (!keventd_up() || current_is_keventd()) - info.task.func(&info.task); - else { - 
schedule_work(&info.task); - wait_for_completion(&done); - } + + do_boot_cpu(apicid, cpu); } static int boot_cpu_logical_apicid; -- cgit v1.2.3 From ea0cadbfed09674bcc2b3e1e7f2d7317ddde4e95 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:47 -0300 Subject: x86: modify smp_callin in x86_64 to look like i386 We introduce empty macros just to make them look like the same Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index b9384b3af017..e93fff42ec32 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -90,6 +90,9 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; static atomic_t init_deasserted __cpuinitdata; +#define smp_callin_clear_local_apic() do {} while (0) +#define map_cpu_to_logical_apicid() do {} while (0) + /* * Report back to the Boot Processor. * Running on AP. @@ -152,8 +155,10 @@ void __cpuinit smp_callin(void) */ Dprintk("CALLIN, before setup_local_APIC().\n"); + smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); + map_cpu_to_logical_apicid(); /* * Get our bogomips. 
-- cgit v1.2.3 From df7939ae8bee101d9d79d104e17f14b60845cf0f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:48 -0300 Subject: x86: wrap esr setting up in i386 in lapic_setup_esr it is a little bit more complicated than x86_64 due to erratas and other stuff, but its existance will ease integration Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 73 ++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c32cc0feb47b..80c81c76625a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -897,12 +897,50 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } +void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write_around(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + if (esr_disable) + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... 
for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + else + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ void __cpuinit setup_local_APIC(void) { - unsigned long oldvalue, value, maxlvt, integrated; + unsigned long value, integrated; int i, j; /* Pound the ESR really hard over the head with a big hammer - mbligh */ @@ -1027,38 +1065,7 @@ void __cpuinit setup_local_APIC(void) value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); - if (integrated && !esr_disable) { - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write_around(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); - } + lapic_setup_esr(); /* Disable the local apic timer */ value = apic_read(APIC_LVTT); -- cgit v1.2.3 From ac60aae561fff99d38beba82d84277b12437c05e Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:49 -0300 Subject: x86: provide an end_local_APIC_setup function It splits setup_local_APIC in two, providing a function corresponding to the ending part of it. As a side effect, smp_callin looks the same between i386 and x86_64. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 7 ++++++- arch/x86/kernel/smpboot_32.c | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80c81c76625a..6f506020bd7d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1064,9 +1064,13 @@ void __cpuinit setup_local_APIC(void) if (!integrated) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); +} - lapic_setup_esr(); +void __cpuinit end_local_APIC_setup(void) +{ + unsigned long value; + lapic_setup_esr(); /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); @@ -1256,6 +1260,7 @@ int __init APIC_init_uniprocessor(void) setup_local_APIC(); + end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) if (!skip_ioapic_setup && nr_ioapics) diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index c03596e11db8..dbfaeb30a69a 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -161,6 +161,7 @@ static void __cpuinit smp_callin(void) Dprintk("CALLIN, before setup_local_APIC().\n"); smp_callin_clear_local_apic(); setup_local_APIC(); + end_local_APIC_setup(); map_cpu_to_logical_apicid(); /* @@ -780,6 +781,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n"); connect_bsp_APIC(); setup_local_APIC(); + end_local_APIC_setup(); } return -1; } @@ -813,6 +815,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) connect_bsp_APIC(); setup_local_APIC(); + end_local_APIC_setup(); map_cpu_to_logical_apicid(); -- cgit v1.2.3 From e481fcf8563d300e7f8875cae5fdc41941d29de0 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:50 -0300 Subject: x86: calibrate delay with irqs enabled We do it to make it close to 
x86_64. The latter needs it, otherwise the nmi watchdog can get into the scene and kill us with a hammer. Enabling irqs here used to trigger a bug in i386. This is because time irq handling relies upon structures that are only initialized after smp initcalls (More precisely, it will find per_cpu(hrtimer_bases, cpu)->cb_pending list not initialized and crash) Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index dbfaeb30a69a..bd2f8863efa2 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -167,7 +167,9 @@ static void __cpuinit smp_callin(void) /* * Get our bogomips. */ + local_irq_enable(); calibrate_delay(); + local_irq_disable(); Dprintk("Stack at about %p\n",&cpuid); /* -- cgit v1.2.3 From 6becedbb06072c5741d4057b9facecb4b3143711 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:51 -0300 Subject: x86: minor adjustments for do_boot_cpu This patch provides minor adjustments for do_boot_cpu in both architectures to allow for integration Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 22 ++++++++++++++-------- arch/x86/kernel/smpboot_64.c | 15 ++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index bd2f8863efa2..5165b11d8aac 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -556,7 +556,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. 
*/ { - unsigned long boot_error; + unsigned long boot_error = 0; int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; @@ -566,11 +566,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); - /* - * Save current MTRR state in case it was changed since early boot - * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: - */ - mtrr_save_state(); + alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -607,8 +603,6 @@ do_rest: /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); - alternatives_smp_switch(1); - /* So we see what's up */ printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip); /* Stack for startup_32 can be just as for start_secondary onwards */ @@ -628,6 +622,12 @@ do_rest: store_NMI_vector(&nmi_high, &nmi_low); smpboot_setup_warm_reset_vector(start_eip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + /* * Starting actual IPI sequence... @@ -864,6 +864,12 @@ int __cpuinit native_cpu_up(unsigned int cpu) return -EINVAL; } + /* + * Save current MTRR state in case it was changed since early boot + * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: + */ + mtrr_save_state(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; __smp_prepare_cpu(cpu); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index e93fff42ec32..7d1b4cb380db 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -432,7 +432,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) */ static int __cpuinit do_boot_cpu(int cpu, int apicid) { - unsigned long boot_error; + unsigned long boot_error = 0; int timeout; unsigned long start_rip; struct create_idle c_idle = { @@ -530,11 +530,6 @@ do_rest: apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - /* - * Status is now clean - */ - boot_error = 0; - /* * Starting actual IPI sequence... 
*/ @@ -564,7 +559,7 @@ do_rest: print_cpu_info(&cpu_data(cpu)); } else { boot_error = 1; - if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) + if (*((volatile unsigned char *)trampoline_base) == 0xA5) /* trampoline started but...? */ printk("Stuck ??\n"); @@ -583,10 +578,12 @@ do_rest: cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - return -EIO; } - return 0; + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)trampoline_base) = 0; + + return boot_error; } cycles_t cacheflush_time; -- cgit v1.2.3 From 4370ee4d3b7772158174bf6f0bf08359c2ccf54b Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:52 -0300 Subject: x86: call do_boot_cpu directly from native_cpu_up We don't need __smp_prepare_cpu anymore. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5165b11d8aac..4ba5ab2d81fb 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -702,20 +702,6 @@ void cpu_exit_clear(void) } #endif -static void __cpuinit __smp_prepare_cpu(int cpu) -{ - int apicid; - - apicid = per_cpu(x86_cpu_to_apicid, cpu); - - /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); - flush_tlb_all(); - - do_boot_cpu(apicid, cpu); -} - static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; @@ -872,7 +858,12 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - __smp_prepare_cpu(cpu); + /* init low mem mapping */ + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + min_t(unsigned long, KERNEL_PGD_PTRS, 
USER_PGD_PTRS)); + flush_tlb_all(); + + do_boot_cpu(apicid, cpu); /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { -- cgit v1.2.3 From f6bc40290964b5fcb48c226ccafa4b7536d62663 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:53 -0300 Subject: x86: include mach_apic.h in smpboot_64.c and smpboot.c After the inclusion, a lot of files need fixing for conflicts, some of them in the headers themselves, to accommodate both i386 and x86_64 versions. [ mingo@elte.hu: build fix ] Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ++++ arch/x86/kernel/mpparse_64.c | 2 ++ arch/x86/kernel/smpboot.c | 2 ++ arch/x86/kernel/smpboot_64.c | 1 + 4 files changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2cdc9de9371d..956b60f3ebd5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -40,6 +40,10 @@ #include #include +#ifdef CONFIG_X86_LOCAL_APIC +# include +#endif + static int __initdata acpi_force = 0; #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 529b1c22077e..03ef1a8b53e8 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -30,6 +30,8 @@ #include #include +#include + /* Have we found an MP table */ int smp_found_config; unsigned int __cpuinitdata maxcpus = NR_CPUS; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6978f1bf6533..253be86a88e4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -11,6 +11,8 @@ #include #include +#include + /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 7d1b4cb380db..8a59fa80f883 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -61,6 +61,7 @@ #include #include 
+#include #include /* Set when the idlers are all forked */ -- cgit v1.2.3 From 071782692798d7a6e0a5679f3186ea7fea49fd62 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:54 -0300 Subject: x86: change wakeup_secondary name wakeup_secondary_via_INIT => wakeup_secondary_cpu. This is to match i386, where init is not always used. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 8a59fa80f883..7ec96218a97e 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -63,6 +63,7 @@ #include #include #include +#include /* Set when the idlers are all forked */ int smp_threads_ready; @@ -293,7 +294,8 @@ static void __inquire_remote_apic(int apicid) /* * Kick the secondary to wake up. */ -static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) +static int __cpuinit wakeup_secondary_cpu(int phys_apicid, + unsigned int start_rip) { unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; @@ -534,7 +536,7 @@ do_rest: /* * Starting actual IPI sequence... 
*/ - boot_error = wakeup_secondary_via_INIT(apicid, start_rip); + boot_error = wakeup_secondary_cpu(apicid, start_rip); if (!boot_error) { /* -- cgit v1.2.3 From b9f9294a86fd274e4055891450033e8bc9d68f66 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:55 -0300 Subject: x86: add callin tests to cpu_up Now that we boot cpus here, callin_map has this meaning (same as x86_64) Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 4ba5ab2d81fb..33758a2ddd48 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -839,6 +839,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = cpu_present_to_apicid(cpu); unsigned long flags; + int err; WARN_ON(irqs_disabled()); @@ -850,6 +851,14 @@ int __cpuinit native_cpu_up(unsigned int cpu) return -EINVAL; } + /* + * Already booted CPU? + */ + if (cpu_isset(cpu, cpu_callin_map)) { + Dprintk("do_boot_cpu %d Already started\n", cpu); + return -ENOSYS; + } + /* * Save current MTRR state in case it was changed since early boot * (e.g. 
by the ACPI SMI) to initialize new CPUs with MTRRs in sync: @@ -863,15 +872,12 @@ int __cpuinit native_cpu_up(unsigned int cpu) min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); - do_boot_cpu(apicid, cpu); - - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) { - printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - return -EIO; + err = do_boot_cpu(apicid, cpu); + if (err < 0) { + Dprintk("do_boot_cpu failed %d\n", err); + return err; } - /* * Check TSC synchronization with the AP (keep irqs disabled * while doing so): -- cgit v1.2.3 From 7cc3959ecd830796231f50bf5e42dc018b3694f2 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:56 -0300 Subject: x86: move {un}map_cpu_to_logical_apicid to smpboot.c Move map_cpu_to_logical_apicid() and unmap_cpu_to_logical_apicid() to smpboot.c. They take together all the bunch of static functions they rely upon Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 59 ++----------------------------------------- 2 files changed, 62 insertions(+), 57 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 253be86a88e4..5bff87e99898 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -50,6 +50,66 @@ static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ int __cpuinitdata smp_b_stepping; +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) + +/* which logical CPUs are on which nodes */ +cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = + { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; +EXPORT_SYMBOL(node_to_cpumask_map); +/* which node each logical CPU is on */ +int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; +EXPORT_SYMBOL(cpu_to_node_map); + +/* set up a mapping between cpu and node. 
*/ +static void map_cpu_to_node(int cpu, int node) +{ + printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); + cpu_set(cpu, node_to_cpumask_map[node]); + cpu_to_node_map[cpu] = node; +} + +/* undo a mapping between cpu and node. */ +static void unmap_cpu_to_node(int cpu) +{ + int node; + + printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); + for (node = 0; node < MAX_NUMNODES; node++) + cpu_clear(cpu, node_to_cpumask_map[node]); + cpu_to_node_map[cpu] = 0; +} +#else /* !(CONFIG_NUMA && CONFIG_X86_32) */ +#define map_cpu_to_node(cpu, node) ({}) +#define unmap_cpu_to_node(cpu) ({}) +#endif + +#ifdef CONFIG_X86_32 +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = + { [0 ... NR_CPUS-1] = BAD_APICID }; + +void map_cpu_to_logical_apicid(void) +{ + int cpu = smp_processor_id(); + int apicid = logical_smp_processor_id(); + int node = apicid_to_node(apicid); + + if (!node_online(node)) + node = first_online_node; + + cpu_2_logical_apicid[cpu] = apicid; + map_cpu_to_node(cpu, node); +} + +void unmap_cpu_to_logical_apicid(int cpu) +{ + cpu_2_logical_apicid[cpu] = BAD_APICID; + unmap_cpu_to_node(cpu); +} +#else +#define unmap_cpu_to_logical_apicid(cpu) do {} while (0) +#define map_cpu_to_logical_apicid() do {} while (0) +#endif + static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 33758a2ddd48..1eb7b73b45a3 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -74,7 +74,8 @@ EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); u8 apicid_2_node[MAX_APICID]; -static void map_cpu_to_logical_apicid(void); +extern void map_cpu_to_logical_apicid(void); +extern void unmap_cpu_to_logical_apicid(int cpu); /* State of each CPU. 
*/ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -262,62 +263,6 @@ extern struct { unsigned short ss; } stack_start; -#ifdef CONFIG_NUMA - -/* which logical CPUs are on which nodes */ -cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = - { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(node_to_cpumask_map); -/* which node each logical CPU is on */ -int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; -EXPORT_SYMBOL(cpu_to_node_map); - -/* set up a mapping between cpu and node. */ -static inline void map_cpu_to_node(int cpu, int node) -{ - printk("Mapping cpu %d to node %d\n", cpu, node); - cpu_set(cpu, node_to_cpumask_map[node]); - cpu_to_node_map[cpu] = node; -} - -/* undo a mapping between cpu and node. */ -static inline void unmap_cpu_to_node(int cpu) -{ - int node; - - printk("Unmapping cpu %d from all nodes\n", cpu); - for (node = 0; node < MAX_NUMNODES; node ++) - cpu_clear(cpu, node_to_cpumask_map[node]); - cpu_to_node_map[cpu] = 0; -} -#else /* !CONFIG_NUMA */ - -#define map_cpu_to_node(cpu, node) ({}) -#define unmap_cpu_to_node(cpu) ({}) - -#endif /* CONFIG_NUMA */ - -u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = BAD_APICID }; - -static void map_cpu_to_logical_apicid(void) -{ - int cpu = smp_processor_id(); - int apicid = logical_smp_processor_id(); - int node = apicid_to_node(apicid); - - if (!node_online(node)) - node = first_online_node; - - cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, node); -} - -static void unmap_cpu_to_logical_apicid(int cpu) -{ - cpu_2_logical_apicid[cpu] = BAD_APICID; - unmap_cpu_to_node(cpu); -} - static inline void __inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; -- cgit v1.2.3 From 9d97d0da71ad6c7ceb76b4e29b02bed1ee9d4cd2 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:57 -0300 Subject: x86: move stack_start to smp.h voyager would conflict with it, but the types are ultimately compatible. So remove the extern definition from voyager_smp.c in favour of the common one Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 1eb7b73b45a3..ae25927f08c1 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -257,12 +257,6 @@ void __devinit initialize_secondary(void) :"m" (current->thread.sp),"m" (current->thread.ip)); } -/* Static state in head.S used to set up a CPU */ -extern struct { - void * sp; - unsigned short ss; -} stack_start; - static inline void __inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; -- cgit v1.2.3 From c70dcb74309cedfa64f0060f4a84792e873ceb53 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:58 -0300 Subject: x86: change boot_cpu_id to boot_cpu_physical_apicid This is to match i386. The former name was cuter, but the current is more meaningful and more general, since cpu_id can be a logical id. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 13 +++++++------ arch/x86/kernel/mpparse_64.c | 12 ++++++------ arch/x86/kernel/smpboot_64.c | 18 ++++++++++-------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 8a475793f736..868ec1deb19a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -431,7 +431,8 @@ void __cpuinit check_boot_apic_timer_broadcast(void) lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, + &boot_cpu_physical_apicid); local_irq_disable(); } @@ -857,7 +858,7 @@ static int __init detect_init_APIC(void) } mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_id = 0; + boot_cpu_physical_apicid = 0; return 0; } @@ -882,7 +883,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); } /** @@ -909,7 +910,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). 
*/ - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); } /* @@ -930,8 +931,8 @@ int __init APIC_init_uniprocessor(void) verify_local_APIC(); - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); setup_local_APIC(); diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 03ef1a8b53e8..20a345dd425b 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -59,8 +59,8 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ -unsigned int boot_cpu_id = -1U; -EXPORT_SYMBOL(boot_cpu_id); +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); /* Internal processor count */ unsigned int num_processors; @@ -107,7 +107,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) } if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { bootup_cpu = " (Bootup-CPU)"; - boot_cpu_id = m->mpc_apicid; + boot_cpu_physical_apicid = m->mpc_apicid; } printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); @@ -665,8 +665,8 @@ void __init mp_register_lapic_address(u64 address) { mp_lapic_addr = (unsigned long) address; set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - if (boot_cpu_id == -1U) - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); } void __cpuinit mp_register_lapic (u8 id, u8 enabled) @@ -674,7 +674,7 @@ void __cpuinit mp_register_lapic (u8 id, u8 enabled) struct mpc_config_processor processor; int boot_cpu = 0; - if (id == boot_cpu_id) + if (id == boot_cpu_physical_apicid) boot_cpu = 1; processor.mpc_type = MP_PROCESSOR; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 
7ec96218a97e..420ae4a33548 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -602,7 +602,8 @@ static __init void disable_smp(void) cpu_present_map = cpumask_of_cpu(0); cpu_possible_map = cpumask_of_cpu(0); if (smp_found_config) - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + phys_cpu_present_map = + physid_mask_of_physid(boot_cpu_physical_apicid); else phys_cpu_present_map = physid_mask_of_physid(0); cpu_set(0, per_cpu(cpu_sibling_map, 0)); @@ -637,9 +638,10 @@ static int __init smp_sanity_check(unsigned max_cpus) * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ - if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) { - printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_id); + if (!physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map)) { + printk(KERN_NOTICE + "weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); physid_set(hard_smp_processor_id(), phys_cpu_present_map); } @@ -648,7 +650,7 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!cpu_has_apic) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_id); + boot_cpu_physical_apicid); printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); nr_ioapics = 0; return -1; @@ -709,9 +711,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) enable_IO_APIC(); end_local_APIC_setup(); - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); + GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? 
*/ } @@ -756,7 +758,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); - if (apicid == BAD_APICID || apicid == boot_cpu_id || + if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || !physid_isset(apicid, phys_cpu_present_map)) { printk("__cpu_up: bad cpu %d\n", cpu); return -EINVAL; -- cgit v1.2.3 From cb3c8b9003f15efa4a750a32d2d602d40cc45d5a Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:25:59 -0300 Subject: x86: integrate do_boot_cpu This is a very large patch, because it depends on a lot of auxiliary static functions. But they all have been modified to the point that they're sufficiently close now. So they're just merged in smpboot.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 588 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 532 +-------------------------------------- arch/x86/kernel/smpboot_64.c | 515 +------------------------------------ 3 files changed, 591 insertions(+), 1044 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5bff87e99898..69c17965f48d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -4,14 +4,42 @@ #include #include #include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include +#include +#include + +/* Store all idle threads, this can be reused instead of creating +* a new thread. Also avoids complicated thread destroy functionality +* for idle threads. +*/ +#ifdef CONFIG_HOTPLUG_CPU +/* + * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is + * removed after init for !CONFIG_HOTPLUG_CPU. 
+ */ +static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); +#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) +#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) +#else +struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) +#endif /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -41,6 +69,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +static atomic_t init_deasserted; + /* ready for x86_64, no harm for x86, since it will overwrite after alloc */ unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); @@ -110,6 +140,96 @@ void unmap_cpu_to_logical_apicid(int cpu) #define map_cpu_to_logical_apicid() do {} while (0) #endif +/* + * Report back to the Boot Processor. + * Running on AP. + */ +void __cpuinit smp_callin(void) +{ + int cpuid, phys_id; + unsigned long timeout; + + /* + * If waken up by an INIT in an 82489DX configuration + * we may get here before an INIT-deassert IPI reaches + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ + wait_for_init_deassert(&init_deasserted); + + /* + * (This works even if the APIC is not enabled.) + */ + phys_id = GET_APIC_ID(apic_read(APIC_ID)); + cpuid = smp_processor_id(); + if (cpu_isset(cpuid, cpu_callin_map)) { + panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, + phys_id, cpuid); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. 
+ */ + + /* + * Waiting 2s total for startup (udelay is not yet working) + */ + timeout = jiffies + 2*HZ; + while (time_before(jiffies, timeout)) { + /* + * Has the boot CPU finished it's STARTUP sequence? + */ + if (cpu_isset(cpuid, cpu_callout_map)) + break; + cpu_relax(); + } + + if (!time_before(jiffies, timeout)) { + panic("%s: CPU%d started up but did not get a callout!\n", + __func__, cpuid); + } + + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. (this is probably redundant on most + * boards) + */ + + Dprintk("CALLIN, before setup_local_APIC().\n"); + smp_callin_clear_local_apic(); + setup_local_APIC(); + end_local_APIC_setup(); + map_cpu_to_logical_apicid(); + + /* + * Get our bogomips. + * + * Need to enable IRQs because it can take longer and then + * the NMI watchdog might kill us. + */ + local_irq_enable(); + calibrate_delay(); + local_irq_disable(); + Dprintk("Stack at about %p\n", &cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + + /* + * Allow the master to continue. + */ + cpu_set(cpuid, cpu_callin_map); +} + + static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -327,6 +447,474 @@ void impress_friends(void) Dprintk("Before bogocount - setting activated=1.\n"); } +static inline void __inquire_remote_apic(int apicid) +{ + unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + char *names[] = { "ID", "VERSION", "SPIV" }; + int timeout; + u32 status; + + printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); + + /* + * Wait for idle. 
+ */ + status = safe_apic_wait_icr_idle(); + if (status) + printk(KERN_CONT + "a previous APIC delivery may have failed\n"); + + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + + timeout = 0; + do { + udelay(100); + status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; + } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); + + switch (status) { + case APIC_ICR_RR_VALID: + status = apic_read(APIC_RRR); + printk(KERN_CONT "%08x\n", status); + break; + default: + printk(KERN_CONT "failed\n"); + } + } +} + +#ifdef WAKE_SECONDARY_VIA_NMI +/* + * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal + * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this + * won't ... remember to clear down the APIC, etc later. + */ +static int __devinit +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) +{ + unsigned long send_status, accept_status = 0; + int maxlvt; + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + + Dprintk("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. 
+ */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + Dprintk("NMI sent.\n"); + + if (send_status) + printk(KERN_ERR "APIC never delivered???\n"); + if (accept_status) + printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} +#endif /* WAKE_SECONDARY_VIA_NMI */ + +extern void start_secondary(void *unused); +#ifdef WAKE_SECONDARY_VIA_INIT +static int __devinit +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ + unsigned long send_status, accept_status = 0; + int maxlvt, num_starts, j; + + /* + * Be paranoid about clearing APIC errors. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + + Dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* + * Send IPI + */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + + mdelay(10); + + Dprintk("Deasserting INIT.\n"); + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Send IPI */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + + mb(); + atomic_set(&init_deasserted, 1); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't send the STARTUP IPIs. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; + + /* + * Paravirt / VMI wants a startup IPI hook here to set up the + * target processor state. 
+ */ + startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, +#ifdef CONFIG_X86_64 + (unsigned long)init_rsp); +#else + (unsigned long)stack_start.sp); +#endif + + /* + * Run STARTUP IPI loop. + */ + Dprintk("#startup loops: %d.\n", num_starts); + + maxlvt = lapic_get_maxlvt(); + + for (j = 1; j <= num_starts; j++) { + Dprintk("Sending STARTUP #%d.\n", j); + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + Dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + Dprintk("Startup point 1.\n"); + + Dprintk("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. 
+ */ + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + Dprintk("After Startup.\n"); + + if (send_status) + printk(KERN_ERR "APIC never delivered???\n"); + if (accept_status) + printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} +#endif /* WAKE_SECONDARY_VIA_INIT */ + +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + +static int __cpuinit do_boot_cpu(int apicid, int cpu) +/* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. + * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + */ +{ + unsigned long boot_error = 0; + int timeout; + unsigned long start_ip; + unsigned short nmi_high = 0, nmi_low = 0; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + INIT_WORK(&c_idle.work, do_fork_idle); +#ifdef CONFIG_X86_64 + /* allocate memory for gdts of secondary cpus. 
Hotplug is considered */ + if (!cpu_gdt_descr[cpu].address && + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { + printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); + return -1; + } + + /* Allocate node local memory for AP pdas */ + if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { + struct x8664_pda *newpda, *pda; + int node = cpu_to_node(cpu); + pda = cpu_pda(cpu); + newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, + node); + if (newpda) { + memcpy(newpda, pda, sizeof(struct x8664_pda)); + cpu_pda(cpu) = newpda; + } else + printk(KERN_ERR + "Could not allocate node local PDA for CPU %d on node %d\n", + cpu, node); + } +#endif + + alternatives_smp_switch(1); + + c_idle.idle = get_idle_for_cpu(cpu); + + /* + * We can't use kernel_thread since we must avoid to + * reschedule the child. + */ + if (c_idle.idle) { + c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); + init_idle(c_idle.idle, cpu); + goto do_rest; + } + + if (!keventd_up() || current_is_keventd()) + c_idle.work.func(&c_idle.work); + else { + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) { + printk("failed fork for CPU %d\n", cpu); + return PTR_ERR(c_idle.idle); + } + + set_idle_for_cpu(cpu, c_idle.idle); +do_rest: +#ifdef CONFIG_X86_32 + per_cpu(current_task, cpu) = c_idle.idle; + init_gdt(cpu); + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + c_idle.idle->thread.ip = (unsigned long) start_secondary; + /* Stack for startup_32 can be just as for start_secondary onwards */ + stack_start.sp = (void *) c_idle.idle->thread.sp; + irq_ctx_init(cpu); +#else + cpu_pda(cpu)->pcurrent = c_idle.idle; + init_rsp = c_idle.idle->thread.sp; + load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); + initial_code = (unsigned long)start_secondary; + clear_tsk_thread_flag(c_idle.idle, TIF_FORK); +#endif + + /* start_ip had better be page-aligned! 
*/ + start_ip = setup_trampoline(); + + /* So we see what's up */ + printk(KERN_INFO "Booting processor %d/%d ip %lx\n", + cpu, apicid, start_ip); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + atomic_set(&init_deasserted, 0); + + Dprintk("Setting warm reset code and vector.\n"); + + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + + + /* + * Starting actual IPI sequence... + */ + boot_error = wakeup_secondary_cpu(apicid, start_ip); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + cpu_set(cpu, cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (cpu_isset(cpu, cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); + printk(KERN_INFO "CPU%d: ", cpu); + print_cpu_info(&cpu_data(cpu)); + Dprintk("CPU has booted.\n"); + } else { + boot_error = 1; + if (*((volatile unsigned char *)trampoline_base) + == 0xA5) + /* trampoline started but...? */ + printk(KERN_ERR "Stuck ??\n"); + else + /* trampoline code not run */ + printk(KERN_ERR "Not responding.\n"); + inquire_remote_apic(apicid); + } + } + + if (boot_error) { + /* Try to put things back the way they were before ... 
*/ + unmap_cpu_to_logical_apicid(cpu); +#ifdef CONFIG_X86_64 + clear_node_cpumask(cpu); /* was set by numa_add_cpu */ +#endif + cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ + cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ + cpu_clear(cpu, cpu_possible_map); + cpu_clear(cpu, cpu_present_map); + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; + } + + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)trampoline_base) = 0; + + return boot_error; +} + +int __cpuinit native_cpu_up(unsigned int cpu) +{ + int apicid = cpu_present_to_apicid(cpu); + unsigned long flags; + int err; + + WARN_ON(irqs_disabled()); + + Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); + + if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || + !physid_isset(apicid, phys_cpu_present_map)) { + printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); + return -EINVAL; + } + + /* + * Already booted CPU? + */ + if (cpu_isset(cpu, cpu_callin_map)) { + Dprintk("do_boot_cpu %d Already started\n", cpu); + return -ENOSYS; + } + + /* + * Save current MTRR state in case it was changed since early boot + * (e.g. 
by the ACPI SMI) to initialize new CPUs with MTRRs in sync: + */ + mtrr_save_state(); + + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + +#ifdef CONFIG_X86_32 + /* init low mem mapping */ + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + flush_tlb_all(); +#endif + + err = do_boot_cpu(apicid, cpu); + if (err < 0) { + Dprintk("do_boot_cpu failed %d\n", err); + return err; + } + + /* + * Check TSC synchronization with the AP (keep irqs disabled + * while doing so): + */ + local_irq_save(flags); + check_tsc_sync_source(cpu); + local_irq_restore(flags); + + while (!cpu_isset(cpu, cpu_online_map)) { + cpu_relax(); + touch_nmi_watchdog(); + } + + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index ae25927f08c1..e82eeb2fdfef 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -80,114 +80,12 @@ extern void unmap_cpu_to_logical_apicid(int cpu); /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* Store all idle threads, this can be reused instead of creating -* a new thread. Also avoids complicated thread destroy functionality -* for idle threads. -*/ -#ifdef CONFIG_HOTPLUG_CPU -/* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. 
- */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) -#else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) -#endif - -static atomic_t init_deasserted; - -static void __cpuinit smp_callin(void) -{ - int cpuid, phys_id; - unsigned long timeout; - - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - wait_for_init_deassert(&init_deasserted); - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = GET_APIC_ID(apic_read(APIC_ID)); - cpuid = smp_processor_id(); - if (cpu_isset(cpuid, cpu_callin_map)) { - printk("huh, phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - BUG(); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpu_isset(cpuid, cpu_callout_map)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - printk("BUG: CPU%d started up but did not get a callout!\n", - cpuid); - BUG(); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. 
(this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); - setup_local_APIC(); - end_local_APIC_setup(); - map_cpu_to_logical_apicid(); - - /* - * Get our bogomips. - */ - local_irq_enable(); - calibrate_delay(); - local_irq_disable(); - Dprintk("Stack at about %p\n",&cpuid); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - - /* - * Allow the master to continue. - */ - cpu_set(cpuid, cpu_callin_map); -} +extern void smp_callin(void); /* * Activate a secondary processor. */ -static void __cpuinit start_secondary(void *unused) +void __cpuinit start_secondary(void *unused) { /* * Don't put *anything* before cpu_init(), SMP booting is too @@ -257,373 +155,6 @@ void __devinit initialize_secondary(void) :"m" (current->thread.sp),"m" (current->thread.ip)); } -static inline void __inquire_remote_apic(int apicid) -{ - unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout; - u32 status; - - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - status = safe_apic_wait_icr_idle(); - if (status) - printk(KERN_CONT - "a previous APIC delivery may have failed\n"); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk(KERN_CONT "%08x\n", status); - break; - default: - printk(KERN_CONT "failed\n"); - } - } -} - -#ifdef WAKE_SECONDARY_VIA_NMI -/* - * Poke the other CPU in the eye via NMI to wake it up. 
Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. - */ -static int __devinit -wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) -{ - unsigned long send_status, accept_status = 0; - int maxlvt; - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - Dprintk("NMI sent.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_NMI */ - -#ifdef WAKE_SECONDARY_VIA_INIT -static int __devinit -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - unsigned long send_status, accept_status = 0; - int maxlvt, num_starts, j; - - /* - * Be paranoid about clearing APIC errors. 
- */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mb(); - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Paravirt / VMI wants a startup IPI hook here to set up the - * target processor state. - */ - startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - (unsigned long) stack_start.sp); - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = lapic_get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); - - /* - * Give the other CPU some time to accept the IPI. 
- */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_INIT */ - -extern cpumask_t cpu_initialized; - -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} -static int __cpuinit do_boot_cpu(int apicid, int cpu) -/* - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad - * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. - */ -{ - unsigned long boot_error = 0; - int timeout; - unsigned long start_eip; - unsigned short nmi_high = 0, nmi_low = 0; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - INIT_WORK(&c_idle.work, do_fork_idle); - - alternatives_smp_switch(1); - - c_idle.idle = get_idle_for_cpu(cpu); - - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. 
- */ - if (c_idle.idle) { - c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); - init_idle(c_idle.idle, cpu); - goto do_rest; - } - - if (!keventd_up() || current_is_keventd()) - c_idle.work.func(&c_idle.work); - else { - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - } - - if (IS_ERR(c_idle.idle)) { - printk(KERN_ERR "failed fork for CPU %d\n", cpu); - return PTR_ERR(c_idle.idle); - } - - set_idle_for_cpu(cpu, c_idle.idle); -do_rest: - per_cpu(current_task, cpu) = c_idle.idle; - init_gdt(cpu); - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - - c_idle.idle->thread.ip = (unsigned long) start_secondary; - /* start_eip had better be page-aligned! */ - start_eip = setup_trampoline(); - - /* So we see what's up */ - printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip); - /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.sp = (void *) c_idle.idle->thread.sp; - - irq_ctx_init(cpu); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - - Dprintk("Setting warm reset code and vector.\n"); - - store_NMI_vector(&nmi_high, &nmi_low); - - smpboot_setup_warm_reset_vector(start_eip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - - /* - * Starting actual IPI sequence... - */ - boot_error = wakeup_secondary_cpu(apicid, start_eip); - - if (!boot_error) { - /* - * allow APs to start initializing. 
- */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - if (*((volatile unsigned char *)trampoline_base) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); - inquire_remote_apic(apicid); - } - } - - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpu_clear(cpu, cpu_possible_map); - per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - } - - /* mark "stuck" area as not stuck */ - *((volatile unsigned long *)trampoline_base) = 0; - - return boot_error; -} - #ifdef CONFIG_HOTPLUG_CPU void cpu_exit_clear(void) { @@ -774,65 +305,6 @@ void __init native_smp_prepare_boot_cpu(void) __get_cpu_var(cpu_state) = CPU_ONLINE; } -int __cpuinit native_cpu_up(unsigned int cpu) -{ - int apicid = cpu_present_to_apicid(cpu); - unsigned long flags; - int err; - - WARN_ON(irqs_disabled()); - - Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); - - if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || - !physid_isset(apicid, phys_cpu_present_map)) { - printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); - return -EINVAL; - } - - /* - * Already booted CPU? 
- */ - if (cpu_isset(cpu, cpu_callin_map)) { - Dprintk("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - - /* - * Save current MTRR state in case it was changed since early boot - * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: - */ - mtrr_save_state(); - - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - - /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); - flush_tlb_all(); - - err = do_boot_cpu(apicid, cpu); - if (err < 0) { - Dprintk("do_boot_cpu failed %d\n", err); - return err; - } - - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); - - while (!cpu_isset(cpu, cpu_online_map)) { - cpu_relax(); - touch_nmi_watchdog(); - } - - return 0; -} - extern void impress_friends(void); extern void smp_checks(void); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 420ae4a33548..71f13b15bd89 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -71,119 +71,7 @@ int smp_threads_ready; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* - * Store all idle threads, this can be reused instead of creating - * a new thread. Also avoids complicated thread destroy functionality - * for idle threads. - */ -#ifdef CONFIG_HOTPLUG_CPU -/* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. 
- */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x,p) (per_cpu(idle_thread_array, x) = (p)) -#else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) -#endif - -static atomic_t init_deasserted __cpuinitdata; - -#define smp_callin_clear_local_apic() do {} while (0) -#define map_cpu_to_logical_apicid() do {} while (0) - -/* - * Report back to the Boot Processor. - * Running on AP. - */ -void __cpuinit smp_callin(void) -{ - int cpuid, phys_id; - unsigned long timeout; - - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - wait_for_init_deassert(&init_deasserted); - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = GET_APIC_ID(apic_read(APIC_ID)); - cpuid = smp_processor_id(); - if (cpu_isset(cpuid, cpu_callin_map)) { - panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? 
- */ - if (cpu_isset(cpuid, cpu_callout_map)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - panic("smp_callin: CPU%d started up but did not get a callout!\n", - cpuid); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); - setup_local_APIC(); - end_local_APIC_setup(); - map_cpu_to_logical_apicid(); - - /* - * Get our bogomips. - * - * Need to enable IRQs because it can take longer and then - * the NMI watchdog might kill us. - */ - local_irq_enable(); - calibrate_delay(); - local_irq_disable(); - Dprintk("Stack at about %p\n",&cpuid); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - - /* - * Allow the master to continue. - */ - cpu_set(cpuid, cpu_callin_map); -} - +extern void smp_callin(void); /* * Setup code on secondary processor (after comming out of the trampoline) */ @@ -246,349 +134,6 @@ void __cpuinit start_secondary(void) cpu_idle(); } -extern volatile unsigned long init_rsp; -extern void (*initial_code)(void); - -#ifdef APIC_DEBUG -static void __inquire_remote_apic(int apicid) -{ - unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout; - u32 status; - - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. 
- */ - status = safe_apic_wait_icr_idle(); - if (status) - printk(KERN_CONT - "a previous APIC delivery may have failed\n"); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk(KERN_CONT "%08x\n", status); - break; - default: - printk(KERN_CONT "failed\n"); - } - } -} -#endif - -/* - * Kick the secondary to wake up. - */ -static int __cpuinit wakeup_secondary_cpu(int phys_apicid, - unsigned int start_rip) -{ - unsigned long send_status, accept_status = 0; - int maxlvt, num_starts, j; - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mb(); - atomic_set(&init_deasserted, 1); - - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Paravirt / VMI wants a startup IPI hook here to set up the - * target processor state. 
- */ - startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - (unsigned long) init_rsp); - - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = lapic_get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP | (start_rip>>12)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); - if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} - -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - -/* - * Boot one CPU. 
- */ -static int __cpuinit do_boot_cpu(int cpu, int apicid) -{ - unsigned long boot_error = 0; - int timeout; - unsigned long start_rip; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - INIT_WORK(&c_idle.work, do_fork_idle); - - /* allocate memory for gdts of secondary cpus. Hotplug is considered */ - if (!cpu_gdt_descr[cpu].address && - !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { - printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); - return -1; - } - - /* Allocate node local memory for AP pdas */ - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { - struct x8664_pda *newpda, *pda; - int node = cpu_to_node(cpu); - pda = cpu_pda(cpu); - newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC, - node); - if (newpda) { - memcpy(newpda, pda, sizeof (struct x8664_pda)); - cpu_pda(cpu) = newpda; - } else - printk(KERN_ERR - "Could not allocate node local PDA for CPU %d on node %d\n", - cpu, node); - } - - alternatives_smp_switch(1); - - c_idle.idle = get_idle_for_cpu(cpu); - - if (c_idle.idle) { - c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); - init_idle(c_idle.idle, cpu); - goto do_rest; - } - - /* - * During cold boot process, keventd thread is not spun up yet. - * When we do cpu hot-add, we create idle threads on the fly, we should - * not acquire any attributes from the calling context. Hence the clean - * way to create kernel_threads() is to do that from keventd(). - * We do the current_is_keventd() due to the fact that ACPI notifier - * was also queuing to keventd() and when the caller is already running - * in context of keventd(), we would end up with locking up the keventd - * thread. 
- */ - if (!keventd_up() || current_is_keventd()) - c_idle.work.func(&c_idle.work); - else { - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - } - - if (IS_ERR(c_idle.idle)) { - printk("failed fork for CPU %d\n", cpu); - return PTR_ERR(c_idle.idle); - } - - set_idle_for_cpu(cpu, c_idle.idle); - -do_rest: - - cpu_pda(cpu)->pcurrent = c_idle.idle; - - start_rip = setup_trampoline(); - - init_rsp = c_idle.idle->thread.sp; - load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); - initial_code = start_secondary; - clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - - printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, - cpus_weight(cpu_present_map), - apicid); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - - Dprintk("Setting warm reset code and vector.\n"); - - smpboot_setup_warm_reset_vector(start_rip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - /* - * Starting actual IPI sequence... - */ - boot_error = wakeup_secondary_cpu(apicid, start_rip); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("CPU has booted.\n"); - printk(KERN_INFO "CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - } else { - boot_error = 1; - if (*((volatile unsigned char *)trampoline_base) - == 0xA5) - /* trampoline started but...? 
*/ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); -#ifdef APIC_DEBUG - inquire_remote_apic(apicid); -#endif - } - } - if (boot_error) { - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ - clear_node_cpumask(cpu); /* was set by numa_add_cpu */ - cpu_clear(cpu, cpu_present_map); - cpu_clear(cpu, cpu_possible_map); - per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - } - - /* mark "stuck" area as not stuck */ - *((volatile unsigned long *)trampoline_base) = 0; - - return boot_error; -} - cycles_t cacheflush_time; unsigned long cache_decay_ticks; @@ -745,64 +290,6 @@ void __init native_smp_prepare_boot_cpu(void) per_cpu(cpu_state, me) = CPU_ONLINE; } -/* - * Entry point to boot a CPU. - */ -int __cpuinit native_cpu_up(unsigned int cpu) -{ - int apicid = cpu_present_to_apicid(cpu); - unsigned long flags; - int err; - - WARN_ON(irqs_disabled()); - - Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); - - if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || - !physid_isset(apicid, phys_cpu_present_map)) { - printk("__cpu_up: bad cpu %d\n", cpu); - return -EINVAL; - } - - /* - * Already booted CPU? - */ - if (cpu_isset(cpu, cpu_callin_map)) { - Dprintk("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - - /* - * Save current MTRR state in case it was changed since early boot - * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: - */ - mtrr_save_state(); - - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - /* Boot it! */ - err = do_boot_cpu(cpu, apicid); - if (err < 0) { - Dprintk("do_boot_cpu failed %d\n", err); - return err; - } - - /* Unleash the CPU! 
*/ - Dprintk("waiting for cpu %d\n", cpu); - - /* - * Make sure and check TSC sync: - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); - - while (!cpu_isset(cpu, cpu_online_map)) - cpu_relax(); - err = 0; - - return err; -} - extern void impress_friends(void); extern void smp_checks(void); -- cgit v1.2.3 From bbc2ff6a91a4eef8030018cd389bb12352d11b34 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:00 -0300 Subject: x86: integrate start_secondary It now looks the same between architectures, so we merge it in smpboot.c. Minor differences goes inside an ifdef Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 86 +++++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/smpboot_32.c | 75 -------------------------------------- arch/x86/kernel/smpboot_64.c | 63 -------------------------------- 3 files changed, 85 insertions(+), 139 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 69c17965f48d..a36ae2785c48 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,90 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* + * Activate a secondary processor. + */ +void __cpuinit start_secondary(void *unused) +{ + /* + * Don't put *anything* before cpu_init(), SMP booting is too + * fragile that we want to limit the things done here to the + * most necessary things. 
+ */ +#ifdef CONFIG_VMI + vmi_bringup(); +#endif + cpu_init(); + preempt_disable(); + smp_callin(); + + /* otherwise gcc will move up smp_processor_id before the cpu_init */ + barrier(); + /* + * Check TSC synchronization with the BP: + */ + check_tsc_sync_target(); + + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + enable_NMI_through_LVT0(); + enable_8259A_irq(0); + } + + /* This must be done before setting cpu_online_map */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI recipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); +#ifdef CONFIG_X86_64 + spin_lock(&vector_lock); + + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(smp_processor_id()); + /* + * Allow the master to continue. + */ + spin_unlock(&vector_lock); +#endif + cpu_set(smp_processor_id(), cpu_online_map); + unlock_ipi_call_lock(); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + + setup_secondary_clock(); + + wmb(); + cpu_idle(); +} + +#ifdef CONFIG_X86_32 +/* + * Everything has been set up for the secondary + * CPUs - they just need to reload everything + * from the task structure + * This function must not return. + */ +void __devinit initialize_secondary(void) +{ + /* + * We don't actually need to load the full TSS, + * basically just the stack pointer and the ip. 
+ */ + + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"m" (current->thread.sp), "m" (current->thread.ip)); +} +#endif static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { @@ -533,7 +618,6 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) } #endif /* WAKE_SECONDARY_VIA_NMI */ -extern void start_secondary(void *unused); #ifdef WAKE_SECONDARY_VIA_INIT static int __devinit wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index e82eeb2fdfef..77b045cfebd4 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -80,81 +80,6 @@ extern void unmap_cpu_to_logical_apicid(int cpu); /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -extern void smp_callin(void); - -/* - * Activate a secondary processor. - */ -void __cpuinit start_secondary(void *unused) -{ - /* - * Don't put *anything* before cpu_init(), SMP booting is too - * fragile that we want to limit the things done here to the - * most necessary things. - */ -#ifdef CONFIG_VMI - vmi_bringup(); -#endif - cpu_init(); - preempt_disable(); - smp_callin(); - - /* otherwise gcc will move up smp_processor_id before the cpu_init */ - barrier(); - /* - * Check TSC synchronization with the BP: - */ - check_tsc_sync_target(); - - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - enable_NMI_through_LVT0(); - enable_8259A_irq(0); - } - - /* This must be done before setting cpu_online_map */ - set_cpu_sibling_map(raw_smp_processor_id()); - wmb(); - - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). 
- */ - lock_ipi_call_lock(); - cpu_set(smp_processor_id(), cpu_online_map); - unlock_ipi_call_lock(); - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - - setup_secondary_clock(); - - wmb(); - cpu_idle(); -} - -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __devinit initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the ip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"m" (current->thread.sp),"m" (current->thread.ip)); -} - #ifdef CONFIG_HOTPLUG_CPU void cpu_exit_clear(void) { diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 71f13b15bd89..60cd8cf1b073 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -71,69 +71,6 @@ int smp_threads_ready; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -extern void smp_callin(void); -/* - * Setup code on secondary processor (after comming out of the trampoline) - */ -void __cpuinit start_secondary(void) -{ - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. 
- */ - cpu_init(); - preempt_disable(); - smp_callin(); - - /* otherwise gcc will move up the smp_processor_id before the cpu_init */ - barrier(); - - /* - * Check TSC sync first: - */ - check_tsc_sync_target(); - - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - enable_NMI_through_LVT0(); - enable_8259A_irq(0); - } - - /* - * The sibling maps must be set before turing the online map on for - * this cpu - */ - set_cpu_sibling_map(smp_processor_id()); - - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI in genapic_flat.c. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - */ - lock_ipi_call_lock(); - spin_lock(&vector_lock); - - /* Setup the per cpu irq handling data structures */ - __setup_vector_irq(smp_processor_id()); - /* - * Allow the master to continue. - */ - spin_unlock(&vector_lock); - cpu_set(smp_processor_id(), cpu_online_map); - unlock_ipi_call_lock(); - - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - - setup_secondary_clock(); - - wmb(); - cpu_idle(); -} - cycles_t cacheflush_time; unsigned long cache_decay_ticks; -- cgit v1.2.3 From a8db8453ff52609b14716361651ad10d2ab66682 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:01 -0300 Subject: x86: merge smp_prepare_boot_cpu it is practically the same between arches now, so it is moved to smpboot.c. 
Minor differences (gdt initialization) live inside an ifdef Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 18 ++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 14 -------------- arch/x86/kernel/smpboot_64.c | 14 -------------- 3 files changed, 18 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a36ae2785c48..b214d8dcc07a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -24,6 +24,9 @@ #include #include +/* State of each CPU */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality * for idle threads. @@ -999,6 +1002,21 @@ int __cpuinit native_cpu_up(unsigned int cpu) return 0; } +/* + * Early setup to make printk work. + */ +void __init native_smp_prepare_boot_cpu(void) +{ + int me = smp_processor_id(); +#ifdef CONFIG_X86_32 + init_gdt(me); + switch_to_new_gdt(); +#endif + /* already set me in cpu_online_map in boot_cpu_init() */ + cpu_set(me, cpu_callout_map); + per_cpu(cpu_state, me) = CPU_ONLINE; +} + #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 77b045cfebd4..5d27b1db6c26 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -77,9 +77,6 @@ u8 apicid_2_node[MAX_APICID]; extern void map_cpu_to_logical_apicid(void); extern void unmap_cpu_to_logical_apicid(int cpu); -/* State of each CPU. 
*/ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; - #ifdef CONFIG_HOTPLUG_CPU void cpu_exit_clear(void) { @@ -219,17 +216,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) smp_boot_cpus(max_cpus); } -void __init native_smp_prepare_boot_cpu(void) -{ - unsigned int cpu = smp_processor_id(); - - init_gdt(cpu); - switch_to_new_gdt(); - - cpu_set(cpu, cpu_callout_map); - __get_cpu_var(cpu_state) = CPU_ONLINE; -} - extern void impress_friends(void); extern void smp_checks(void); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 60cd8cf1b073..f77299b0639e 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -68,9 +68,6 @@ /* Set when the idlers are all forked */ int smp_threads_ready; -/* State of each CPU */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; - cycles_t cacheflush_time; unsigned long cache_decay_ticks; @@ -216,17 +213,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) print_cpu_info(&cpu_data(0)); } -/* - * Early setup to make printk work. - */ -void __init native_smp_prepare_boot_cpu(void) -{ - int me = smp_processor_id(); - /* already set me in cpu_online_map in boot_cpu_init() */ - cpu_set(me, cpu_callout_map); - per_cpu(cpu_state, me) = CPU_ONLINE; -} - extern void impress_friends(void); extern void smp_checks(void); -- cgit v1.2.3 From 83f7eb9c674c1bcaad6ca258fdd7dd3b96465a62 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:02 -0300 Subject: x86: merge native_smp_cpus_done They look similar enough, and are merged. 
Only difference (zap_low_mapping for i386) is inside ifdef Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 21 ++++++++++++++++++++- arch/x86/kernel/smpboot_32.c | 21 --------------------- arch/x86/kernel/smpboot_64.c | 18 ------------------ 3 files changed, 20 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b214d8dcc07a..26118b4a1c38 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -880,7 +880,6 @@ do_rest: apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - /* * Starting actual IPI sequence... */ @@ -1017,6 +1016,26 @@ void __init native_smp_prepare_boot_cpu(void) per_cpu(cpu_state, me) = CPU_ONLINE; } +void __init native_smp_cpus_done(unsigned int max_cpus) +{ + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + + Dprintk("Boot done.\n"); + + impress_friends(); + smp_checks(); +#ifdef CONFIG_X86_IO_APIC + setup_ioapic_dest(); +#endif + check_nmi_watchdog(); +#ifdef CONFIG_X86_32 + zap_low_mappings(); +#endif +} + #ifdef CONFIG_HOTPLUG_CPU void remove_siblinginfo(int cpu) { diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5d27b1db6c26..75fb5064af66 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -215,24 +215,3 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) mb(); smp_boot_cpus(max_cpus); } - -extern void impress_friends(void); -extern void smp_checks(void); - -void __init native_smp_cpus_done(unsigned int max_cpus) -{ - /* - * Cleanup possible dangling ends... 
- */ - smpboot_restore_warm_reset_vector(); - - Dprintk("Boot done.\n"); - - impress_friends(); - smp_checks(); -#ifdef CONFIG_X86_IO_APIC - setup_ioapic_dest(); -#endif - check_nmi_watchdog(); - zap_low_mappings(); -} diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index f77299b0639e..f4363a38d079 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -212,21 +212,3 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); } - -extern void impress_friends(void); -extern void smp_checks(void); - -/* - * Finish the SMP boot. - */ -void __init native_smp_cpus_done(unsigned int max_cpus) -{ - smpboot_restore_warm_reset_vector(); - - Dprintk("Boot done.\n"); - - impress_friends(); - smp_checks(); - setup_ioapic_dest(); - check_nmi_watchdog(); -} -- cgit v1.2.3 From bd7b47ba5e4ced4e20bed2394c9580637d44550a Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:03 -0300 Subject: x86: use physical id when disabling smp if smp configuration is not found at all, hook into 0. 
This is done to match x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 75fb5064af66..14db038b6b48 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -106,7 +106,11 @@ static void __init disable_smp(void) cpu_possible_map = cpumask_of_cpu(0); cpu_present_map = cpumask_of_cpu(0); smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); + if (smp_found_config) + phys_cpu_present_map = + physid_mask_of_physid(boot_cpu_physical_apicid); + else + phys_cpu_present_map = physid_mask_of_physid(0); map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); -- cgit v1.2.3 From e7bc8fbad4c582639334285dd1d9571578c58674 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:04 -0300 Subject: x86: get rid of smp_boot_cpus This patch gets rid of smp_boot_cpus(), since it does not boot any cpu anymore. Its code is split in a way to make it closer to x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 14db038b6b48..d153d8423740 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -172,21 +172,19 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } -/* - * Cycle through the processors sending APIC IPIs to boot each. - */ -static void __init smp_boot_cpus(unsigned int max_cpus) +/* These are wrappers to interface to the new boot process. Someone + who understands all this stuff should rewrite it properly.
--RR 15/Jul/02 */ +void __init native_smp_prepare_cpus(unsigned int max_cpus) { + nmi_watchdog_default(); + cpu_callin_map = cpumask_of_cpu(0); + mb(); + /* * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ - printk(KERN_INFO "CPU%d: ", 0); - print_cpu_info(&cpu_data(0)); - - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); - current_thread_info()->cpu = 0; set_cpu_sibling_map(0); @@ -197,25 +195,19 @@ static void __init smp_boot_cpus(unsigned int max_cpus) return; } + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); map_cpu_to_logical_apicid(); - setup_portio_remap(); smpboot_setup_io_apic(); + printk(KERN_INFO "CPU%d: ", 0); + print_cpu_info(&cpu_data(0)); setup_boot_clock(); } -/* These are wrappers to interface to the new boot process. Someone - who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ -void __init native_smp_prepare_cpus(unsigned int max_cpus) -{ - nmi_watchdog_default(); - cpu_callin_map = cpumask_of_cpu(0); - mb(); - smp_boot_cpus(max_cpus); -} -- cgit v1.2.3 From 7cefaa20e798c547f569ca3f79547f820c802997 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:05 -0300 Subject: x86: additions to i386 native_smp_prepare_cpus. Add function calls to native_smp_prepare_cpus in i386 to match x86_64 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index d153d8423740..6be36d3eea4e 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -172,11 +172,23 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } -/* These are wrappers to interface to the new boot process. 
Someone - who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ +static void __init smp_cpu_index_default(void) +{ + int i; + struct cpuinfo_x86 *c; + + for_each_cpu_mask(i, cpu_possible_map) { + c = &cpu_data(i); + /* mark all to hotplug */ + c->cpu_index = NR_CPUS; + } +} + void __init native_smp_prepare_cpus(unsigned int max_cpus) { nmi_watchdog_default(); + smp_cpu_index_default(); + current_cpu_data = boot_cpu_data; cpu_callin_map = cpumask_of_cpu(0); mb(); @@ -195,7 +207,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) return; } - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", + GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? */ + } connect_bsp_APIC(); setup_local_APIC(); -- cgit v1.2.3 From 1db17f553425ae679bc771d4796b7349f00cb1d9 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:07 -0300 Subject: x86: change x86_64 native_smp_prepare_cpus to match i386 An APIC test is moved, and code is replaced by the mach-default already defined function (smpboot_setup_io_apic). setup_portio_remap() is added, but it is a nop in mach-default. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index f4363a38d079..6679ac502160 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -71,6 +71,7 @@ int smp_threads_ready; cycles_t cacheflush_time; unsigned long cache_decay_ticks; +static int boot_cpu_logical_apicid; /* * Fall back to non SMP mode after errors. 
* @@ -167,7 +168,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) { nmi_watchdog_default(); smp_cpu_index_default(); + cpu_callin_map = cpumask_of_cpu(0); + mb(); + current_cpu_data = boot_cpu_data; + boot_cpu_logical_apicid = logical_smp_processor_id(); current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); @@ -177,6 +182,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) return; } + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", + GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? */ + } /* * Switch from PIC to APIC mode. @@ -190,20 +200,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) enable_IO_APIC(); end_local_APIC_setup(); - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } - - /* - * Now start the IO-APICs - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; - /* * Set up local APIC timer on boot CPU. */ -- cgit v1.2.3 From 0df1ba8cabc6c2d613921c75e525826e6eb3210b Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:08 -0300 Subject: x86: add extra sanity check This test exists in x86_64 and also applies to i386. 
So we add it Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 6be36d3eea4e..ae23b603978c 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -118,6 +118,12 @@ static void __init disable_smp(void) static int __init smp_sanity_check(unsigned max_cpus) { + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { + printk(KERN_WARNING "weird, boot CPU (#%d) not listed" + "by the BIOS.\n", hard_smp_processor_id()); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + /* * If we couldn't find an SMP configuration at boot time, * get out of here now! -- cgit v1.2.3 From 771263d31114adb5e234364a58280c876c2ed182 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:09 -0300 Subject: x86: change x86_64 sanity checks to match i386. They are mostly innocuous. APIC_INTEGRATED will expand to 1, check_phys_apicid_present is checking for the same thing it was before, etc. But the code is identical to i386 now, and will allow us to integrate it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_64.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 6679ac502160..c66fb15b0131 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -50,6 +50,7 @@ #include #include +#include #include #include #include @@ -105,7 +106,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If we couldn't find an SMP configuration at boot time, * get out of here now!
*/ - if (!smp_found_config) { + if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); disable_smp(); if (APIC_init_uniprocessor()) @@ -118,7 +119,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ - if (!physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map)) { + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", boot_cpu_physical_apicid); @@ -128,7 +129,8 @@ static int __init smp_sanity_check(unsigned max_cpus) /* * If we couldn't find a local APIC, then get out of here now! */ - if (!cpu_has_apic) { + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && + !cpu_has_apic) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); @@ -136,6 +138,8 @@ static int __init smp_sanity_check(unsigned max_cpus) return -1; } + verify_local_APIC(); + /* * If SMP should be disabled, then really disable it! */ -- cgit v1.2.3 From 9f3734f631267d2f36008833b62670ca342ac000 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:10 -0300 Subject: x86: introduce smpboot_clear_io_apic x86_64 has two nr_ioapics = 0 statements. In 32-bit, it can be done too. 
We do it through the smpboot_clear_io_apic() inline function, to cope with subarchitectures (visws) that do not compile mpparse in Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot_32.c | 2 ++ arch/x86/kernel/smpboot_64.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index ae23b603978c..5a0f57f35191 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -155,6 +155,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + smpboot_clear_io_apic(); return -1; } @@ -173,6 +174,7 @@ static int __init smp_sanity_check(unsigned max_cpus) setup_local_APIC(); end_local_APIC_setup(); } + smpboot_clear_io_apic(); return -1; } return 0; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c66fb15b0131..775244545ffa 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -134,7 +134,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); printk(KERN_ERR "... forcing use of dummy APIC emulation.
(tell your hw vendor)\n"); - nr_ioapics = 0; + smpboot_clear_io_apic(); return -1; } @@ -145,7 +145,7 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - nr_ioapics = 0; + smpboot_clear_io_apic(); return -1; } -- cgit v1.2.3 From 8aef135c73436fa46fdb4dc8aba49d5539dee72d Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:11 -0300 Subject: x86: merge native_smp_prepare_cpus With the previous changes, code for native_smp_prepare_cpus() in i386 and x86_64 now look very similar. merge them into smpboot.c. Minor differences are inside ifdef Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 170 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 137 ---------------------------------- arch/x86/kernel/smpboot_64.c | 141 ----------------------------------- 3 files changed, 170 insertions(+), 278 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 26118b4a1c38..45119d39f31e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -75,6 +76,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; +static int boot_cpu_logical_apicid; + /* ready for x86_64, no harm for x86, since it will overwrite after alloc */ unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); @@ -1001,6 +1004,173 @@ int __cpuinit native_cpu_up(unsigned int cpu) return 0; } +/* + * Fall back to non SMP mode after errors. + * + * RED-PEN audit/test this more. I bet there is more state messed up here. 
+ */ +static __init void disable_smp(void) +{ + cpu_present_map = cpumask_of_cpu(0); + cpu_possible_map = cpumask_of_cpu(0); +#ifdef CONFIG_X86_32 + smpboot_clear_io_apic_irqs(); +#endif + if (smp_found_config) + phys_cpu_present_map = + physid_mask_of_physid(boot_cpu_physical_apicid); + else + phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); +} + +/* + * Various sanity checks. + */ +static int __init smp_sanity_check(unsigned max_cpus) +{ + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { + printk(KERN_WARNING "weird, boot CPU (#%d) not listed" + "by the BIOS.\n", hard_smp_processor_id()); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find an SMP configuration at boot time, + * get out of here now! + */ + if (!smp_found_config && !acpi_lapic) { + printk(KERN_NOTICE "SMP motherboard not detected.\n"); + disable_smp(); + if (APIC_init_uniprocessor()) + printk(KERN_NOTICE "Local APIC not detected." + " Using dummy APIC emulation.\n"); + return -1; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk(KERN_NOTICE + "weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && + !cpu_has_apic) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation." + "(tell your hw vendor)\n"); + smpboot_clear_io_apic(); + return -1; + } + + verify_local_APIC(); + + /* + * If SMP should be disabled, then really disable it! 
+ */ + if (!max_cpus) { + printk(KERN_INFO "SMP mode deactivated," + "forcing use of dummy APIC emulation.\n"); + smpboot_clear_io_apic(); +#ifdef CONFIG_X86_32 + if (nmi_watchdog == NMI_LOCAL_APIC) { + printk(KERN_INFO "activating minimal APIC for" + "NMI watchdog use.\n"); + connect_bsp_APIC(); + setup_local_APIC(); + end_local_APIC_setup(); + } +#endif + return -1; + } + + return 0; +} + +static void __init smp_cpu_index_default(void) +{ + int i; + struct cpuinfo_x86 *c; + + for_each_cpu_mask(i, cpu_possible_map) { + c = &cpu_data(i); + /* mark all to hotplug */ + c->cpu_index = NR_CPUS; + } +} + +/* + * Prepare for SMP bootup. The MP table or ACPI has been read + * earlier. Just do some sanity checking here and enable APIC mode. + */ +void __init native_smp_prepare_cpus(unsigned int max_cpus) +{ + nmi_watchdog_default(); + smp_cpu_index_default(); + current_cpu_data = boot_cpu_data; + cpu_callin_map = cpumask_of_cpu(0); + mb(); + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + boot_cpu_logical_apicid = logical_smp_processor_id(); + current_thread_info()->cpu = 0; /* needed? */ + set_cpu_sibling_map(0); + + if (smp_sanity_check(max_cpus) < 0) { + printk(KERN_INFO "SMP disabled\n"); + disable_smp(); + return; + } + + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", + GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? */ + } + +#ifdef CONFIG_X86_32 + connect_bsp_APIC(); +#endif + /* + * Switch from PIC to APIC mode. + */ + setup_local_APIC(); + +#ifdef CONFIG_X86_64 + /* + * Enable IO APIC before setting up error vector + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + end_local_APIC_setup(); + + map_cpu_to_logical_apicid(); + + setup_portio_remap(); + + smpboot_setup_io_apic(); + /* + * Set up local APIC timer on boot CPU. 
+ */ + + printk(KERN_INFO "CPU%d: ", 0); + print_cpu_info(&cpu_data(0)); + setup_boot_clock(); +} /* * Early setup to make printk work. */ diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5a0f57f35191..3a1b9e40cedb 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -74,7 +74,6 @@ EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); u8 apicid_2_node[MAX_APICID]; -extern void map_cpu_to_logical_apicid(void); extern void unmap_cpu_to_logical_apicid(int cpu); #ifdef CONFIG_HOTPLUG_CPU @@ -94,144 +93,8 @@ void cpu_exit_clear(void) } #endif -static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; #ifdef CONFIG_X86_NUMAQ EXPORT_SYMBOL(xquad_portio); #endif - -static void __init disable_smp(void) -{ - cpu_possible_map = cpumask_of_cpu(0); - cpu_present_map = cpumask_of_cpu(0); - smpboot_clear_io_apic_irqs(); - if (smp_found_config) - phys_cpu_present_map = - physid_mask_of_physid(boot_cpu_physical_apicid); - else - phys_cpu_present_map = physid_mask_of_physid(0); - map_cpu_to_logical_apicid(); - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); -} - -static int __init smp_sanity_check(unsigned max_cpus) -{ - if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk(KERN_WARNING "weird, boot CPU (#%d) not listed" - "by the BIOS.\n", hard_smp_processor_id()); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); - return -1; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. 
- * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - smpboot_clear_io_apic(); - return -1; - } - - verify_local_APIC(); - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - smp_found_config = 0; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - - if (nmi_watchdog == NMI_LOCAL_APIC) { - printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n"); - connect_bsp_APIC(); - setup_local_APIC(); - end_local_APIC_setup(); - } - smpboot_clear_io_apic(); - return -1; - } - return 0; -} - -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_cpu_mask(i, cpu_possible_map) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = NR_CPUS; - } -} - -void __init native_smp_prepare_cpus(unsigned int max_cpus) -{ - nmi_watchdog_default(); - smp_cpu_index_default(); - current_cpu_data = boot_cpu_data; - cpu_callin_map = cpumask_of_cpu(0); - mb(); - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - boot_cpu_logical_apicid = logical_smp_processor_id(); - current_thread_info()->cpu = 0; - - set_cpu_sibling_map(0); - - if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); - disable_smp(); - return; - } - - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC 
unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } - - connect_bsp_APIC(); - setup_local_APIC(); - end_local_APIC_setup(); - map_cpu_to_logical_apicid(); - - setup_portio_remap(); - - smpboot_setup_io_apic(); - - printk(KERN_INFO "CPU%d: ", 0); - print_cpu_info(&cpu_data(0)); - setup_boot_clock(); -} - diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 775244545ffa..66b55629733b 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -71,144 +71,3 @@ int smp_threads_ready; cycles_t cacheflush_time; unsigned long cache_decay_ticks; - -static int boot_cpu_logical_apicid; -/* - * Fall back to non SMP mode after errors. - * - * RED-PEN audit/test this more. I bet there is more state messed up here. - */ -static __init void disable_smp(void) -{ - cpu_present_map = cpumask_of_cpu(0); - cpu_possible_map = cpumask_of_cpu(0); - if (smp_found_config) - phys_cpu_present_map = - physid_mask_of_physid(boot_cpu_physical_apicid); - else - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, per_cpu(cpu_sibling_map, 0)); - cpu_set(0, per_cpu(cpu_core_map, 0)); -} - -/* - * Various sanity checks. - */ -static int __init smp_sanity_check(unsigned max_cpus) -{ - if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - hard_smp_processor_id()); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." 
- " Using dummy APIC emulation.\n"); - return -1; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk(KERN_NOTICE - "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && - !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - smpboot_clear_io_apic(); - return -1; - } - - verify_local_APIC(); - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - smpboot_clear_io_apic(); - return -1; - } - - return 0; -} - -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_cpu_mask(i, cpu_possible_map) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = NR_CPUS; - } -} - -/* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. - */ -void __init native_smp_prepare_cpus(unsigned int max_cpus) -{ - nmi_watchdog_default(); - smp_cpu_index_default(); - cpu_callin_map = cpumask_of_cpu(0); - mb(); - - current_cpu_data = boot_cpu_data; - boot_cpu_logical_apicid = logical_smp_processor_id(); - current_thread_info()->cpu = 0; /* needed? 
*/ - set_cpu_sibling_map(0); - - if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); - disable_smp(); - return; - } - - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } - - /* - * Switch from PIC to APIC mode. - */ - setup_local_APIC(); - - /* - * Enable IO APIC before setting up error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); - end_local_APIC_setup(); - - /* - * Set up local APIC timer on boot CPU. - */ - - setup_boot_clock(); - printk(KERN_INFO "CPU%d: ", 0); - print_cpu_info(&cpu_data(0)); -} -- cgit v1.2.3 From 2cd9fb71eedffb3a208a84daff705b9da5c915e8 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:12 -0300 Subject: x86: merge cpu_exit_clear this is the last remaining function in smpboot_32.c Since it is i386 specific, move it around an ifdef to smpboot.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 18 ++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 19 ------------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 45119d39f31e..6a7fb1300073 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1207,6 +1207,24 @@ void __init native_smp_cpus_done(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU + +# ifdef CONFIG_X86_32 +void cpu_exit_clear(void) +{ + int cpu = raw_smp_processor_id(); + + idle_task_exit(); + + cpu_uninit(); + irq_ctx_exit(cpu); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + + unmap_cpu_to_logical_apicid(cpu); +} +# endif /* CONFIG_X86_32 */ + void remove_siblinginfo(int cpu) { int sibling; diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 
3a1b9e40cedb..5469207fa863 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -74,25 +74,6 @@ EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); u8 apicid_2_node[MAX_APICID]; -extern void unmap_cpu_to_logical_apicid(int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -void cpu_exit_clear(void) -{ - int cpu = raw_smp_processor_id(); - - idle_task_exit(); - - cpu_uninit(); - irq_ctx_exit(cpu); - - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - - unmap_cpu_to_logical_apicid(cpu); -} -#endif - /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; #ifdef CONFIG_X86_NUMAQ -- cgit v1.2.3 From acbb67341805d3b9ef263d8cbd103a6054164491 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:13 -0300 Subject: x86: move apicid mappings to smpboot.c They are i386 specific (the x86_64 definitions live elsewhere, and should remain there), so are enclosed around an ifdef Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 21 +++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 13 ------------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6a7fb1300073..75637fb760e7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -25,6 +25,27 @@ #include #include +/* + * FIXME: For x86_64, those are defined in other files. But moving them here, + * would make the setup areas dependent on smp, which is a loss. When we + * integrate apic between arches, we can probably do a better job, but + * right now, they'll stay here -- glommer + */ +#ifdef CONFIG_X86_32 +/* which logical CPU number maps to which CPU (physical APIC ID) */ +u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = + { [0 ... 
NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_early_ptr; +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); + +u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata + = { [0 ... NR_CPUS-1] = BAD_APICID }; +void *x86_bios_cpu_apicid_early_ptr; +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +#endif + /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 5469207fa863..3590afe575e7 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -59,19 +59,6 @@ #include #include -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = - { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_cpu_to_apicid_early_ptr; -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); - -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_bios_cpu_apicid_early_ptr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - u8 apicid_2_node[MAX_APICID]; /* Where the IO area was mapped on multiquad, always 0 otherwise */ -- cgit v1.2.3 From 4cedb3343f0b087275b9a8e23fc90737881ac91c Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 19 Mar 2008 14:26:14 -0300 Subject: x86: remove smpboot_32.c and smpboot_64.c Remove the last leftovers from the files. Move the ones that are still used to the files they belong, the others that grep can't reach, simply throw away. 
Merge comments ontop of file and that's it: smpboot integrated Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +-- arch/x86/kernel/smpboot.c | 42 +++++++++++++++++++++++++ arch/x86/kernel/smpboot_32.c | 68 ----------------------------------------- arch/x86/kernel/smpboot_64.c | 73 -------------------------------------------- 4 files changed, 44 insertions(+), 143 deletions(-) delete mode 100644 arch/x86/kernel/smpboot_32.c delete mode 100644 arch/x86/kernel/smpboot_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5d33509fd1c1..d5a05a0cef62 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,10 +46,10 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smpboot_$(BITS).o smp.o +obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += smpboot_64.o tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 75637fb760e7..61b9a5b6fc07 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,3 +1,44 @@ +/* + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * Copyright 2001 Andi Kleen, SuSE Labs. + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. 
+ * + * This code is released under the GNU General Public License version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIPS report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. + * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. + * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. Rozycki : Bits for genuine 82489DX APICs + * Andi Kleen : Changed for SMP boot into long mode. + * Martin J. Bligh : Added support for multi-quad systems + * Dave Jones : Report invalid combinations of Athlon CPUs. + * Rusty Russell : Hacked into shape for new "hotplug" boot process. + * Andi Kleen : Converted to new state machine. + * Ashok Raj : CPU hotplug support + * Glauber Costa : i386 and x86_64 integration + */ + #include #include #include @@ -44,6 +85,7 @@ u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata void *x86_bios_cpu_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +u8 apicid_2_node[MAX_APICID]; #endif /* State of each CPU */ diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c deleted file mode 100644 index 3590afe575e7..000000000000 --- a/arch/x86/kernel/smpboot_32.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. 
- * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIPS report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Martin J. Bligh : Added support for multi-quad systems - * Dave Jones : Report invalid combinations of Athlon CPUs. -* Rusty Russell : Hacked into shape for new "hotplug" boot process. */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -u8 apicid_2_node[MAX_APICID]; - -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; -#ifdef CONFIG_X86_NUMAQ -EXPORT_SYMBOL(xquad_portio); -#endif diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c deleted file mode 100644 index 66b55629733b..000000000000 --- a/arch/x86/kernel/smpboot_64.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * Copyright 2001 Andi Kleen, SuSE Labs. 
- * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIP report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Andi Kleen : Changed for SMP boot into long mode. - * Rusty Russell : Hacked into shape for new "hotplug" boot process. - * Andi Kleen : Converted to new state machine. - * Various cleanups. - * Probably mostly hotplug CPU ready now. 
- * Ashok Raj : CPU hotplug support - */ - - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Set when the idlers are all forked */ -int smp_threads_ready; - -cycles_t cacheflush_time; -unsigned long cache_decay_ticks; -- cgit v1.2.3 From d2953315c70a4783c94ae6af04f4b0aaad2f09c5 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:07:59 +0300 Subject: x86: lindent mpparse_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 277 ++++++++++++++++++++++--------------------- 1 file changed, 141 insertions(+), 136 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 20a345dd425b..269fd46df42c 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -41,7 +41,7 @@ unsigned int __cpuinitdata maxcpus = NR_CPUS; * MP-table. */ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -56,8 +56,6 @@ int mp_irq_entries; int nr_ioapics; unsigned long mp_lapic_addr = 0; - - /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); @@ -71,12 +69,11 @@ unsigned disabled_cpus __cpuinitdata; physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = { [0 ... NR_CPUS-1] = BAD_APICID }; + = {[0 ... 
NR_CPUS - 1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - /* * Intel MP BIOS table parsing routines: */ @@ -114,13 +111,13 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -129,14 +126,14 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) cpu = first_cpu(tmp_map); physid_set(m->mpc_apicid, phys_cpu_present_map); - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - /* + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + /* * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ cpu = 0; - } + } /* are we being called early in kernel startup? 
*/ if (x86_cpu_to_apicid_early_ptr) { u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; @@ -153,7 +150,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) cpu_set(cpu, cpu_present_map); } -static void __init MP_bus_info (struct mpc_config_bus *m) +static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; @@ -176,24 +173,24 @@ static int bad_ioapic(unsigned long address) { if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); + "(found %d)\n", MAX_IO_APICS, nr_ioapics); panic("Recompile kernel with bigger MAX_IO_APICS!\n"); } if (!address) { printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); + " found in table, skipping!\n"); return 1; } return 0; } -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +static void __init MP_ioapic_info(struct mpc_config_ioapic *m) { if (!(m->mpc_flags & MPC_APIC_USABLE)) return; - printk("I/O APIC #%d at 0x%X.\n", - m->mpc_apicid, m->mpc_apicaddr); + printk(KERN_INFO "I/O APIC #%d at 0x%X.\n", m->mpc_apicid, + m->mpc_apicaddr); if (bad_ioapic(m->mpc_apicaddr)) return; @@ -202,25 +199,25 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) nr_ioapics++; } -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs [mp_irq_entries] = *m; + mp_irqs[mp_irq_entries] = *m; Dprintk("Int: type %d, pol %d, trig %d, bus %d," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); if (++mp_irq_entries >= MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } -static void __init MP_lintsrc_info (struct mpc_config_lintsrc 
*m) +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { Dprintk("Lint: type %d, pol %d, trig %d, bus %d," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); } /* @@ -229,39 +226,38 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { char str[16]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { - printk("MPTABLE: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], - mpc->mpc_signature[2], - mpc->mpc_signature[3]); + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], mpc->mpc_signature[3]); return 0; } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { - printk("MPTABLE: checksum error!\n"); + if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { + printk(KERN_ERR "MPTABLE: checksum error!\n"); return 0; } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->mpc_spec); + mpc->mpc_spec); return 0; } if (!mpc->mpc_lapic) { printk(KERN_ERR "MPTABLE: null local APIC address!\n"); return 0; } - memcpy(str,mpc->mpc_oem,8); + memcpy(str, mpc->mpc_oem, 8); str[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ",str); + printk(KERN_INFO "MPTABLE: OEM ID: %s ", str); - memcpy(str,mpc->mpc_productid,12); + memcpy(str, mpc->mpc_productid, 12); 
str[12] = 0; - printk("MPTABLE: Product ID: %s ",str); + printk(KERN_INFO "MPTABLE: Product ID: %s ", str); - printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic); + printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); /* save the local APIC address, it might be non-default */ if (!acpi_lapic) @@ -271,52 +267,52 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) return 1; /* - * Now process the configuration blocks. + * Now process the configuration blocks. */ while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: + switch (*mpt) { + case MP_PROCESSOR: { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; if (!acpi_lapic) MP_processor_info(m); mpt += sizeof(*m); count += sizeof(*m); break; } - case MP_BUS: + case MP_BUS: { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; MP_bus_info(m); mpt += sizeof(*m); count += sizeof(*m); break; } - case MP_IOAPIC: + case MP_IOAPIC: { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; + struct mpc_config_ioapic *m = + (struct mpc_config_ioapic *)mpt; MP_ioapic_info(m); mpt += sizeof(*m); count += sizeof(*m); break; } - case MP_INTSRC: + case MP_INTSRC: { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; MP_intsrc_info(m); mpt += sizeof(*m); count += sizeof(*m); break; } - case MP_LINTSRC: + case MP_LINTSRC: { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; MP_lintsrc_info(m); mpt += sizeof(*m); count += sizeof(*m); @@ -345,7 +341,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) int ELCR_fallback = 0; intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_irqflag = 0; /* 
conforming */ intsrc.mpc_srcbus = 0; intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; @@ -360,12 +356,16 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * If it does, we assume it's valid. */ if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " + "falling back to ELCR\n"); - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n"); + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || + ELCR_trigger(13)) + printk(KERN_ERR "ELCR contains invalid data... " + "not using ELCR\n"); else { - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); + printk(KERN_INFO + "Using ELCR to identify PCI interrupts\n"); ELCR_fallback = 1; } } @@ -394,13 +394,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + intsrc.mpc_dstirq = i ? 
i : 2; /* IRQ0 to INTIN2 */ MP_intsrc_info(&intsrc); } intsrc.mpc_irqtype = mp_ExtINT; intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ MP_intsrc_info(&intsrc); } @@ -436,14 +436,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) bus.mpc_type = MP_BUS; bus.mpc_busid = 0; switch (mpc_default_type) { - default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; + default: + printk(KERN_ERR "???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; } MP_bus_info(&bus); if (mpc_default_type > 4) { @@ -465,7 +465,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; lintsrc.mpc_srcbusirq = 0; lintsrc.mpc_destapic = MP_APIC_ALL; @@ -493,14 +493,14 @@ static void __init __get_smp_config(unsigned early) */ if (acpi_lapic && acpi_ioapic) { printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); + "information\n"); return; } else if (acpi_lapic) printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); + "configuration information\n"); printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->mpf_specification); /* * Now see if we need to read further. 
@@ -514,7 +514,8 @@ static void __init __get_smp_config(unsigned early) return; } - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); + printk(KERN_INFO "Default MP configuration #%d\n", + mpf->mpf_feature1); construct_default_ISA_mptable(mpf->mpf_feature1); } else if (mpf->mpf_physptr) { @@ -525,8 +526,10 @@ static void __init __get_smp_config(unsigned early) */ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); + printk(KERN_ERR + "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. " + "(tell your hw vendor)\n"); return; } @@ -540,7 +543,9 @@ static void __init __get_smp_config(unsigned early) if (!mp_irq_entries) { struct mpc_config_bus bus; - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " + "using default mptable. 
" + "(tell your hw vendor)\n"); bus.mpc_type = MP_BUS; bus.mpc_busid = 0; @@ -573,21 +578,21 @@ void __init get_smp_config(void) static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { - extern void __bad_mpf_size(void); + extern void __bad_mpf_size(void); unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); if (sizeof(*mpf) != 16) __bad_mpf_size(); while (length > 0) { mpf = (struct intel_mp_floating *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4))) { smp_found_config = 1; mpf_found = mpf; @@ -620,8 +625,8 @@ static void __init __find_smp_config(unsigned reserve) * 3) Scan the 64K of bios */ if (smp_scan_config(0x0, 0x400, reserve) || - smp_scan_config(639*0x400, 0x400, reserve) || - smp_scan_config(0xF0000, 0x10000, reserve)) + smp_scan_config(639 * 0x400, 0x400, reserve) || + smp_scan_config(0xF0000, 0x10000, reserve)) return; /* * If it is an SMP machine we should know now. 
@@ -642,7 +647,7 @@ static void __init __find_smp_config(unsigned reserve) return; /* If we have come this far, we did not find an MP table */ - printk(KERN_INFO "No mptable found.\n"); + printk(KERN_INFO "No mptable found.\n"); } void __init early_find_smp_config(void) @@ -663,17 +668,17 @@ void __init find_smp_config(void) void __init mp_register_lapic_address(u64 address) { - mp_lapic_addr = (unsigned long) address; + mp_lapic_addr = (unsigned long)address; set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); } -void __cpuinit mp_register_lapic (u8 id, u8 enabled) +void __cpuinit mp_register_lapic(u8 id, u8 enabled) { struct mpc_config_processor processor; - int boot_cpu = 0; - + int boot_cpu = 0; + if (id == boot_cpu_physical_apicid) boot_cpu = 1; @@ -694,10 +699,10 @@ void __cpuinit mp_register_lapic (u8 id, u8 enabled) #define MP_MAX_IOAPIC_PIN 127 static struct mp_ioapic_routing { - int apic_id; - int gsi_start; - int gsi_end; - u32 pin_programmed[4]; + int apic_id; + int gsi_start; + int gsi_end; + u32 pin_programmed[4]; } mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic(int gsi) @@ -707,7 +712,7 @@ static int mp_find_ioapic(int gsi) /* Find the IOAPIC that manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { if ((gsi >= mp_ioapic_routing[i].gsi_start) - && (gsi <= mp_ioapic_routing[i].gsi_end)) + && (gsi <= mp_ioapic_routing[i].gsi_end)) return i; } @@ -745,31 +750,30 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); mp_ioapics[idx].mpc_apicver = 0; - + /* * Build basic IRQ lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI IRQs). 
*/ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_start = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_start, - mp_ioapic_routing[idx].gsi_end); + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_start, + mp_ioapic_routing[idx].gsi_end); nr_ioapics++; } -void __init -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; + int ioapic = -1; + int pin = -1; /* * Convert 'gsi' to 'ioapic.pin'. @@ -791,13 +795,13 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) intsrc.mpc_irqtype = mp_INT; intsrc.mpc_irqflag = (trigger << 2) | polarity; intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + intsrc.mpc_dstirq = pin; /* INTIN# */ - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", + intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); mp_irqs[mp_irq_entries] = intsrc; @@ -824,7 +828,7 @@ void __init mp_config_acpi_legacy_irqs(void) return; intsrc.mpc_type = MP_INTSRC; - 
intsrc.mpc_irqflag = 0; /* Conforming */ + intsrc.mpc_irqflag = 0; /* Conforming */ intsrc.mpc_srcbus = MP_ISA_BUS; intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; @@ -839,28 +843,29 @@ void __init mp_config_acpi_legacy_irqs(void) struct mpc_config_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) + if (irq->mpc_srcbus == MP_ISA_BUS + && irq->mpc_srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) + (irq->mpc_dstirq == i)) break; } if (idx != mp_irq_entries) { printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ + continue; /* IRQ already used */ } intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ + intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, + "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); mp_irqs[mp_irq_entries] = intsrc; @@ -899,21 +904,21 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); if (idx > 3) { printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); return gsi; } - if ((1< Date: Mon, 17 Mar 2008 22:08:05 +0300 Subject: x86: add bad_ioapic to mpparse_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 000b51b78fbd..fd89d3521bd3 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -287,6 +287,21 @@ static void __init MP_bus_info (struct mpc_config_bus *m) } } +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + static void __init MP_ioapic_info (struct mpc_config_ioapic *m) { if (!(m->mpc_flags & MPC_APIC_USABLE)) @@ -294,16 +309,10 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); + + if (bad_ioapic(m->mpc_apicaddr)) return; - } + mp_ioapics[nr_ioapics] = *m; nr_ioapics++; } @@ -918,16 +927,8 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) 
int idx = 0; int tmpid; - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); + if (bad_ioapic(address)) return; - } idx = nr_ioapics++; -- cgit v1.2.3 From e3e3ffa20351b32b5eaa6020d051305c8d803ed4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:11 +0300 Subject: x86: add uniq_ioapic_id to mpparse_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index fd89d3521bd3..838e4974e1ce 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -922,31 +922,30 @@ static int mp_find_ioapic (int gsi) return -1; } +static u8 uniq_ioapic_id(u8 id) +{ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +} + void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { int idx = 0; - int tmpid; if (bad_ioapic(address)) return; - idx = nr_ioapics++; + idx = nr_ioapics; mp_ioapics[idx].mpc_type = MP_IOAPIC; mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - tmpid = io_apic_get_unique_id(idx, id); - else - tmpid = id; - if (tmpid == -1) { - nr_ioapics--; - return; - } - mp_ioapics[idx].mpc_apicid = tmpid; + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); mp_ioapics[idx].mpc_apicver = 
io_apic_get_version(idx); /* @@ -960,9 +959,12 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapics[idx].mpc_apicver, + mp_ioapics[idx].mpc_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; } void __init -- cgit v1.2.3 From ce3fe6b2bfded4f5d931c5f2f9325dc2e3fd3a74 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:17 +0300 Subject: x86: use get_bios_ebda in mpparse_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 2 +- arch/x86/kernel/mpparse_64.c | 11 ++++------- arch/x86/kernel/setup_32.c | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 838e4974e1ce..a2162644cb4e 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include /* Have we found an MP table */ int smp_found_config; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 269fd46df42c..fb74135f9d0e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -641,13 +642,9 @@ static void __init __find_smp_config(unsigned reserve) * should be fixed. 
*/ - address = *(unsigned short *)phys_to_virt(0x40E); - address <<= 4; - if (smp_scan_config(address, 0x1000, reserve)) - return; - - /* If we have come this far, we did not find an MP table */ - printk(KERN_INFO "No mptable found.\n"); + address = get_bios_ebda(); + if (address) + smp_scan_config(address, 0x1000, reserve); } void __init early_find_smp_config(void) diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index eb97bcfe0f6f..58f3c1fbc5c3 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 85e46035bec6f114ad07ce8a9c70388568b1afd4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:24 +0300 Subject: x86: limit scan to 1k of EBDA. Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index fb74135f9d0e..19c826485516 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -640,11 +640,13 @@ static void __init __find_smp_config(unsigned reserve) * trustworthy, simply because the SMP table may have been * stomped on during early boot. These loaders are buggy and * should be fixed. + * + * MP1.4 SPEC states to only scan first 1K of 4K EBDA. 
*/ address = get_bios_ebda(); if (address) - smp_scan_config(address, 0x1000, reserve); + smp_scan_config(address, 0x400, reserve); } void __init early_find_smp_config(void) -- cgit v1.2.3 From 555b07646d5bd0bcd4825e83580d5f6bb34259ea Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:30 +0300 Subject: x86: rename gsi_start to gsi_base to match mpparse_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 19c826485516..83a36eed081b 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -699,7 +699,7 @@ void __cpuinit mp_register_lapic(u8 id, u8 enabled) static struct mp_ioapic_routing { int apic_id; - int gsi_start; + int gsi_base; int gsi_end; u32 pin_programmed[4]; } mp_ioapic_routing[MAX_IO_APICS]; @@ -710,7 +710,7 @@ static int mp_find_ioapic(int gsi) /* Find the IOAPIC that manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_start) + if ((gsi >= mp_ioapic_routing[i].gsi_base) && (gsi <= mp_ioapic_routing[i].gsi_end)) return i; } @@ -755,14 +755,14 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) * and to prevent reprogramming of IOAPIC pins (PCI IRQs). 
*/ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_start = gsi_base; + mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_start, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -780,7 +780,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_start; + pin = gsi - mp_ioapic_routing[ioapic].gsi_base; /* * TBD: This check is for faulty timer entries, where the override @@ -892,7 +892,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start; + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; /* * Avoid pin reprogramming. 
PRTs typically include entries -- cgit v1.2.3 From 4655c7deca112bea86ca00f616f19c3717f687aa Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:36 +0300 Subject: x86: remove mpc_apic_id() Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index a2162644cb4e..072fcc462399 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -118,7 +118,16 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) return; } +#ifdef CONFIG_X86_NUMAQ apicid = mpc_apic_id(m, translation_table[mpc_record]); +#else + Dprintk("Processor #%d %u:%u APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); + apicid = m->mpc_apicid; +#endif if (m->mpc_featureflag&(1<<0)) Dprintk(" Floating point unit present.\n"); -- cgit v1.2.3 From d285e338899a4ff662a17b22d3bb0e48bb1465d4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:42 +0300 Subject: x86: remove mpc_oem_pci_bus() Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 072fcc462399..621eac569550 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -278,7 +278,9 @@ static void __init MP_bus_info (struct mpc_config_bus *m) set_bit(m->mpc_busid, mp_bus_not_pci); if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { +#ifdef CONFIG_X86_NUMAQ mpc_oem_pci_bus(m, translation_table[mpc_record]); +#endif clear_bit(m->mpc_busid, mp_bus_not_pci); mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; mp_current_pci_id++; -- cgit v1.2.3 From 
0ec153af4dec8944e6da558093914a3bce4c76f9 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:48 +0300 Subject: x86: remove mpc_oem_bus_info() Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 621eac569550..febd69dbbee9 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -265,7 +265,11 @@ static void __init MP_bus_info (struct mpc_config_bus *m) memcpy(str, m->mpc_bustype, 6); str[6] = 0; +#ifdef CONFIG_X86_NUMAQ mpc_oem_bus_info(m, str, translation_table[mpc_record]); +#else + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); +#endif #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { -- cgit v1.2.3 From 864205062f1c752c80077be8ec2b15c81f4a6525 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 17 Mar 2008 22:08:55 +0300 Subject: x86: make struct mpc_config_translation NUMAQ-only Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index febd69dbbee9..15dd87110298 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -98,6 +98,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } +#ifdef CONFIG_X86_NUMAQ /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... 
can't see a less disgusting way of @@ -106,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; +#endif static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { @@ -475,7 +477,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) /* * Now process the configuration blocks. */ +#ifdef CONFIG_X86_NUMAQ mpc_record = 0; +#endif while (count < mpc->mpc_length) { switch(*mpt) { case MP_PROCESSOR: @@ -532,7 +536,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) break; } } +#ifdef CONFIG_X86_NUMAQ ++mpc_record; +#endif } setup_apic_routing(); if (!num_processors) -- cgit v1.2.3 From 3250c91ada16a06de5afef55bce7b766c894d75c Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 20 Mar 2008 00:39:02 -0700 Subject: x86: vSMP: Fix is_vsmp_box() is_vsmp_box() currently does not work on vSMPowered systems, as pci cfg space is not read correctly -- This patch fixes it. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a00961d42e75..eb25584c54c3 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -84,8 +84,10 @@ int is_vsmp_box(void) return vsmp; /* Check if we are running on a ScaleMP vSMP box */ - if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) == - (PCI_VENDOR_ID_SCALEMP || (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16))) + if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) == + PCI_VENDOR_ID_SCALEMP) && + (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) == + PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) vsmp = 1; return vsmp; -- cgit v1.2.3 From aa7d8e25eca5deb33eb08013bc78a80514349b40 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 20 Mar 2008 00:41:16 -0700 Subject: x86: fix build breakage when PCI is define and PARAVIRT is not - Fix the build breakage when PARAVIRT is defined but PCI is not This fixes problem reported at: http://marc.info/?l=linux-kernel&m=120525966600698&w=2 - Make is_vsmp_box() available even when PARAVIRT is not defined. This is needed to determine if tsc's are reliable as a time source even when PARAVIRT is not defined. - split vsmp_init to use is_vsmp_box() and set_vsmp_pv_ops() set_vsmp_pv_ops will do nothing if PCI is not enabled in the config. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/setup_64.c | 2 -- arch/x86/kernel/vsmp_64.c | 72 +++++++++++++++++++++++++++------------------- 3 files changed, 44 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d5a05a0cef62..1fe841a86f7e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o -obj-$(CONFIG_PARAVIRT) += vsmp_64.o +obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_ACPI_SRAT) += srat_32.o diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 990724143c43..20034493b5ad 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -351,9 +351,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_init(); -#ifdef CONFIG_PARAVIRT vsmp_init(); -#endif dmi_scan_machine(); diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index eb25584c54c3..4a790a5f61b7 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -19,6 +19,7 @@ #include #include +#if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: * ~AC is a shadow of IF. 
If IF is 'on' AC should be 'off' @@ -72,39 +73,11 @@ static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, } -static int vsmp = -1; - -int is_vsmp_box(void) -{ - if (vsmp != -1) - return vsmp; - - vsmp = 0; - if (!early_pci_allowed()) - return vsmp; - - /* Check if we are running on a ScaleMP vSMP box */ - if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) == - PCI_VENDOR_ID_SCALEMP) && - (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) == - PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) - vsmp = 1; - - return vsmp; -} - -void __init vsmp_init(void) +static void __init set_vsmp_pv_ops(void) { void *address; unsigned int cap, ctl, cfg; - if (!is_vsmp_box()) - return; - - if (!early_pci_allowed()) - return; - - /* If we are, use the distinguished irq functions */ pv_irq_ops.irq_disable = vsmp_irq_disable; pv_irq_ops.irq_enable = vsmp_irq_enable; pv_irq_ops.save_fl = vsmp_save_fl; @@ -127,5 +100,46 @@ void __init vsmp_init(void) } early_iounmap(address, 8); +} +#else +static void __init set_vsmp_pv_ops(void) +{ +} +#endif + +#ifdef CONFIG_PCI +static int vsmp = -1; + +int is_vsmp_box(void) +{ + if (vsmp != -1) + return vsmp; + + vsmp = 0; + if (!early_pci_allowed()) + return vsmp; + + /* Check if we are running on a ScaleMP vSMP box */ + if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) == + PCI_VENDOR_ID_SCALEMP) && + (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) == + PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) + vsmp = 1; + + return vsmp; +} +#else +int is_vsmp_box(void) +{ + return 0; +} +#endif + +void __init vsmp_init(void) +{ + if (!is_vsmp_box()) + return; + + set_vsmp_pv_ops(); return; } -- cgit v1.2.3 From 9f6d8552a9cb49dc556777bbdf7ac8b3d7e18edb Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 20 Mar 2008 00:43:16 -0700 Subject: x86: vSMP: use pvops only if platform has the capability to support it Re-arrange set_vsmp_pv_ops so that pv_ops are set only if the platform has capability to support paravirtualized irq ops Signed-off-by: Ravikiran Thirumalai 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 4a790a5f61b7..13bd82453e4b 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -78,12 +78,6 @@ static void __init set_vsmp_pv_ops(void) void *address; unsigned int cap, ctl, cfg; - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; - pv_init_ops.patch = vsmp_patch; - /* set vSMP magic bits to indicate vSMP capable kernel */ cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); address = early_ioremap(cfg, 8); @@ -92,7 +86,13 @@ static void __init set_vsmp_pv_ops(void) printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl); if (cap & ctl & (1 << 4)) { - /* Turn on vSMP IRQ fastpath handling (see system.h) */ + /* Setup irq ops and turn on vSMP IRQ fastpath handling */ + pv_irq_ops.irq_disable = vsmp_irq_disable; + pv_irq_ops.irq_enable = vsmp_irq_enable; + pv_irq_ops.save_fl = vsmp_save_fl; + pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_init_ops.patch = vsmp_patch; + ctl &= ~(1 << 4); writel(ctl, address + 4); ctl = readl(address + 4); -- cgit v1.2.3 From 1cb68487f5898dd97460e5b6bda9619ec3549361 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 20 Mar 2008 00:45:08 -0700 Subject: x86: apic_is_clustered_box to indicate unsynched TSC's on multiboard vSMP systems Indicate TSCs are unreliable as time sources if the platform is a multi-chassis ScaleMP vSMPowered machine. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 868ec1deb19a..5362cfd30ecd 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1209,7 +1209,7 @@ __cpuinit int apic_is_clustered_box(void) * will be [4, 0x23] or [8, 0x27] could be thought to * vsmp box still need checking... */ - if (!is_vsmp_box() && (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) return 0; bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; @@ -1249,6 +1249,12 @@ __cpuinit int apic_is_clustered_box(void) ++zeros; } + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + /* * If clusters > 2, then should be multi-chassis. 
* May have to revisit this when multi-core + hyperthreaded CPUs come -- cgit v1.2.3 From 6542fe80e6296cde50c1c3b8a9eede701ee51907 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Mar 2008 09:55:06 +0100 Subject: x86: vsmp fix x86 vsmp fix is vsmp box cleanup code got a bit smaller: arch/x86/kernel/vsmp_64.o: text data bss dec hex filename 205 4 0 209 d1 vsmp_64.o.before 181 4 0 185 b9 vsmp_64.o.after Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 13bd82453e4b..1e9a791dbe39 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -120,10 +120,8 @@ int is_vsmp_box(void) return vsmp; /* Check if we are running on a ScaleMP vSMP box */ - if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) == - PCI_VENDOR_ID_SCALEMP) && - (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) == - PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) + if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) == + (PCI_VENDOR_ID_SCALEMP | (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16))) vsmp = 1; return vsmp; -- cgit v1.2.3 From ede1389f8ab4f3a1343e567133fa9720a054a3aa Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 17 Mar 2008 22:29:32 +0200 Subject: x86: remove the write-only timer_uses_ioapic_pin_0 This patch removes the write-only timer_uses_ioapic_pin_0 (gsi can't be <= 15 in the line of its fake usage in mpparse_32.c). Spotted by the GNU C compiler. 
Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 ----- arch/x86/kernel/mpparse_32.c | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 0d70acd3b134..742fab45e1c6 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2114,8 +2114,6 @@ static inline void unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } -int timer_uses_ioapic_pin_0; - /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2155,9 +2153,6 @@ static inline void __init check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - if (pin1 == 0) - timer_uses_ioapic_pin_0 = 1; - printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 15dd87110298..15265ee11f89 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -1176,8 +1176,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * So test for this condition, and if necessary, avoid * the pin collision. */ - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) - gsi = pci_irq++; + gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI */ -- cgit v1.2.3 From a31f8dd7ee3b2f5645c220406b1e21f82971f32b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 16 Mar 2008 21:59:11 -0700 Subject: x86: ptrace vs -ENOSYS When we're stopped at syscall entry tracing, ptrace can change the %rax value from -ENOSYS to something else. If no system call is actually made because the syscall number (now in orig_rax) is bad, then we now always reset %rax to -ENOSYS again. This changes it to leave the return value alone after entry tracing. 
That way, the %rax value set by ptrace is there to be seen in user mode (or in syscall exit tracing). This is consistent with what the 32-bit kernel does. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c20c9e7e08dd..556a8df522a7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -319,19 +319,17 @@ badsys: /* Do syscall tracing */ tracesys: SAVE_REST - movq $-ENOSYS,RAX(%rsp) + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST cmpq $__NR_syscall_max,%rax - movq $-ENOSYS,%rcx - cmova %rcx,%rax - ja 1f + ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) -1: movq %rax,RAX-ARGOFFSET(%rsp) + movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ /* -- cgit v1.2.3 From 0f54091051c450bab751c3ca0cb45d61a67a683b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 17 Mar 2008 02:21:08 -0700 Subject: x86: handle_vm86_trap cleanup Use force_sig in handle_vm86_trap like other machine traps do. 
Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/vm86_32.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 6a91fcf92d67..51040698c222 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -557,16 +557,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) } if (trapno != 1) return 1; /* we let this handle by the calling routine */ - if (current->ptrace & PT_PTRACED) { - unsigned long flags; - spin_lock_irqsave(&current->sighand->siglock, flags); - sigdelset(&current->blocked, SIGTRAP); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - } - send_sig(SIGTRAP, current, 1); current->thread.trap_no = trapno; current->thread.error_code = error_code; + force_sig(SIGTRAP, current); return 0; } -- cgit v1.2.3 From acb5b8a2dd0f901463d075382ea548935e679f4e Mon Sep 17 00:00:00 2001 From: Yakov Lerner Date: Sun, 16 Mar 2008 03:21:21 -0500 Subject: x86, kprobes: correct post-eip value in post_hander() I was trying to get the address of instruction to be executed next after the kprobed instruction. But regs->eip in post_handler() contains value which is useless to the user. It's pre-corrected value. This value is difficult to use without access to resume_execution(), which is not exported anyway. I moved the invocation of post_handler() to *after* resume_execution(). Now regs->eip contains meaningful value in post_handler(). I do not think this change breaks any backward-compatibility. To make meaning of the old value, post_handler() would need access to resume_execution() which is not exported. I have difficulty to believe that previous, uncorrected, regs->eip can be meaningfully used in post_handler().
Signed-off-by: Yakov Lerner Acked-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1e3de7db9ad5..cc8ae90103ff 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -858,15 +858,15 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) if (!cur) return 0; + resume_execution(cur, regs, kcb); + regs->flags |= kcb->kprobe_saved_flags; + trace_hardirqs_fixup_flags(regs->flags); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; cur->post_handler(cur, regs, 0); } - resume_execution(cur, regs, kcb); - regs->flags |= kcb->kprobe_saved_flags; - trace_hardirqs_fixup_flags(regs->flags); - /* Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { restore_previous_kprobe(kcb); -- cgit v1.2.3 From 0054f4b708d21bd0032480cf7309d17753bc17bb Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 13 Mar 2008 21:47:32 -0400 Subject: x86: Explicitly include required header files. After an experimental cleanup of , these files were exposed as invoking kmalloc() without including . Signed-off-by: Robert P. J. 
Day Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_32.c | 1 + arch/x86/kernel/test_nx.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 9cfc094eddb0..662e63e1cd57 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 10b8a6f69f84..787a5e499dd1 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -11,6 +11,8 @@ */ #include #include +#include + #include #include -- cgit v1.2.3 From 272b9cad6e7a2f61b13cfcd7dde0010e02e9376e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 20 Mar 2008 23:58:33 -0700 Subject: x86: early memtest to find bad ram do simple memtest after init_memory_mapping use find_e820_area_size to find all ram range that is not reserved. and do some simple bits test to find some bad ram. if find some bad ram, use reserve_early to exclude that range. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 4a0953857cb2..4509757844eb 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -114,6 +114,40 @@ again: return changed; } +/* Check for already reserved areas */ +static inline int +bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) +{ + int i; + unsigned long addr = *addrp, last; + unsigned long size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} /* * This function checks if any part of the range is mapped * with type. 
@@ -190,7 +224,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, ei_last = ei->addr + ei->size; if (addr < start) addr = round_up(start, align); - if (addr > ei_last) + if (addr >= ei_last) continue; while (bad_addr(&addr, size, align) && addr+size <= ei_last) ; @@ -204,6 +238,40 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, return -1UL; } +/* + * Find next free range after *start + */ +unsigned long __init find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long addr, last; + unsigned long ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; +// printk(KERN_DEBUG "find_e820_area_size : e820 %d [%llx, %lx]\n", i, ei->addr, ei_last); + if (addr < start) + addr = round_up(start, align); +// printk(KERN_DEBUG "find_e820_area_size : 0 [%lx, %lx]\n", addr, ei_last); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && addr+ *sizep <= ei_last) + ; + last = addr + *sizep; +// printk(KERN_DEBUG "find_e820_area_size : 1 [%lx, %lx]\n", addr, last); + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} /* * Find the highest page frame number we have available */ -- cgit v1.2.3 From 01561264bd1ea1d654d09babe02d784a5b150124 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 20 Mar 2008 23:57:21 -0700 Subject: x86: allocate e820 resource struct all together don't need to allocate that one by one Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 4509757844eb..9184e6437c4f 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -300,9 
+300,10 @@ unsigned long __init e820_end_of_ram(void) void __init e820_reserve_resources(void) { int i; + struct resource *res; + + res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); for (i = 0; i < e820.nr_map; i++) { - struct resource *res; - res = alloc_bootmem_low(sizeof(struct resource)); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -313,6 +314,7 @@ void __init e820_reserve_resources(void) res->end = res->start + e820.map[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); + res++; } } -- cgit v1.2.3 From 7de6a4cdac6341807261a33896f9ab5a502a4e74 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 13 Mar 2008 11:03:58 +0100 Subject: x86: clean up aperture_64.c Initializing to zero is generally bad idea, I hope it is right for __init data, too. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 00df126169b4..479926d9e004 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -27,11 +27,11 @@ #include int gart_iommu_aperture; -int gart_iommu_aperture_disabled __initdata = 0; -int gart_iommu_aperture_allowed __initdata = 0; +int gart_iommu_aperture_disabled __initdata; +int gart_iommu_aperture_allowed __initdata; int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata = 0; +int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -- cgit v1.2.3 From 2e5d9c857d4e6c9e7b7d8c8c86a68a7842d213d6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:14 -0700 Subject: x86: PAT infrastructure patch Sets up pat_init() infrastructure. PAT MSR has following setting. 
	PAT
	|PCD
	||PWT
	|||
	000 WB		_PAGE_CACHE_WB
	001 WC		_PAGE_CACHE_WC
	010 UC-		_PAGE_CACHE_UC_MINUS
	011 UC		_PAGE_CACHE_UC

We are effectively changing WT from boot time setting to WC. UC_MINUS is used to provide backward compatibility to existing /dev/mem users(X). reserve_memtype and free_memtype are new interfaces for maintaining alias-free mapping. It is currently implemented in a simple way with a linked list and not optimized. reserve and free tracks the effective memory type, as a result of PAT and MTRR setting rather than what is actually requested in PAT. pat_init piggy backs on mtrr_init as the rules for setting both pat and mtrr are same. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 120 +++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 3e18db4cefee..011e07e99cd1 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "mtrr.h" struct mtrr_state { @@ -35,6 +36,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; +static int mtrr_state_set; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr."
@@ -42,6 +44,106 @@ static struct mtrr_state mtrr_state = {}; static int mtrr_show; module_param_named(show, mtrr_show, bool, 0); +/* + * Returns the effective MTRR type for the region + * Error returns: + * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR + * - 0xFF - when MTRR is not enabled + */ +u8 mtrr_type_lookup(u64 start, u64 end) +{ + int i; + u64 base, mask; + u8 prev_match, curr_match; + + if (!mtrr_state_set) + return 0xFF; + + if (!mtrr_state.enabled) + return 0xFF; + + /* Make end inclusive end, instead of exclusive */ + end--; + + /* Look in fixed ranges. Just return the type as per start */ + if (mtrr_state.have_fixed && (start < 0x100000)) { + int idx; + + if (start < 0x80000) { + idx = 0; + idx += (start >> 16); + return mtrr_state.fixed_ranges[idx]; + } else if (start < 0xC0000) { + idx = 1 * 8; + idx += ((start - 0x80000) >> 14); + return mtrr_state.fixed_ranges[idx]; + } else if (start < 0x1000000) { + idx = 3 * 8; + idx += ((start - 0xC0000) >> 12); + return mtrr_state.fixed_ranges[idx]; + } + } + + /* + * Look in variable ranges + * Look of multiple ranges matching this address and pick type + * as per MTRR precedence + */ + if (!mtrr_state.enabled & 2) { + return mtrr_state.def_type; + } + + prev_match = 0xFF; + for (i = 0; i < num_var_ranges; ++i) { + unsigned short start_state, end_state; + + if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) + continue; + + base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) + + (mtrr_state.var_ranges[i].base_lo & PAGE_MASK); + mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) + + (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK); + + start_state = ((start & mask) == (base & mask)); + end_state = ((end & mask) == (base & mask)); + if (start_state != end_state) + return 0xFE; + + if ((start & mask) != (base & mask)) { + continue; + } + + curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; + if (prev_match == 0xFF) { + prev_match = curr_match; + continue; + } + + if 
(prev_match == MTRR_TYPE_UNCACHABLE || + curr_match == MTRR_TYPE_UNCACHABLE) { + return MTRR_TYPE_UNCACHABLE; + } + + if ((prev_match == MTRR_TYPE_WRBACK && + curr_match == MTRR_TYPE_WRTHROUGH) || + (prev_match == MTRR_TYPE_WRTHROUGH && + curr_match == MTRR_TYPE_WRBACK)) { + prev_match = MTRR_TYPE_WRTHROUGH; + curr_match = MTRR_TYPE_WRTHROUGH; + } + + if (prev_match != curr_match) { + return MTRR_TYPE_UNCACHABLE; + } + } + + if (prev_match != 0xFF) + return prev_match; + + return mtrr_state.def_type; +} + /* Get the MSR pair relating to a var range */ static void get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) @@ -79,12 +181,16 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) base, base + step - 1, mtrr_attrib_to_str(*types)); } +static void prepare_set(void); +static void post_set(void); + /* Grab all of the MTRR state for this CPU into *state */ void __init get_mtrr_state(void) { unsigned int i; struct mtrr_var_range *vrs; unsigned lo, dummy; + unsigned long flags; vrs = mtrr_state.var_ranges; @@ -131,6 +237,17 @@ void __init get_mtrr_state(void) printk(KERN_INFO "MTRR %u disabled\n", i); } } + mtrr_state_set = 1; + + /* PAT setup for BP. We need to go through sync steps here */ + local_irq_save(flags); + prepare_set(); + + pat_init(); + + post_set(); + local_irq_restore(flags); + } /* Some BIOS's are fucked and don't set all MTRRs the same! */ @@ -397,6 +514,9 @@ static void generic_set_all(void) /* Actually set the state */ mask = set_mtrr_state(); + /* also set PAT */ + pat_init(); + post_set(); local_irq_restore(flags); -- cgit v1.2.3 From 35605a1027ac630f85a1b95684f7e86b82498cd6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 24 Mar 2008 16:02:01 -0700 Subject: x86: enable PAT for amd k8 and fam10h make known_pat_cpu to think amd k8 and fam10h is ok too. 
also make tom2 below to be WRBACK Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 17 +++++++++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 011e07e99cd1..74ec2ea4ed3e 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,6 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; +static u64 tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -138,6 +139,11 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } + if (tom2) { + if (start >= (1ULL<<32) && (end < tom2)) + return MTRR_TYPE_WRBACK; + } + if (prev_match != 0xFF) return prev_match; @@ -206,6 +212,15 @@ void __init get_mtrr_state(void) mtrr_state.def_type = (lo & 0xff); mtrr_state.enabled = (lo & 0xc00) >> 10; + if (amd_special_default_mtrr()) { + unsigned lo, hi; + /* TOP_MEM2 */ + rdmsr(MSR_K8_TOP_MEM2, lo, hi); + tom2 = hi; + tom2 <<= 32; + tom2 |= lo; + tom2 &= 0xffffff8000000ULL; + } if (mtrr_show) { int high_width; @@ -236,6 +251,8 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } + if (tom2) + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", tom2, tom2>>20); } mtrr_state_set = 1; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index a6450b3ae759..6a1e278d9323 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -627,7 +627,7 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup); #define Tom2Enabled (1U << 21) #define Tom2ForceMemTypeWB (1U << 22) -static __init int amd_special_default_mtrr(void) +int __init amd_special_default_mtrr(void) { u32 l, h; -- cgit v1.2.3 From a7c7d0e91daebd7c5e51f9416d612b6a15e7e79a Mon Sep 
17 00:00:00 2001 From: Ingo Molnar Date: Tue, 8 Apr 2008 16:25:42 +0200 Subject: x86: tom2 warning fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 74ec2ea4ed3e..353efe4f5017 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -251,8 +251,10 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) - printk(KERN_INFO "TOM2: %016lx aka %ldM\n", tom2, tom2>>20); + if (tom2) { + printk(KERN_INFO "TOM2: %016llx aka %lldM\n", + tom2, tom2>>20); + } } mtrr_state_set = 1; -- cgit v1.2.3 From 9307cacad0dfe3749f00303125c6f7f0523e5616 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 24 Mar 2008 23:24:34 -0700 Subject: x86: pat cpu feature bit setting for known cpus Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 25 +++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 7 +++++++ 2 files changed, 32 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0dd87b8d6707..d999d7833bc2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -309,6 +309,19 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) } + clear_cpu_cap(c, X86_FEATURE_PAT); + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_PAT); + break; + case X86_VENDOR_INTEL: + if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + set_cpu_cap(c, X86_FEATURE_PAT); + break; + } + } /* @@ -397,6 +410,18 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); } + clear_cpu_cap(c, X86_FEATURE_PAT); + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (c->x86 >= 0xf && c->x86 <= 0x11) + 
set_cpu_cap(c, X86_FEATURE_PAT); + break; + case X86_VENDOR_INTEL: + if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + set_cpu_cap(c, X86_FEATURE_PAT); + break; + } } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 20034493b5ad..c6fe1e4bc7c2 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -962,12 +962,19 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + + clear_cpu_cap(c, X86_FEATURE_PAT); + switch (c->x86_vendor) { case X86_VENDOR_AMD: early_init_amd(c); + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_PAT); break; case X86_VENDOR_INTEL: early_init_intel(c); + if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + set_cpu_cap(c, X86_FEATURE_PAT); break; } -- cgit v1.2.3 From 6e908947b4995bc0e551a8257c586d5c3e428201 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Mar 2008 14:32:36 +0100 Subject: x86: fix ioapic bug again un-revert: commit 4960c9df1407c2723459c60ff13e6efe0c209c62 Author: Thomas Gleixner Date: Tue Jan 22 10:23:01 2008 +0100 Revert "x86: fix NMI watchdog & 'stopped time' problem" This reverts commit d4d25deca49ec2527a634557bf5a6cf449f85deb. needs a proper fix though ... 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 12 ++++++++++-- arch/x86/kernel/nmi_32.c | 9 +++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 742fab45e1c6..8ed6eb967652 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2124,10 +2124,14 @@ static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; + unsigned int ver; unsigned long flags; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2140,11 +2144,15 @@ static inline void __init check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * disabled in the local APIC. Finally timer interrupts + * need to be acknowledged manually in the 8259A for + * timer_interrupt() and for the i82489DX when using + * the NMI watchdog. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; + timer_ack = !cpu_has_tsc; + timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); if (timer_over_8254 > 0) enable_8259A_irq(0); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 662e63e1cd57..8421d0ac6f22 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -26,6 +26,7 @@ #include #include +#include #include "mach_traps.h" @@ -81,7 +82,7 @@ int __init check_nmi_watchdog(void) prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - return -1; + goto error; printk(KERN_INFO "Testing NMI watchdog ... 
"); @@ -118,7 +119,7 @@ int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - return -1; + goto error; } printk("OK.\n"); @@ -129,6 +130,10 @@ int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; +error: + timer_ack = !cpu_has_tsc; + + return -1; } static int __init setup_nmi_watchdog(char *str) -- cgit v1.2.3 From 89bda4fccbd49f4b2bf59d0165391696037be856 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 21 Mar 2008 20:18:17 +0300 Subject: x86: use same index for processor maps Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 15265ee11f89..1b225cea5d7b 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -212,7 +212,6 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) return; } - cpu_set(num_processors, cpu_possible_map); num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); @@ -251,12 +250,13 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; cpu_to_apicid[cpu] = m->mpc_apicid; - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; + bios_cpu_apicid[cpu] = m->mpc_apicid; } else { per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } -- cgit v1.2.3 From 288621e32a7ae3a29c37b45297136c0264e2ff7b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 21 Mar 2008 23:12:14 +0300 Subject: x86: relocate_kernel - use predefined PAGE_SIZE instead of own alias This patch does clean up relocate_kernel_(32|64).S a bit by getting rid of local PAGE_ALIGNED macro. 
We should use well-known PAGE_SIZE instead Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/relocate_kernel_32.S | 3 +-- arch/x86/kernel/relocate_kernel_64.S | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index f151d6fae462..ec4e6a0e0c2e 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -15,12 +15,11 @@ */ #define PTR(x) (x << 2) -#define PAGE_ALIGNED (1 << PAGE_SHIFT) #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ #define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ .text - .align PAGE_ALIGNED + .align PAGE_SIZE .globl relocate_kernel relocate_kernel: movl 8(%esp), %ebp /* list of pages */ diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 14e95872c6a3..e252c0ed9539 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -15,11 +15,10 @@ */ #define PTR(x) (x << 3) -#define PAGE_ALIGNED (1 << PAGE_SHIFT) #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ .text - .align PAGE_ALIGNED + .align PAGE_SIZE .code64 .globl relocate_kernel relocate_kernel: -- cgit v1.2.3 From c64df70793a9c344874eb4af19f85e0662d2d3ee Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 21 Mar 2008 18:56:19 -0700 Subject: x86: memtest bootparam Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 9184e6437c4f..d6ada0833876 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -241,7 +241,9 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, /* * Find next free range after *start */ -unsigned long __init 
find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align) +unsigned long __init find_e820_area_size(unsigned long start, + unsigned long *sizep, + unsigned long align) { int i; @@ -254,17 +256,15 @@ unsigned long __init find_e820_area_size(unsigned long start, unsigned long *siz continue; addr = round_up(ei->addr, align); ei_last = ei->addr + ei->size; -// printk(KERN_DEBUG "find_e820_area_size : e820 %d [%llx, %lx]\n", i, ei->addr, ei_last); if (addr < start) addr = round_up(start, align); -// printk(KERN_DEBUG "find_e820_area_size : 0 [%lx, %lx]\n", addr, ei_last); if (addr >= ei_last) continue; *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr+ *sizep <= ei_last) + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) ; last = addr + *sizep; -// printk(KERN_DEBUG "find_e820_area_size : 1 [%lx, %lx]\n", addr, last); if (last > ei_last) continue; return addr; -- cgit v1.2.3 From d93c870bad38e8daaaf9f7e900a13431f24becbb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 24 Mar 2008 16:43:21 -0700 Subject: x86: only enable interrupts when kernel state has been set up The sysenter path tries to enable interrupts immediately. Unfortunately this doesn't work in a paravirt environment, because not enough kernel state has been set up at that point (namely, pointing %fs to the kernel percpu data segment). To fix this, defer ENABLE_INTERRUPTS until after the kernel state has been set up. Unfortunately this means that we're running with interrupts disabled for a while without calling the IRQ tracing code, but that can't be called without setting up %fs either. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4b87c32b639f..a664d5726d8d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -291,10 +291,10 @@ ENTRY(ia32_sysenter_target) movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: + * Interrupts are disabled here, but we can't trace it until + * enough kernel state to call TRACE_IRQS_OFF can be called - but + * we immediately enable interrupts at that point anyway. */ - ENABLE_INTERRUPTS(CLBR_NONE) pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -302,6 +302,7 @@ sysenter_past_esp: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET esp, 0 pushfl + orl $X86_EFLAGS_IF, (%esp) CFI_ADJUST_CFA_OFFSET 4 pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 @@ -315,6 +316,11 @@ sysenter_past_esp: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + ENABLE_INTERRUPTS(CLBR_NONE) + /* * Load the potential sixth argument from user stack. * Careful about security. 
@@ -322,14 +328,12 @@ sysenter_past_esp: cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp + movl %ebp,PT_EBP(%esp) .section __ex_table,"a" .align 4 .long 1b,syscall_fault .previous - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -543,9 +547,6 @@ END(syscall_exit_work) RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: - pushl %eax # save orig_eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace -- cgit v1.2.3 From e5699a8231593d0e11e65ccf248549935304dab1 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 24 Mar 2008 14:48:36 -0700 Subject: x86: clean up vSMP detection vSMP detection: access pci config space early in boot to detect if the system is a vSMPowered box, and cache the result in a flag, so that is_vsmp_box() retrieves the value of the flag always. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 1e9a791dbe39..caf2a26f5cfd 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -108,25 +108,34 @@ static void __init set_vsmp_pv_ops(void) #endif #ifdef CONFIG_PCI -static int vsmp = -1; +static int is_vsmp = -1; -int is_vsmp_box(void) +static void __init detect_vsmp_box(void) { - if (vsmp != -1) - return vsmp; + is_vsmp = 0; - vsmp = 0; if (!early_pci_allowed()) - return vsmp; + return; - /* Check if we are running on a ScaleMP vSMP box */ + /* Check if we are running on a ScaleMP vSMPowered box */ if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) == (PCI_VENDOR_ID_SCALEMP | (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16))) - vsmp = 1; + is_vsmp = 1; +} - return vsmp; +int is_vsmp_box(void) +{ + if (is_vsmp != -1) + 
return is_vsmp; + else { + WARN_ON_ONCE(1); + return 0; + } } #else +static int __init detect_vsmp_box(void) +{ +} int is_vsmp_box(void) { return 0; @@ -135,6 +144,7 @@ int is_vsmp_box(void) void __init vsmp_init(void) { + detect_vsmp_box(); if (!is_vsmp_box()) return; -- cgit v1.2.3 From 15a601eb9cdc2a9cc69d5fc745317805a85c064c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 12 Mar 2008 11:54:16 -0400 Subject: x86: fix test_poke for vmalloced pages * Ingo Molnar (mingo@elte.hu) wrote: > > * Mathieu Desnoyers wrote: > > > The shadow vmap for DEBUG_RODATA kernel text modification uses > > virt_to_page to get the pages from the pointer address. > > > > However, I think vmalloc_to_page would be required in case the page is > > used for modules. > > > > Since only the core kernel text is marked read-only, use > > kernel_text_address() to make sure we only shadow map the core kernel > > text, not modules. > > actually, i think we should mark module text readonly too. > Yes, but in the meantime, the x86 tree would need this patch to make kprobes work correctly on modules. I suspect that without this fix, with the enhanced hotplug and kprobes patch, kprobes will use text_poke to insert breakpoints in modules (vmalloced pages used), which will map the wrong pages and corrupt random kernel locations instead of updating the correct page. Work that would write protect the module pages should clearly be done, but it can come in a later time. We have to make sure we interact correctly with the page allocation debugging, as an example. Here is the patch against x86.git 2.6.25-rc5 : The shadow vmap for DEBUG_RODATA kernel text modification uses virt_to_page to get the pages from the pointer address. However, I think vmalloc_to_page would be required in case the page is used for modules. Since only the core kernel text is marked read-only, use kernel_text_address() to make sure we only shadow map the core kernel text, not modules. 
Signed-off-by: Mathieu Desnoyers CC: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 0c92ad4d257a..df4099dc1c68 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -515,7 +515,7 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) BUG_ON(len > sizeof(long)); BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) - ((long)addr & ~(sizeof(long) - 1))); - { + if (kernel_text_address((unsigned long)addr)) { struct page *pages[2] = { virt_to_page(addr), virt_to_page(addr + PAGE_SIZE) }; if (!pages[1]) @@ -526,6 +526,13 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); local_irq_restore(flags); vunmap(vaddr); + } else { + /* + * modules are in vmalloc'ed memory, always writable. + */ + local_irq_save(flags); + memcpy(addr, opcode, len); + local_irq_restore(flags); } sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but -- cgit v1.2.3 From 4039ae538030d1c5fc70a9c4e168a758d35b8159 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Sun, 23 Mar 2008 00:00:06 +0300 Subject: x86: relocate_kernel_32.S - clear register in more elegant way Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/relocate_kernel_32.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index ec4e6a0e0c2e..fbc4fad23137 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -185,8 +185,7 @@ identity_mapped: /* Set cr4 to a known state: * Setting everything to zero seems safe. 
*/ - movl %cr4, %eax - andl $0, %eax + xorl %eax, %eax movl %eax, %cr4 jmp 1f -- cgit v1.2.3 From a7bba17bf09e1c5bdbdd6c0ab0c7833baedf4653 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Sun, 23 Mar 2008 00:00:07 +0300 Subject: x86: relocate_kernel - use PAGE_SIZE instead of numeric constant Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/relocate_kernel_32.S | 2 +- arch/x86/kernel/relocate_kernel_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fbc4fad23137..ce12bb8678ad 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -154,7 +154,7 @@ relocate_new_kernel: movl %eax, %cr3 /* setup a new stack at the end of the physical control page */ - lea 4096(%edi), %esp + lea PAGE_SIZE(%edi), %esp /* jump to identity mapped page */ movl %edi, %eax diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index e252c0ed9539..c2c8b9d6e241 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -159,7 +159,7 @@ relocate_new_kernel: movq %r9, %cr3 /* setup a new stack at the end of the physical control page */ - lea 4096(%r8), %rsp + lea PAGE_SIZE(%r8), %rsp /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 -- cgit v1.2.3 From fd3af53122e616c0ddba44a3da6d1c1877f72d29 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Sun, 23 Mar 2008 00:00:08 +0300 Subject: x86: relocate_kernel - use predefined macroses for processor state Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/relocate_kernel_32.S | 17 +++++++++-------- arch/x86/kernel/relocate_kernel_64.S | 32 +++++++++++--------------------- 2 files changed, 20 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/relocate_kernel_32.S 
b/arch/x86/kernel/relocate_kernel_32.S index ce12bb8678ad..a7ecc8e0bc67 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -9,6 +9,7 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function @@ -167,16 +168,16 @@ identity_mapped: pushl %edx /* Set cr0 to a known state: - * 31 0 == Paging disabled - * 18 0 == Alignment check disabled - * 16 0 == Write protect disabled - * 3 0 == No task switch - * 2 0 == Don't do FP software emulation. - * 0 1 == Proctected mode enabled + * - Paging disabled + * - Alignment check disabled + * - Write protect disabled + * - No task switch + * - Don't do FP software emulation. + * - Proctected mode enabled */ movl %cr0, %eax - andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax - orl $(1<<0), %eax + andl $~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax + orl $(X86_CR0_PE), %eax movl %eax, %cr0 /* clear cr4 if applicable */ diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c2c8b9d6e241..0c93a945b32e 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -9,6 +9,7 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function @@ -171,33 +172,22 @@ identity_mapped: pushq %rdx /* Set cr0 to a known state: - * 31 1 == Paging enabled - * 18 0 == Alignment check disabled - * 16 0 == Write protect disabled - * 3 0 == No task switch - * 2 0 == Don't do FP software emulation. - * 0 1 == Proctected mode enabled + * - Paging enabled + * - Alignment check disabled + * - Write protect disabled + * - No task switch + * - Don't do FP software emulation. 
+ * - Proctected mode enabled */ movq %cr0, %rax - andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax - orl $((1<<31)|(1<<0)), %eax + andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax + orl $(X86_CR0_PG | X86_CR0_PE), %eax movq %rax, %cr0 /* Set cr4 to a known state: - * 10 0 == xmm exceptions disabled - * 9 0 == xmm registers instructions disabled - * 8 0 == performance monitoring counter disabled - * 7 0 == page global disabled - * 6 0 == machine check exceptions disabled - * 5 1 == physical address extension enabled - * 4 0 == page size extensions disabled - * 3 0 == Debug extensions disabled - * 2 0 == Time stamp disable (disabled) - * 1 0 == Protected mode virtual interrupts disabled - * 0 0 == VME disabled + * - physical address extension enabled */ - - movq $((1<<5)), %rax + movq $X86_CR4_PAE, %rax movq %rax, %cr4 jmp 1f -- cgit v1.2.3 From 366932deb335f0b84a08463c5c912bd42ac3397a Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Sun, 23 Mar 2008 00:00:09 +0300 Subject: x86: relocate_kernel - use predefined macroses for page attributes Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/relocate_kernel_32.S | 5 +++-- arch/x86/kernel/relocate_kernel_64.S | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index a7ecc8e0bc67..c30fe25d470d 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -10,14 +10,15 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function */ #define PTR(x) (x << 2) -#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ -#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ +#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define PAE_PGD_ATTR (_PAGE_PRESENT) .text .align PAGE_SIZE diff --git a/arch/x86/kernel/relocate_kernel_64.S 
b/arch/x86/kernel/relocate_kernel_64.S index 0c93a945b32e..f5afe665a82b 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -10,13 +10,14 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function */ #define PTR(x) (x << 3) -#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ +#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) .text .align PAGE_SIZE -- cgit v1.2.3 From 5524ea320d80e3ac6aeeec44216660831c76da08 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Mar 2008 02:23:20 +0100 Subject: x86: don't set up early exception handlers for external interrupts All of early setup runs with interrupts disabled, so there is no need to set up early exception handlers for vectors >= 32 This saves some minor text size. Signed-off-by: Andi Kleen Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/head_64.S | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 48be76cda93b..d6d54faa84df 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -127,7 +127,7 @@ void __init x86_64_start_kernel(char * real_mode_data) /* Cleanup the over mapped high alias */ cleanup_highmap(); - for (i = 0; i < IDT_ENTRIES; i++) { + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { #ifdef CONFIG_EARLY_PRINTK set_intr_gate(i, &early_idt_handlers[i]); #else diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 017216916dff..2c0abe0e3c68 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -278,10 +278,8 @@ bad_address: .globl early_idt_handlers early_idt_handlers: - early_idt_tramp 0, 63 - early_idt_tramp 64, 127 - early_idt_tramp 128, 191 - early_idt_tramp 192, 255 + .set maxe, NUM_EXCEPTION_VECTORS-1 + early_idt_tramp 0, maxe 
#endif ENTRY(early_idt_handler) -- cgit v1.2.3 From 749c970ae9fa43b4fcf17ac53022a953007d58f4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Mar 2008 02:23:22 +0100 Subject: x86: replace early exception setup macro recursion with loop The early exception handlers are currently set up using a macro recursion. There is only one user left. Replace the macro with a standard loop in place. Noop patch, just a cleanup. [ tglx@linutronix.de: simplified ] Signed-off-by: Andi Kleen Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2c0abe0e3c68..5e0391229502 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -268,18 +268,14 @@ bad_address: jmp bad_address #ifdef CONFIG_EARLY_PRINTK -.macro early_idt_tramp first, last - .ifgt \last-\first - early_idt_tramp \first, \last-1 - .endif - movl $\last,%esi - jmp early_idt_handler -.endm - .globl early_idt_handlers early_idt_handlers: - .set maxe, NUM_EXCEPTION_VECTORS-1 - early_idt_tramp 0, maxe + i = 0 + .rept NUM_EXCEPTION_VECTORS + movl $i, %esi + jmp early_idt_handler + i = i + 1 + .endr #endif ENTRY(early_idt_handler) -- cgit v1.2.3 From 41bd4eac748f39d7f3ed770fae3e595a747172bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Mar 2008 02:23:21 +0100 Subject: x86: move early exception handlers into init.text Currently they are in .text.head because the rest of head_64.S is. .text.head is not removed as init data, but the early exception handlers should be, because they are not needed after early boot of the BP. So move them over.
Signed-off-by: Andi Kleen Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5e0391229502..c1d7a877d814 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -267,6 +267,7 @@ ENTRY(secondary_startup_64) bad_address: jmp bad_address + .section ".init.text","ax" #ifdef CONFIG_EARLY_PRINTK .globl early_idt_handlers early_idt_handlers: @@ -321,6 +322,7 @@ early_idt_msg: early_idt_ripmsg: .asciz "RIP %s\n" #endif /* CONFIG_EARLY_PRINTK */ + .previous .balign PAGE_SIZE -- cgit v1.2.3 From 67794292c8615b05f46419ba8d4fd99e7c9a5db9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Mar 2008 21:27:10 +0100 Subject: x86: replace the now useless max_pfn_mapped define Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/e820_64.c | 28 ++++++++++++++-------------- arch/x86/kernel/setup_64.c | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 956b60f3ebd5..e277c370246d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -115,7 +115,7 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return NULL; - if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) + if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE) return __va(phys_addr); return NULL; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index d6ada0833876..a720f3d5ed9d 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -36,11 +36,11 @@ struct e820map e820; unsigned long end_pfn; /* - * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. 
- * The direct mapping extends to end_pfn_map, so that we can directly access + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. */ -unsigned long end_pfn_map; +unsigned long max_pfn_mapped; /* * Last pfn which the user wants to use. @@ -281,16 +281,16 @@ unsigned long __init e820_end_of_ram(void) end_pfn = find_max_pfn_with_active_regions(); - if (end_pfn > end_pfn_map) - end_pfn_map = end_pfn; - if (end_pfn_map > MAXMEM>>PAGE_SHIFT) - end_pfn_map = MAXMEM>>PAGE_SHIFT; + if (end_pfn > max_pfn_mapped) + max_pfn_mapped = end_pfn; + if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) + max_pfn_mapped = MAXMEM>>PAGE_SHIFT; if (end_pfn > end_user_pfn) end_pfn = end_user_pfn; - if (end_pfn > end_pfn_map) - end_pfn = end_pfn_map; + if (end_pfn > max_pfn_mapped) + end_pfn = max_pfn_mapped; - printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); + printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); return end_pfn; } @@ -366,9 +366,9 @@ static int __init e820_find_active_region(const struct e820entry *ei, if (*ei_startpfn >= *ei_endpfn) return 0; - /* Check if end_pfn_map should be updated */ - if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map) - end_pfn_map = *ei_endpfn; + /* Check if max_pfn_mapped should be updated */ + if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) + max_pfn_mapped = *ei_endpfn; /* Skip if map is outside the node */ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || @@ -759,7 +759,7 @@ static int __init parse_memmap_opt(char *p) saved_max_pfn = e820_end_of_ram(); remove_all_active_ranges(); #endif - end_pfn_map = 0; + max_pfn_mapped = 0; e820.nr_map = 0; userdef = 1; return 0; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index c6fe1e4bc7c2..413b8fc31545 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -347,7 
+347,7 @@ void __init setup_arch(char **cmdline_p) check_efer(); - init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); if (efi_enabled) efi_init(); -- cgit v1.2.3 From cc6150321903ca4c3bc9d53b0cdafb05d77d64d0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Mar 2008 03:53:28 +0100 Subject: x86: account overlapped mappings in max_pfn_mapped When end_pfn is not aligned to 2MB (or 1GB) then the kernel might map more memory than end_pfn. Account this in max_pfn_mapped. Signed-off-by: Andi Kleen Cc: andreas.herrmann3@amd.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 413b8fc31545..3d76dbd9f2c0 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -347,7 +347,7 @@ void __init setup_arch(char **cmdline_p) check_efer(); - init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); + max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); if (efi_enabled) efi_init(); -- cgit v1.2.3 From f5c24a7fd0798d636af184cc7032e7e0cb149112 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Mar 2008 03:53:30 +0100 Subject: x86: don't use large pages to map the first 2/4MB of memory Intel recommends to not use large pages for the first 1MB of the physical memory because there are fixed size MTRRs there which cause splitups in the TLBs. On AMD doing so is also a good idea. The implementation is a little different between 32bit and 64bit. On 32bit I just taught the initial page table set up about this because it was very simple to do. This also has the advantage that the risk of a prefetch ever seeing the page even if it only exists for a short time is minimized. On 64bit that is not quite possible, so use set_memory_4k() a little later (in check_bugs) instead. 
Signed-off-by: Andi Kleen Acked-by: andreas.herrmann3@amd.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/bugs_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c index 60207e999a04..9a3ed0649d4e 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/bugs_64.c @@ -9,6 +9,7 @@ #include #include #include +#include void __init check_bugs(void) { @@ -18,4 +19,15 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); } -- cgit v1.2.3 From 8346ea17aa20e9864b0f7dc03d55f3cd5620b8c1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Mar 2008 03:53:32 +0100 Subject: x86: split large page mapping for AMD TSEG On AMD, SMM protected memory is part of the address map, but handled internally like an MTRR. That leads to large pages getting split internally which has some performance implications. Check for the AMD TSEG MSR and split the large page mapping on that area explicitly if it is part of the direct mapping. There is also SMM ASEG, but it is in the first 1MB and already covered by the earlier split first page patch.
Idea for this came from an earlier patch by Andreas Herrmann On a RevF dual Socket Opteron system kernbench shows a clear improvement from this: (together with the earlier patches in this series, especially the split first 2MB patch) [lower is better] no split stddev split stddev delta Elapsed Time 87.146 (0.727516) 84.296 (1.09098) -3.2% User Time 274.537 (4.05226) 273.692 (3.34344) -0.3% System Time 34.907 (0.42492) 34.508 (0.26832) -1.1% Percent CPU 322.5 (38.3007) 326.5 (44.5128) +1.2% => About 3.2% improvement in elapsed time for kernbench. With GB pages on AMD Fam1h the impact of splitting is much higher of course, since it would split two full GB pages (together with the first 1MB split patch) instead of two 2MB pages. I could not benchmark a clear difference in kernbench on gbpages, so I kept it disabled for that case That was only limited benchmarking of course, so if someone was interested in running more tests for the gbpages case that could be revisited (contributions welcome) I didn't bother implementing this for 32bit because it is very unlikely the 32bit lowmem mapping overlaps into the TSEG near 4GB and the 2MB low split is already handled for both. [ mingo@elte.hu: do it on gbpages kernels too, there's no clear reason why it shouldnt help there. ] Signed-off-by: Andi Kleen Acked-by: andreas.herrmann3@amd.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 3d76dbd9f2c0..b5425979501c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -729,6 +729,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (amd_apic_timer_broken()) disable_apic_timer = 1; + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. 
+ * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && + (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + set_memory_4k((unsigned long)__va(tseg), 1); + } } void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 5af5573ee06c361378e22a9dd71dae0320e841f7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 13:28:56 -0300 Subject: x86: move ipi definitions to mach_ipi.h take them out of the x86_64-only asm/mach_apic.h Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ arch/x86/kernel/crash.c | 4 ---- arch/x86/kernel/io_apic_64.c | 2 ++ arch/x86/kernel/smp.c | 6 +----- arch/x86/kernel/tlb_64.c | 3 ++- 5 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5362cfd30ecd..206278f1c6f4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -41,6 +41,8 @@ #include #include +#include + int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9a5fa0abfcc7..2251d0ae9570 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -26,11 +26,7 @@ #include #include -#ifdef CONFIG_X86_32 #include -#else -#include -#endif /* This keeps a track of which one is crashing cpu. 
*/ static int crashing_cpu; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 1627c0d53e0b..7d5cdf320eba 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -50,6 +50,8 @@ #include #include +#include + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16c52aaaca35..8f75893a6467 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,12 +26,8 @@ #include #include #include -#ifdef CONFIG_X86_32 -#include #include -#else -#include -#endif +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 615d84817758..1558e513757e 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -11,11 +11,12 @@ #include #include #include -#include #include #include #include #include + +#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. -- cgit v1.2.3 From ab68ed98f665436601feec853c8f400d28c39e92 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 25 Mar 2008 22:16:32 +0300 Subject: x86: entry_32.S - use flags from processor-flags.h By including processor-flags.h we can use the predefined macros instead of keeping our own. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a664d5726d8d..9ba49a26dff8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -51,6 +51,7 @@ #include #include #include +#include #include "irq_vectors.h" /* @@ -68,13 +69,6 @@ #define nr_syscalls ((syscall_table_size)/4) -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 -VM_MASK = 0x00020000 - #ifdef CONFIG_PREEMPT #define preempt_stop(clobbers)
DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else @@ -84,7 +78,7 @@ VM_MASK = 0x00020000 .macro TRACE_IRQS_IRET #ifdef CONFIG_TRACE_IRQFLAGS - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? jz 1f TRACE_IRQS_ON 1: @@ -246,7 +240,7 @@ ret_from_intr: check_userspace: movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax jb resume_kernel # not returning to v8086 or userspace @@ -271,7 +265,7 @@ need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq jmp need_resched @@ -388,7 +382,7 @@ syscall_exit: # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit + testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit jz no_singlestep orl $_TIF_SINGLESTEP,TI_flags(%ebp) no_singlestep: @@ -403,7 +397,7 @@ restore_all: # See comments in process.c:copy_thread() for details. 
movb PT_OLDSS(%esp), %ah movb PT_CS(%esp), %al - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS @@ -490,7 +484,7 @@ work_resched: work_notifysig: # deal with pending signals and # notify-resume requests #ifdef CONFIG_VM86 - testl $VM_MASK, PT_EFLAGS(%esp) + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space -- cgit v1.2.3 From dd46e3ca73d136aa7f9f1813e4cbb6934c3611cc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:10:46 -0300 Subject: x86: move apic declarations to mach_apic.h take them out of the x86_64-specific asm/mach_apic.h Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/io_apic_64.c | 2 +- arch/x86/kernel/setup_64.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 206278f1c6f4..7dd6250aaf6c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -42,6 +41,7 @@ #include #include +#include int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 33d38f8305ee..0173065dc3b7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,8 +4,8 @@ #include #include #include -#include +#include #include "cpu.h" /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 7d5cdf320eba..6dd33628f28a 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include 
#include @@ -51,6 +50,7 @@ #include #include +#include struct irq_cfg { cpumask_t domain; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b5425979501c..540686be35d0 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -67,6 +66,7 @@ #include #include +#include #ifdef CONFIG_PARAVIRT #include #else -- cgit v1.2.3 From 0e03eb86b51b21054aea01ada1d03e9c2265dd20 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 26 Mar 2008 12:09:16 -0400 Subject: x86: Centaur Isaiah processor to use sysenter in 64-bit compatibility mode rather than syscall Upcoming 64 bit processors from Centaur can use sysenter. Signed-off-by: Dave Jones Signed-off-by: Jesse Ahrens Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 540686be35d0..b80300710c08 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -885,6 +885,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) srat_detect_node(); } +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x6 && c->x86_model >= 0xf) + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +} + static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ 
-893,6 +919,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) c->x86_vendor = X86_VENDOR_AMD; else if (!strcmp(v, "GenuineIntel")) c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; else c->x86_vendor = X86_VENDOR_UNKNOWN; } @@ -989,6 +1017,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) set_cpu_cap(c, X86_FEATURE_PAT); break; + case X86_VENDOR_CENTAUR: + early_init_centaur(c); + break; } } @@ -1025,6 +1056,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) init_intel(c); break; + case X86_VENDOR_CENTAUR: + init_centaur(c); + break; + case X86_VENDOR_UNKNOWN: default: display_cacheinfo(c); -- cgit v1.2.3 From 2df297261903249f5ac2d3d14ededbda229397e2 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:53:54 +0300 Subject: x86: move es7000_plat closer to its user Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 1b225cea5d7b..4cc325edc70d 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -867,8 +867,6 @@ void __init find_smp_config (void) smp_scan_config(address, 0x400); } -int es7000_plat; - /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ @@ -1029,6 +1027,8 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) panic("Max # of irq sources exceeded!\n"); } +int es7000_plat; + void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; -- cgit v1.2.3 From 987dd2d4d465e80e00d6a0b16787a78aa75dd66a Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:06 
+0300 Subject: x86: don't call MP_processor_info for disabled cpu Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 4cc325edc70d..c487bc99d7a3 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -895,6 +895,10 @@ void __cpuinit mp_register_lapic (u8 id, u8 enabled) id, MAX_APICS); return; } + if (!enabled) { + ++disabled_cpus; + return; + } if (id == boot_cpu_physical_apicid) boot_cpu = 1; -- cgit v1.2.3 From c853c67690448415af2d204062028d1456f524de Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:13 +0300 Subject: x86: separate generic_processor_info into its own function Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 158 ++++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 76 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index c487bc99d7a3..b0aed978eef1 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -109,12 +109,91 @@ static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; #endif -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit generic_processor_info(int apicid, int version) { - int ver, apicid, cpu; + int cpu; cpumask_t tmp_map; physid_mask_t phys_cpu; + + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. 
(tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + if (apicid == boot_cpu_physical_apicid) + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + cpu = 0; + + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (num_processors > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#ifdef CONFIG_SMP + /* are we being called early in kernel startup? 
*/ + if (x86_cpu_to_apicid_early_ptr) { + u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; + u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; + } +#endif + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); +} + +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +{ + int apicid; + if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; return; @@ -184,80 +263,7 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) boot_cpu_physical_apicid = m->mpc_apicid; } - ver = m->mpc_apicver; - - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; - - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. 
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(ver)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#ifdef CONFIG_SMP - /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; - - cpu_to_apicid[cpu] = m->mpc_apicid; - bios_cpu_apicid[cpu] = m->mpc_apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; - } -#endif - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); + generic_processor_info(apicid, m->mpc_apicver); } static void __init MP_bus_info (struct mpc_config_bus *m) -- cgit v1.2.3 From 08bef9d337f26747b9520278872d20f15983fcda Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:20 +0300 Subject: x86: don't use MP_processor_info for ACPI mode Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index b0aed978eef1..9f23018190c5 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -114,7 +114,6 @@ static void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; physid_mask_t phys_cpu; - /* * Validate version @@ -893,34 +892,18 @@ void __init mp_register_lapic_address(u64 address) void __cpuinit mp_register_lapic (u8 id, u8 enabled) { - struct mpc_config_processor processor; - int boot_cpu = 0; - if (MAX_APICS - id <= 0) { printk(KERN_WARNING "Processor #%d invalid (max %d)\n", id, MAX_APICS); return; } + if (!enabled) { 
++disabled_cpus; return; } - if (id == boot_cpu_physical_apicid) - boot_cpu = 1; - - processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicid = id; - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - - MP_processor_info(&processor); + generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); } #ifdef CONFIG_X86_IO_APIC -- cgit v1.2.3 From e81b2c62d66068d210ddeacd77076068184d414a Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:31 +0300 Subject: x86: move apic_ver array to apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/mpparse_32.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6f506020bd7d..bdfffb091d12 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1225,6 +1225,9 @@ fake_ioapic_page: * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ + +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (enable_local_apic < 0) diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 9f23018190c5..18882697987e 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -41,7 +41,6 @@ unsigned int __cpuinitdata maxcpus = NR_CPUS; * Various Linux-internal data structures created from the * MP-table. 
*/ -int apic_version [MAX_APICS]; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) int mp_bus_id_to_type [MAX_MP_BUSSES]; #endif -- cgit v1.2.3 From 8f6e2ca9f862cb3738ad83fb18c572d8a59c0849 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:38 +0300 Subject: x86: move mp_lapic_addr to apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ arch/x86/kernel/mpparse_32.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index bdfffb091d12..68ea75fa6d35 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -50,6 +50,8 @@ # error SPURIOUS_APIC_VECTOR definition error #endif +unsigned long mp_lapic_addr; + /* * Knob to control our willingness to enable the local APIC. * diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 18882697987e..728aa9900934 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -60,7 +60,6 @@ int mp_irq_entries; int nr_ioapics; int pic_mode; -unsigned long mp_lapic_addr; unsigned int def_to_bigsmp = 0; -- cgit v1.2.3 From 40014bace17ba393409fd8a4915a87e43687aac8 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:44 +0300 Subject: x86: move phys_cpu_present_map to smpboot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 +++- arch/x86/kernel/smpboot.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 728aa9900934..f7eceabc7da9 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -70,8 +70,10 @@ unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; -/* Bitmask of physically existing CPUs */ +/* Make it easy to share the UP and SMP code: */ +#ifndef CONFIG_X86_SMP 
physid_mask_t phys_cpu_present_map; +#endif #ifndef CONFIG_SMP DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 61b9a5b6fc07..8b6eefd9e906 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -85,6 +85,10 @@ u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata void *x86_bios_cpu_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + u8 apicid_2_node[MAX_APICID]; #endif -- cgit v1.2.3 From 2bb9e9d7c1b03454665cd99f7d73e67139cdf2e6 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:50 +0300 Subject: x86: move num_processors to smpboot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 5 ++++- arch/x86/kernel/smpboot.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index f7eceabc7da9..4d810b0384a5 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -65,8 +65,11 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -/* Internal processor count */ + +/* Make it easy to share the UP and SMP code: */ +#ifndef CONFIG_X86_SMP unsigned int num_processors; +#endif unsigned disabled_cpus __cpuinitdata; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b6eefd9e906..e1288c2626f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -86,6 +86,9 @@ void *x86_bios_cpu_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +/* Internal processor count */ +unsigned int num_processors; + /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; 
-- cgit v1.2.3 From 53c4c793b30bbf6e1a25cab61790b18f205dd365 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:54:57 +0300 Subject: x86: move disabled_cpus to smpboot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 3 +-- arch/x86/kernel/smpboot.c | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 4d810b0384a5..bf29dcc37de7 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -69,9 +69,8 @@ unsigned int boot_cpu_physical_apicid = -1U; /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP unsigned int num_processors; -#endif - unsigned disabled_cpus __cpuinitdata; +#endif /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e1288c2626f8..d3402e2c57eb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,6 +88,7 @@ EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); /* Internal processor count */ unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -- cgit v1.2.3 From 059c9640b57cb8e70c60de141ec817b450431816 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Mar 2008 11:57:55 +0100 Subject: x86: mpparse, move disabled cpus to smpboot.c, fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index bf29dcc37de7..0f877a572cf0 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -196,7 +196,9 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) int apicid; if (!(m->mpc_cpuflag & CPU_ENABLED)) { +#ifdef CONFIG_X86_SMP disabled_cpus++; 
+#endif return; } @@ -901,7 +903,9 @@ void __cpuinit mp_register_lapic (u8 id, u8 enabled) } if (!enabled) { +#ifdef CONFIG_X86_SMP ++disabled_cpus; +#endif return; } -- cgit v1.2.3 From 0c254e38d294d3720588e2a1fd954d828073f1dc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:04 +0300 Subject: x86: move def_to_bigsmp to setup_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 2 -- arch/x86/kernel/setup_32.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 0f877a572cf0..e5376dc5d0cc 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -61,8 +61,6 @@ int nr_ioapics; int pic_mode; -unsigned int def_to_bigsmp = 0; - /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 58f3c1fbc5c3..4b198d9d0de3 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -155,6 +155,8 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +unsigned int def_to_bigsmp; + #ifndef CONFIG_X86_PAE unsigned long mmu_cr4_features; #else -- cgit v1.2.3 From 837e0e7a7f574220c87c552cca9f425575418621 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:10 +0300 Subject: x86: move boot_cpu_physical_apicid to apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/mpparse_32.c | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 68ea75fa6d35..f0abd59a2a3c 100644 --- a/arch/x86/kernel/apic_32.c +++ 
b/arch/x86/kernel/apic_32.c @@ -52,6 +52,9 @@ unsigned long mp_lapic_addr; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; + /* * Knob to control our willingness to enable the local APIC. * diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index e5376dc5d0cc..9143810bb637 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -61,13 +61,11 @@ int nr_ioapics; int pic_mode; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; - /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; +unsigned int boot_cpu_physical_apicid = -1U; #endif /* Make it easy to share the UP and SMP code: */ -- cgit v1.2.3 From fae9811b775655a02dcb51fa0b6423b546468bd1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Mar 2008 12:22:10 +0100 Subject: x86: mpparse, move boot cpu physical apicid to apic_32.c, fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 9143810bb637..9b61b50a96c9 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -65,8 +65,10 @@ int pic_mode; #ifndef CONFIG_X86_SMP unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; +#ifndef CONFIG_X86_LOCAL_APIC unsigned int boot_cpu_physical_apicid = -1U; #endif +#endif /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP -- cgit v1.2.3 From acff5a768935f7f39e4e3be03940d70c005ffe96 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:16 +0300 Subject: x86: move x86_bios_cpu_apicid to apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/mpparse_32.c | 4 ---- arch/x86/kernel/smpboot.c | 2 -- 3 files changed, 3 
insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f0abd59a2a3c..65036bbaf058 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -55,6 +55,9 @@ unsigned long mp_lapic_addr; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + /* * Knob to control our willingness to enable the local APIC. * diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 9b61b50a96c9..c79d6e06c3fa 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -75,10 +75,6 @@ unsigned int boot_cpu_physical_apicid = -1U; physid_mask_t phys_cpu_present_map; #endif -#ifndef CONFIG_SMP -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -#endif - /* * Intel MP BIOS table parsing routines: */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d3402e2c57eb..7bcee1584b50 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -83,8 +83,6 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... 
NR_CPUS-1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); /* Internal processor count */ unsigned int num_processors; -- cgit v1.2.3 From 903dcb5a1bd0ef2b09d756f646e367cd12659b6f Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:22 +0300 Subject: x86: move generic_processor_info to apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 82 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/mpparse_32.c | 81 ------------------------------------------- 2 files changed, 82 insertions(+), 81 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65036bbaf058..a99398f71234 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1469,6 +1469,88 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } +unsigned int __cpuinitdata maxcpus = NR_CPUS; + +void __cpuinit generic_processor_info(int apicid, int version) +{ + int cpu; + cpumask_t tmp_map; + physid_mask_t phys_cpu; + + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." 
+ " Processor ignored.\n", maxcpus); + return; + } + + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + if (apicid == boot_cpu_physical_apicid) + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + cpu = 0; + + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (num_processors > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#ifdef CONFIG_SMP + /* are we being called early in kernel startup? */ + if (x86_cpu_to_apicid_early_ptr) { + u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; + u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; + } +#endif + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); +} + /* * Power management */ diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index c79d6e06c3fa..cd4522b3e90e 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -35,7 +35,6 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -105,86 +104,6 @@ static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; #endif -static void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - physid_mask_t phys_cpu; - - 
/* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - if (apicid == boot_cpu_physical_apicid) - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#ifdef CONFIG_SMP - /* are we being called early in kernel startup? 
*/ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int apicid; -- cgit v1.2.3 From 8ccab29ca8c441ae00d878d2f0000275f430f8a5 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:28 +0300 Subject: x86: don't call MP_processor_info for disabled cpu (64bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 83a36eed081b..3681b9d8f557 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -678,6 +678,10 @@ void __cpuinit mp_register_lapic(u8 id, u8 enabled) struct mpc_config_processor processor; int boot_cpu = 0; + if (!enabled) { + ++disabled_cpus; + return; + } if (id == boot_cpu_physical_apicid) boot_cpu = 1; -- cgit v1.2.3 From 0e01c00c1fadd21356a6cf57d6680497256e1a01 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:34 +0300 Subject: x86: separate generic_processor_info into its own function (64bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 3681b9d8f557..7f8ece4190e6 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -93,22 +93,10 @@ static int __init mpf_checksum(unsigned char *mp, int len) 
return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - char *bootup_cpu = ""; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) { - disabled_cpus++; - return; - } - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - bootup_cpu = " (Bootup-CPU)"; - boot_cpu_physical_apicid = m->mpc_apicid; - } - - printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." @@ -126,8 +114,8 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - physid_set(m->mpc_apicid, phys_cpu_present_map); - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. 
Hence the first @@ -140,17 +128,34 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; - cpu_to_apicid[cpu] = m->mpc_apicid; - bios_cpu_apicid[cpu] = m->mpc_apicid; + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; } else { - per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +{ + char *bootup_cpu = ""; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; + return; + } + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + bootup_cpu = " (Bootup-CPU)"; + boot_cpu_physical_apicid = m->mpc_apicid; + } + + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); + generic_processor_info(m->mpc_apicid, 0); +} + static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; -- cgit v1.2.3 From 468e85b9594ed3000a7076f1caf27aa0cf7799fc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:41 +0300 Subject: x86: don't use MP_processor_info for ACPI mode (64bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 7f8ece4190e6..4da834e1188e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -680,27 +680,12 @@ void __init mp_register_lapic_address(u64 address) void __cpuinit mp_register_lapic(u8 id, u8 enabled) { - struct mpc_config_processor processor; - int boot_cpu = 0; - if (!enabled) { ++disabled_cpus; return; } - if (id == boot_cpu_physical_apicid) - boot_cpu = 1; 
- - processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicid = id; - processor.mpc_apicver = 0; - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); - processor.mpc_cpufeature = 0; - processor.mpc_featureflag = 0; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - MP_processor_info(&processor); + generic_processor_info(id, 0); } #define MP_ISA_BUS 0 -- cgit v1.2.3 From 3f530709d907d93a4d6881e8190916028181a840 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:47 +0300 Subject: x86: move mp_lapic_addr to apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ arch/x86/kernel/mpparse_64.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7dd6250aaf6c..0794646e68f0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -85,6 +85,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned long mp_lapic_addr; + /* * Get the LAPIC version */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 4da834e1188e..a91e21ebf1ca 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -55,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; int mp_irq_entries; int nr_ioapics; -unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -- cgit v1.2.3 From 7abb3cca33fe220abaf680afcd247370749622ee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 8 Apr 2008 12:20:07 +0200 Subject: x86: move phys cpu present map to smpboot.c, 64-bit, prepare Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c 
b/arch/x86/kernel/mpparse_64.c index a91e21ebf1ca..f044e98800ef 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -68,9 +68,11 @@ unsigned disabled_cpus __cpuinitdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; +#ifdef CONFIG_SMP u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = {[0 ... NR_CPUS - 1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; +#endif DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -- cgit v1.2.3 From 1d8554326533568c7e9d5285600c3d0c027b45cc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:53 +0300 Subject: x86: move phys_cpu_present_map to smpboot.c (64bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 3 --- arch/x86/kernel/smpboot.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index f044e98800ef..8e8e38f2b182 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -65,9 +65,6 @@ unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; - #ifdef CONFIG_SMP u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = {[0 ... 
NR_CPUS - 1] = BAD_APICID }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7bcee1584b50..eee7768de2ae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,12 +88,12 @@ void *x86_bios_cpu_apicid_early_ptr; unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; - u8 apicid_2_node[MAX_APICID]; #endif +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -- cgit v1.2.3 From 7b8cbd2c2f1bf9e3090d3c3fc09330ed1ca28d25 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:55:59 +0300 Subject: x86: move num_processors to smpboot.c (64 bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 3 --- arch/x86/kernel/smpboot.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8e8e38f2b182..71f098414d0b 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -60,9 +60,6 @@ int nr_ioapics; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); -/* Internal processor count */ -unsigned int num_processors; - unsigned disabled_cpus __cpuinitdata; #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eee7768de2ae..fd0bdd36f4ea 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -84,13 +84,14 @@ u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... 
NR_CPUS-1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; -/* Internal processor count */ -unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; u8 apicid_2_node[MAX_APICID]; #endif +/* Internal processor count */ +unsigned int num_processors; + /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -- cgit v1.2.3 From 3103623eed1a3ea4a36ee26725842a8038760648 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:56:06 +0300 Subject: x86: move disabled_cpus to smpboot.c (64bit) Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 2 -- arch/x86/kernel/smpboot.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 71f098414d0b..a1d8d4432988 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -60,8 +60,6 @@ int nr_ioapics; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); -unsigned disabled_cpus __cpuinitdata; - #ifdef CONFIG_SMP u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = {[0 ... NR_CPUS - 1] = BAD_APICID }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd0bdd36f4ea..f45d740b1b6a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -84,8 +84,6 @@ u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; -unsigned disabled_cpus __cpuinitdata; - u8 apicid_2_node[MAX_APICID]; #endif @@ -98,6 +96,8 @@ physid_mask_t phys_cpu_present_map; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; +unsigned disabled_cpus __cpuinitdata; + /* Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality * for idle threads. 
-- cgit v1.2.3 From 86cc0d916a9cc55b0b46a9b31e9379cd3c9e10dc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:56:12 +0300 Subject: x86: move boot_cpu_physical_apicid to apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 4 ++++ arch/x86/kernel/mpparse_64.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0794646e68f0..d7d3594f93e3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -87,6 +87,10 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); + /* * Get the LAPIC version */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index a1d8d4432988..49e3bfe01022 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -56,10 +56,6 @@ int mp_irq_entries; int nr_ioapics; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); - #ifdef CONFIG_SMP u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = {[0 ... 
NR_CPUS - 1] = BAD_APICID }; -- cgit v1.2.3 From be8a5685e4cdb904e6542e741fcc3bae1becb8ee Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 27 Mar 2008 23:56:19 +0300 Subject: x86: move generic_processor_info to apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 47 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/mpparse_64.c | 49 +------------------------------------------- 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d7d3594f93e3..4ee521ff0a3e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -91,6 +91,7 @@ unsigned long mp_lapic_addr; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); +unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ @@ -1057,6 +1058,52 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT1, value); } +void __cpuinit generic_processor_info(int apicid, int version) +{ + int cpu; + cpumask_t tmp_map; + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + cpu = 0; + } + /* are we being called early in kernel startup? 
*/ + if (x86_cpu_to_apicid_early_ptr) { + u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; + u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; + } + + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); +} + /* * Power management */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 49e3bfe01022..d62294003036 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -35,7 +35,6 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -82,52 +81,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - /* are we being called early in kernel startup? 
*/ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { char *bootup_cpu = ""; @@ -666,7 +619,6 @@ void __init mp_register_lapic_address(u64 address) if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); } - void __cpuinit mp_register_lapic(u8 id, u8 enabled) { if (!enabled) { @@ -677,6 +629,7 @@ void __cpuinit mp_register_lapic(u8 id, u8 enabled) generic_processor_info(id, 0); } + #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -- cgit v1.2.3 From fe176de0ffdc2dd300fbcece84434a32b482b5b1 Mon Sep 17 00:00:00 2001 From: Ben Castricum Date: Thu, 27 Mar 2008 20:52:35 +0100 Subject: x86: microcode: show results on success too Report when microcode was successfully updated. It used to be there but now with DEBUG unset it becomes very silent. Also some cosmetic fixes. 
Signed-off-by: Ben Castricum Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index f2702d01b8a8..25cf6dee4e56 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -290,7 +290,7 @@ static int get_maching_microcode(void *mc, int cpu) } return 0; find: - pr_debug("microcode: CPU %d found a matching microcode update with" + pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { @@ -336,11 +336,11 @@ static void apply_microcode(int cpu) spin_unlock_irqrestore(&microcode_update_lock, flags); if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d updated from revision " + printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); return; } - pr_debug("microcode: CPU%d updated from revision " + printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %08x \n", cpu_num, uci->rev, val[1], uci->mc->hdr.date); uci->rev = val[1]; @@ -534,7 +534,7 @@ static int cpu_request_microcode(int cpu) c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, &microcode_pdev->dev); if (error) { - pr_debug("ucode data file %s load failed\n", name); + pr_debug("microcode: ucode data file %s load failed\n", name); return error; } buf = firmware->data; @@ -709,7 +709,7 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) if (!cpu_online(cpu)) return 0; - pr_debug("Microcode:CPU %d added\n", cpu); + pr_debug("microcode: CPU%d added\n", cpu); memset(uci, 0, sizeof(*uci)); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); @@ -733,7 +733,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - 
pr_debug("Microcode:CPU %d removed\n", cpu); + pr_debug("microcode: CPU%d removed\n", cpu); microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; @@ -745,7 +745,7 @@ static int mc_sysdev_resume(struct sys_device *dev) if (!cpu_online(cpu)) return 0; - pr_debug("Microcode:CPU %d resumed\n", cpu); + pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ apply_microcode(0); return 0; @@ -783,7 +783,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) } case CPU_DOWN_FAILED_FROZEN: if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "Microcode: Failed to create the sysfs " + printk(KERN_ERR "microcode: Failed to create the sysfs " "group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: -- cgit v1.2.3 From e937fcf2fa0c1d21f9c0008ab600d46c240a984c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Mar 2008 12:33:52 +0100 Subject: x86: mpparse: 64-bit fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index d62294003036..5e789bdb34fa 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -63,6 +63,20 @@ void *x86_bios_cpu_apicid_early_ptr; DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +/* Make it easy to share the UP and SMP code: */ +#ifndef CONFIG_X86_SMP +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +#ifndef CONFIG_X86_LOCAL_APIC +unsigned int boot_cpu_physical_apicid = -1U; +#endif +#endif + +/* Make it easy to share the UP and SMP code: */ +#ifndef CONFIG_X86_SMP +physid_mask_t phys_cpu_present_map; +#endif + /* * Intel MP BIOS table parsing routines: */ -- cgit v1.2.3 From f7743fe676fadac8706e7cbd0176b46d7397996d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 27 Mar 
2008 17:28:40 -0700 Subject: x86: paravirt_ops: don't steal memory resources in paravirt_disable_iospace The memory resource is also used for main memory, and we need it to allocate physical addresses for memory hotplug. Knobbling io space is enough to get the job done anyway. Signed-off-by: Jeremy Fitzhardinge Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 075962cc75ab..3733412d1357 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -206,13 +206,6 @@ static struct resource reserve_ioports = { .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; -static struct resource reserve_iomem = { - .start = 0, - .end = -1, - .name = "paravirt-iomem", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - /* * Reserve the whole legacy IO space to prevent any legacy drivers * from wasting time probing for their hardware. 
This is a fairly @@ -222,16 +215,7 @@ static struct resource reserve_iomem = { */ int paravirt_disable_iospace(void) { - int ret; - - ret = request_resource(&ioport_resource, &reserve_ioports); - if (ret == 0) { - ret = request_resource(&iomem_resource, &reserve_iomem); - if (ret) - release_resource(&reserve_ioports); - } - - return ret; + return request_resource(&ioport_resource, &reserve_ioports); } static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; -- cgit v1.2.3 From 6093015db2bd9e70cf20cdd23be1a50733baafdd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 30 Mar 2008 11:45:23 +0200 Subject: x86: cleanup replace most vm86 flags with flags from processor-flags.h, fix - fix build error - fix CONFIG_HEADERS_CHECK error Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 57a5704e3f6c..3284502a1bf8 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -959,7 +959,7 @@ debug_vm86: clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~TF_MASK; + regs->flags &= ~X86_EFLAGS_TF; return; } -- cgit v1.2.3 From 6b6891f9c545ccd45d6d8ddfd33ce27c22c271a7 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Fri, 28 Mar 2008 17:56:57 +0300 Subject: x86: cleanup - rename VM_MASK to X86_VM_MASK This patch renames VM_MASK to X86_VM_MASK (which in turn defined as alias to X86_EFLAGS_VM) to better distinguish from virtual memory flags. 
We can't just use X86_EFLAGS_VM instead because it is also used for conditional compilation Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 8 ++++---- arch/x86/kernel/vm86_32.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 3284502a1bf8..bb9107c56ff5 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -498,7 +498,7 @@ do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, { struct task_struct *tsk = current; - if (regs->flags & VM_MASK) { + if (regs->flags & X86_VM_MASK) { if (vm86) goto vm86_trap; goto trap_signal; @@ -643,7 +643,7 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code) } put_cpu(); - if (regs->flags & VM_MASK) + if (regs->flags & X86_VM_MASK) goto gp_in_vm86; if (!user_mode(regs)) @@ -922,7 +922,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_dr7; } - if (regs->flags & VM_MASK) + if (regs->flags & X86_VM_MASK) goto debug_vm86; /* Save debug status register where ptrace can see it */ @@ -1094,7 +1094,7 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) * Handle strange cache flush from user space exception * in all other cases. This is undocumented behaviour. 
*/ - if (regs->flags & VM_MASK) { + if (regs->flags & X86_VM_MASK) { handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); return; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 51040698c222..c866c00f4a85 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -299,7 +299,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk VEFLAGS = info->regs.pt.flags; info->regs.pt.flags &= SAFE_MASK; info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; - info->regs.pt.flags |= VM_MASK; + info->regs.pt.flags |= X86_VM_MASK; switch (info->cpu_type) { case CPU_286: -- cgit v1.2.3 From a5c15d419d4b68535222b51f9054dd08d5e67470 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Fri, 28 Mar 2008 17:56:56 +0300 Subject: x86: replace most VM86 flags with flags from processor-flags.h Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 2 +- arch/x86/kernel/signal_32.c | 4 ++-- arch/x86/kernel/vm86_32.c | 34 +++++++++++++++++----------------- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index cc8ae90103ff..b8c6743a13da 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -489,7 +489,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, break; case KPROBE_HIT_SS: if (p == kprobe_running()) { - regs->flags &= ~TF_MASK; + regs->flags &= ~X86_EFLAGS_TF; regs->flags |= kcb->kprobe_saved_flags; return 0; } else { diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index aa1b6a0a22e4..f1b117930837 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -419,7 +419,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, * The tracer may want to single-step inside the * handler too. 
*/ - regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -507,7 +507,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * The tracer may want to single-step inside the * handler too. */ - regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index c866c00f4a85..38f566fa27d2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -139,7 +139,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs); tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap); if (tmp) { @@ -306,13 +306,13 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.v86mask = 0; break; case CPU_386: - tsk->thread.v86mask = NT_MASK | IOPL_MASK; + tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; case CPU_486: - tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; + tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; default: - tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; + tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; } @@ -363,24 +363,24 @@ static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) static inline void set_IF(struct kernel_vm86_regs *regs) { - VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & VIP_MASK) + VEFLAGS |= X86_EFLAGS_VIF; + if (VEFLAGS & X86_EFLAGS_VIP) return_to_32bit(regs, VM86_STI); } static inline 
void clear_IF(struct kernel_vm86_regs *regs) { - VEFLAGS &= ~VIF_MASK; + VEFLAGS &= ~X86_EFLAGS_VIF; } static inline void clear_TF(struct kernel_vm86_regs *regs) { - regs->pt.flags &= ~TF_MASK; + regs->pt.flags &= ~X86_EFLAGS_TF; } static inline void clear_AC(struct kernel_vm86_regs *regs) { - regs->pt.flags &= ~AC_MASK; + regs->pt.flags &= ~X86_EFLAGS_AC; } /* @@ -399,7 +399,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs { set_flags(VEFLAGS, flags, current->thread.v86mask); set_flags(regs->pt.flags, flags, SAFE_MASK); - if (flags & IF_MASK) + if (flags & X86_EFLAGS_IF) set_IF(regs); else clear_IF(regs); @@ -409,7 +409,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg { set_flags(VFLAGS, flags, current->thread.v86mask); set_flags(regs->pt.flags, flags, SAFE_MASK); - if (flags & IF_MASK) + if (flags & X86_EFLAGS_IF) set_IF(regs); else clear_IF(regs); @@ -419,9 +419,9 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) { unsigned long flags = regs->pt.flags & RETURN_MASK; - if (VEFLAGS & VIF_MASK) - flags |= IF_MASK; - flags |= IOPL_MASK; + if (VEFLAGS & X86_EFLAGS_VIF) + flags |= X86_EFLAGS_IF; + flags |= X86_EFLAGS_IOPL; return flags | (VEFLAGS & current->thread.v86mask); } @@ -573,11 +573,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) #define CHECK_IF_IN_TRAP \ if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ - newflags |= TF_MASK + newflags |= X86_EFLAGS_TF #define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ + if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & TF_MASK) \ + if (orig_flags & X86_EFLAGS_TF) \ handle_vm86_trap(regs, 0, 1); \ return; } while (0) -- cgit v1.2.3 From 05f2d12c3563dea8c81b301f9f3cf7919af23b13 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 28 Mar 2008 14:12:02 -0500 
Subject: x86: change GET_APIC_ID() from an inline function to an out-of-line function Introduce a function to read the local APIC_ID. This change is in preparation for additional changes to the APICID functions that will come in a later patch. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 10 +++++----- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/io_apic_32.c | 6 +++--- arch/x86/kernel/io_apic_64.c | 5 ++--- arch/x86/kernel/mpparse_32.c | 2 +- arch/x86/kernel/mpparse_64.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++--- 8 files changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a99398f71234..4905a11b30e3 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1195,7 +1195,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); #ifdef CONFIG_X86_IO_APIC { @@ -1265,7 +1265,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); #endif phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4ee521ff0a3e..9b4cacdfd74f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -650,10 +650,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. 
*/ - reg0 = apic_read(APIC_ID); + reg0 = read_apic_id(); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); + reg1 = read_apic_id(); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -892,7 +892,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } /** @@ -919,7 +919,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } /* @@ -1140,7 +1140,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_id = read_apic_id(); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 07352b74bda6..6a44e8dace37 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -97,7 +97,7 @@ static void flat_send_IPI_all(int vector) static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); + return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 8ed6eb967652..bfebe7a1966d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1482,8 +1482,8 @@ void /*__init*/ 
print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, + GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1692,7 +1692,7 @@ void disable_IO_APIC(void) entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; entry.dest.physical.physical_dest = - GET_APIC_ID(apic_read(APIC_ID)); + GET_APIC_ID(read_apic_id()); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 6dd33628f28a..0ac92d6acf57 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1068,8 +1068,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... 
APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1263,7 +1262,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(apic_read(APIC_ID)); + entry.dest = GET_APIC_ID(read_apic_id()); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index cd4522b3e90e..4b46a37e0634 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -802,7 +802,7 @@ void __init mp_register_lapic_address(u64 address) set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 5e789bdb34fa..29d2c40e54a2 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -631,7 +631,7 @@ void __init mp_register_lapic_address(u64 address) mp_lapic_addr = (unsigned long)address; set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } void __cpuinit mp_register_lapic(u8 id, u8 enabled) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f45d740b1b6a..5da35d2cdbd8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -237,7 +237,7 @@ void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) 
*/ - phys_id = GET_APIC_ID(apic_read(APIC_ID)); + phys_id = GET_APIC_ID(read_apic_id()); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -1205,9 +1205,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) return; } - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) { + if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_physical_apicid); + GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } -- cgit v1.2.3 From ae261868658773538ddda829c50224e5851c2342 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 28 Mar 2008 14:12:06 -0500 Subject: x86: add functions to determine if platform is a UV platform Add functions that can be used to determine if an x86_64 system is a SGI "UV" system. UV systems come in 3 types and are identified by the OEM ID in the MADT. 
Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 +--- arch/x86/kernel/genapic_64.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e277c370246d..05878ac934db 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -56,9 +56,7 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 #include - -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } - +#include #else /* X86 */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 4ae7b6440260..c873f60c74a6 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -33,6 +33,8 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); struct genapic __read_mostly *genapic = &apic_flat; +static enum uv_system_type uv_system_type; + /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ @@ -64,3 +66,26 @@ void send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } + +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) + uv_system_type = UV_NON_UNIQUE_APIC; + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} -- cgit v1.2.3 From a65d1d644c2b65bfb99e766e7160d764b8b2bfa4 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 28 Mar 2008 14:12:08 -0500 Subject: x86: increase size of APICID Increase the number of bits in an apicid from 8 to 32. By default, MP_processor_info() gets the APICID from the mpc_config_processor structure. 
However, this structure limits the size of APICID to 8 bits. This patch allows the caller of MP_processor_info() to optionally pass a larger APICID that will be used instead of the one in the mpc_config_processor struct. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 4 ++-- arch/x86/kernel/mpparse_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 4b46a37e0634..7b7e008496e0 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -807,7 +807,7 @@ void __init mp_register_lapic_address(u64 address) Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } -void __cpuinit mp_register_lapic (u8 id, u8 enabled) +void __cpuinit mp_register_lapic (int id, u8 enabled) { if (MAX_APICS - id <= 0) { printk(KERN_WARNING "Processor #%d invalid (max %d)\n", @@ -862,7 +862,7 @@ static u8 uniq_ioapic_id(u8 id) return id; } -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) { int idx = 0; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 29d2c40e54a2..4840a846904e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -633,7 +633,7 @@ void __init mp_register_lapic_address(u64 address) if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } -void __cpuinit mp_register_lapic(u8 id, u8 enabled) +void __cpuinit mp_register_lapic(int id, u8 enabled) { if (!enabled) { ++disabled_cpus; @@ -683,7 +683,7 @@ static u8 uniq_ioapic_id(u8 id) return find_first_zero_bit(used, 256); } -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) { int idx = 0; -- cgit v1.2.3 From ac049c1db72963e19b29b63c42ab8759384eef20 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: 
Fri, 28 Mar 2008 14:12:09 -0500 Subject: x86: parsing for ACPI "SAPIC" table Add kernel support for new ACPI "sapic" tables that contain 16-bit APICIDs. This patch simply adds parsing of an optional SAPIC table if present. Otherwise, the traditional local APIC table is used. Note: the SAPIC table is not a new ACPI table - it exists on other architectures but is not currently recognized by x86_64. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 05878ac934db..b33ebf6ea4f1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -264,6 +264,24 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) return 0; } +static int __init +acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) +{ + struct acpi_madt_local_sapic *processor = NULL; + + processor = (struct acpi_madt_local_sapic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? 
*/ + + return 0; +} + static int __init acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, const unsigned long end) @@ -757,8 +775,12 @@ static int __init acpi_parse_madt_lapic_entries(void) mp_register_lapic_address(acpi_lapic_addr); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, - MAX_APICS); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, + acpi_parse_sapic, MAX_APICS); + + if (!count) + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, + acpi_parse_lapic, MAX_APICS); if (!count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); /* TBD: Cleanup to allow fallback to MPS */ -- cgit v1.2.3 From 570da318cf0e3053e62030253494c410a18d4be7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 8 Apr 2008 12:20:50 +0200 Subject: x86: support for new UV apic, prepare Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index c873f60c74a6..4cc1c218ae4c 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -25,9 +25,11 @@ #endif /* which logical CPU number maps to which CPU (physical APIC ID) */ +#ifdef CONFIG_SMP u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; +#endif DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); -- cgit v1.2.3 From ac23d4ee3f84de33c16ed7e68f9adee2386e74fb Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 28 Mar 2008 14:12:16 -0500 Subject: x86: support for new UV apic UV supports really big systems. So big, in fact, that the APICID register does not contain enough bits to contain an APICID that is unique across all cpus. The UV BIOS supports 3 APICID modes: - legacy mode. This mode uses the old APIC mode where APICID is in bits [31:24] of the APICID register. - x2apic mode. 
This mode is whitebox-compatible. APICIDs are unique across all cpus. Standard x2apic APIC operations (Intel-defined) can be used for IPIs. The node identifier fits within the Intel-defined portion of the APICID register. - x2apic-uv mode. In this mode, the APICIDs on each node have unique IDs, but IDs on different nodes are not unique. For example, if each node has 32 cpus, the APICIDs on each node might be 0 - 31. Every node has the same set of IDs. The UV hub is used to route IPIs/interrupts to the correct node. Traditional APIC operations WILL NOT WORK. In x2apic-uv mode, the ACPI tables all contain a full unique ID (note: exact bit layout still changing but the following is close): nnnnnnnnnnlc0cch n = unique node number l = socket number on board c = core h = hyperthread Only the "lc0cch" bits are written to the APICID register. The remaining bits are supplied by having the get_apic_id() function "OR" the extra bits into the value read from the APICID register. (Hmmm.. why not keep the ENTIRE APICID register in per-cpu data....)
The x2apic-uv mode is recognized by the MADT table containing: oem_id = "SGI" oem_table_id = "UV-X" Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/apic_64.c | 2 + arch/x86/kernel/genapic_64.c | 18 +++ arch/x86/kernel/genx2apic_uv_x.c | 245 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup64.c | 4 + arch/x86/kernel/smpboot.c | 5 + 6 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/genx2apic_uv_x.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1fe841a86f7e..0bf2fb55aa74 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -89,7 +89,7 @@ scx200-y += scx200_32.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9b4cacdfd74f..8b0fad47a5d2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -738,6 +738,7 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + preempt_disable(); value = apic_read(APIC_LVR); BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); @@ -831,6 +832,7 @@ void __cpuinit setup_local_APIC(void) else value = APIC_DM_NMI | APIC_LVT_MASKED; apic_write(APIC_LVT1, value); + preempt_enable(); } void __cpuinit lapic_setup_esr(void) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 4cc1c218ae4c..910a4a777a4c 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ void *x86_cpu_to_apicid_early_ptr; #endif DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); +DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic 
__read_mostly *genapic = &apic_flat; @@ -42,6 +44,9 @@ static enum uv_system_type uv_system_type; */ void __init setup_apic_routing(void) { + if (uv_system_type == UV_NON_UNIQUE_APIC) + genapic = &apic_x2apic_uv_x; + else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode @@ -82,6 +87,19 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +unsigned int read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible()); + id = apic_read(APIC_ID); + if (uv_system_type >= UV_X2APIC) + id |= __get_cpu_var(x2apic_extra_bits); + else + id = (id >> 24) & 0xFFu;; + return id; +} + enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c new file mode 100644 index 000000000000..5d77c9cd8e15 --- /dev/null +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -0,0 +1,245 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV APIC functions (note: not an Intel compatible APIC) + * + * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); + +struct uv_blade_info *uv_blade_info; +EXPORT_SYMBOL_GPL(uv_blade_info); + +short *uv_node_to_blade; +EXPORT_SYMBOL_GPL(uv_node_to_blade); + +short *uv_cpu_to_blade; +EXPORT_SYMBOL_GPL(uv_cpu_to_blade); + +short uv_possible_blades; +EXPORT_SYMBOL_GPL(uv_possible_blades); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ + +static cpumask_t uv_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t uv_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +{ + unsigned long val; + int nasid; + + nasid = uv_apicid_to_nasid(phys_apicid); + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + return 0; +} + +static void uv_send_IPI_one(int cpu, int vector) +{ + unsigned long val, apicid; + int nasid; + + apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ + nasid = uv_apicid_to_nasid(apicid); + val = + (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << + UVH_IPI_INT_APIC_ID_SHFT) | + (vector << UVH_IPI_INT_VECTOR_SHFT); + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + printk(KERN_DEBUG + "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n", + cpu, apicid, vector, nasid, val); +} + +static void uv_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned int cpu; + + for (cpu = 0; cpu < NR_CPUS; ++cpu) + if (cpu_isset(cpu, mask)) + uv_send_IPI_one(cpu, vector); +} + +static void uv_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + uv_send_IPI_mask(mask, vector); +} + +static void uv_send_IPI_all(int vector) +{ + uv_send_IPI_mask(cpu_online_map, vector); +} + +static int uv_apic_id_registered(void) +{ + return 1; +} + +static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. 
+ */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return GET_APIC_ID(read_apic_id()) >> index_msb; +} + +#ifdef ZZZ /* Needs x2apic patch */ +static void uv_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} +#endif + +struct genapic apic_x2apic_uv_x = { + .name = "UV large system", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = uv_target_cpus, + .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ + .apic_id_registered = uv_apic_id_registered, + .send_IPI_all = uv_send_IPI_all, + .send_IPI_allbutself = uv_send_IPI_allbutself, + .send_IPI_mask = uv_send_IPI_mask, + /* ZZZ.send_IPI_self = uv_send_IPI_self, */ + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ +}; + +static __cpuinit void set_x2apic_extra_bits(int nasid) +{ + __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6); +} + +/* + * Called on boot cpu. 
+ */ +static __init void uv_system_init(void) +{ + union uvh_si_addr_map_config_u m_n_config; + int bytes, nid, cpu, lcpu, nasid, last_nasid, blade; + unsigned long mmr_base; + + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + mmr_base = + uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & + ~UV_MMR_ENABLE; + printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); + + last_nasid = -1; + for_each_possible_cpu(cpu) { + nid = cpu_to_node(cpu); + nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); + if (nasid != last_nasid) + uv_possible_blades++; + last_nasid = nasid; + } + printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); + + bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); + uv_blade_info = alloc_bootmem_pages(bytes); + + bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); + uv_node_to_blade = alloc_bootmem_pages(bytes); + memset(uv_node_to_blade, 255, bytes); + + bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); + uv_cpu_to_blade = alloc_bootmem_pages(bytes); + memset(uv_cpu_to_blade, 255, bytes); + + last_nasid = -1; + blade = -1; + lcpu = -1; + for_each_possible_cpu(cpu) { + nid = cpu_to_node(cpu); + nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); + if (nasid != last_nasid) { + blade++; + lcpu = -1; + uv_blade_info[blade].nr_posible_cpus = 0; + uv_blade_info[blade].nr_online_cpus = 0; + } + last_nasid = nasid; + lcpu++; + + uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt; + uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt; + uv_cpu_hub_info(cpu)->numa_blade_id = blade; + uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; + uv_cpu_hub_info(cpu)->local_nasid = nasid; + uv_cpu_hub_info(cpu)->gnode_upper = + nasid & ~((1 << uv_hub_info->n_val) - 1); + uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; + uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + uv_blade_info[blade].nasid = nasid; + uv_blade_info[blade].nr_posible_cpus++; + uv_node_to_blade[nid] = blade; + 
uv_cpu_to_blade[cpu] = blade; + + printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n", + cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid); + printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade); + } +} + +/* + * Called on each cpu to initialize the per_cpu UV data area. + */ +void __cpuinit uv_cpu_init(void) +{ + if (!uv_node_to_blade) + uv_system_init(); + + uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; + + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) + set_x2apic_extra_bits(uv_hub_info->local_nasid); +} diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 6b4e3262e8cb..4be499cd6a0d 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifndef CONFIG_DEBUG_BOOT_PARAMS struct boot_params __initdata boot_params; @@ -264,4 +265,7 @@ void __cpuinit cpu_init (void) fpu_init(); raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5da35d2cdbd8..22bf6c29454f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1101,6 +1101,7 @@ static __init void disable_smp(void) */ static int __init smp_sanity_check(unsigned max_cpus) { + preempt_disable(); if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk(KERN_WARNING "weird, boot CPU (#%d) not listed" "by the BIOS.\n", hard_smp_processor_id()); @@ -1112,6 +1113,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * get out of here now! */ if (!smp_found_config && !acpi_lapic) { + preempt_enable(); printk(KERN_NOTICE "SMP motherboard not detected.\n"); disable_smp(); if (APIC_init_uniprocessor()) @@ -1130,6 +1132,7 @@ static int __init smp_sanity_check(unsigned max_cpus) boot_cpu_physical_apicid); physid_set(hard_smp_processor_id(), phys_cpu_present_map); } + preempt_enable(); /* * If we couldn't find a local APIC, then get out of here now! 
@@ -1205,11 +1208,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) return; } + preempt_disable(); if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } + preempt_enable(); #ifdef CONFIG_X86_32 connect_bsp_APIC(); -- cgit v1.2.3 From b447a468fcd130aa8951672b6115c673c274e888 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 25 Mar 2008 15:06:51 -0700 Subject: x86: clean up non-smp usage of cpu maps Cleanup references to the early cpu maps for the non-SMP configuration and remove some functions called for SMP configurations only. Cc: Andi Kleen Cc: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1179aa06cdbf..dc7940955b7a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP) /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. 
These arrays then become expendable and the @@ -21,21 +21,12 @@ static void __init setup_per_cpu_maps(void) int cpu; for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - if (per_cpu_offset(cpu)) { -#endif - per_cpu(x86_cpu_to_apicid, cpu) = - x86_cpu_to_apicid_init[cpu]; - per_cpu(x86_bios_cpu_apicid, cpu) = + per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; + per_cpu(x86_bios_cpu_apicid, cpu) = x86_bios_cpu_apicid_init[cpu]; #ifdef CONFIG_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = + per_cpu(x86_cpu_to_node_map, cpu) = x86_cpu_to_node_map_init[cpu]; -#endif -#ifdef CONFIG_SMP - } else - printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n", - cpu); #endif } @@ -72,17 +63,20 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ size = PERCPU_ENOUGH_ROOM; - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); - for_each_cpu_mask(i, cpu_possible_map) { + + for_each_possible_cpu(i) { char *ptr; #ifndef CONFIG_NEED_MULTIPLE_NODES ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(i); - if (!node_online(node) || !NODE_DATA(node)) + if (!node_online(node) || !NODE_DATA(node)) { ptr = alloc_bootmem_pages(size); + printk(KERN_INFO + "cpu %d has no node or node-local memory\n", i); + } else ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); #endif @@ -96,7 +90,7 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } - /* setup percpu data maps early */ + /* Setup percpu data maps */ setup_per_cpu_maps(); } -- cgit v1.2.3 From 1725037f7232c1518b9be1832f5823b7c576c35c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 31 Mar 2008 14:52:15 +0200 Subject: x86: set_cyc2ns_scale() remove prev scale Peter Zijlstra pointed out that it's unused. 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_32.c | 3 +-- arch/x86/kernel/tsc_64.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 68657d8526fb..3d7e6e9fa6c2 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -84,8 +84,8 @@ DEFINE_PER_CPU(unsigned long, cyc2ns); static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - unsigned long flags, prev_scale, *scale; unsigned long long tsc_now, ns_now; + unsigned long flags, *scale; local_irq_save(flags); sched_clock_idle_sleep_event(); @@ -95,7 +95,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) rdtscll(tsc_now); ns_now = __cycles_2_ns(tsc_now); - prev_scale = *scale; if (cpu_khz) *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index d3bebaaad842..ceeba01e7f47 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -44,8 +44,8 @@ DEFINE_PER_CPU(unsigned long, cyc2ns); static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - unsigned long flags, prev_scale, *scale; unsigned long long tsc_now, ns_now; + unsigned long flags, *scale; local_irq_save(flags); sched_clock_idle_sleep_event(); @@ -55,7 +55,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) rdtscll(tsc_now); ns_now = __cycles_2_ns(tsc_now); - prev_scale = *scale; if (cpu_khz) *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; -- cgit v1.2.3 From f5149a49f994e5c469ac398af7cdeb8eb612d3a4 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Sun, 30 Mar 2008 21:02:07 -0500 Subject: x86: support for new UV apic, fix Yinghai Lu pointed out a bug in the previous patches, fix double-shift of apicid. 
Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 910a4a777a4c..c9eabe36ee36 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -95,8 +95,6 @@ unsigned int read_apic_id(void) id = apic_read(APIC_ID); if (uv_system_type >= UV_X2APIC) id |= __get_cpu_var(x2apic_extra_bits); - else - id = (id >> 24) & 0xFFu;; return id; } -- cgit v1.2.3 From 13af4836b3914b23946f6a8982934e2c828c183f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Apr 2008 13:23:22 +0200 Subject: x86: improve default idle Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 8 -------- arch/x86/kernel/process_64.c | 8 -------- 2 files changed, 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 08c41ed5e805..3903a8f2eb97 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -113,16 +113,8 @@ void default_idle(void) local_irq_disable(); if (!need_resched()) { - ktime_t t0, t1; - u64 t0n, t1n; - - t0 = ktime_get(); - t0n = ktime_to_ns(t0); safe_halt(); /* enables interrupts racelessly */ local_irq_disable(); - t1 = ktime_get(); - t1n = ktime_to_ns(t1); - sched_clock_idle_wakeup_event(t1n - t0n); } local_irq_enable(); current_thread_info()->status |= TS_POLLING; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4f40272474dd..e75ccc8a2b87 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -107,16 +107,8 @@ void default_idle(void) smp_mb(); local_irq_disable(); if (!need_resched()) { - ktime_t t0, t1; - u64 t0n, t1n; - - t0 = ktime_get(); - t0n = ktime_to_ns(t0); safe_halt(); /* enables interrupts racelessly */ local_irq_disable(); - t1 = ktime_get(); - t1n = ktime_to_ns(t1); - sched_clock_idle_wakeup_event(t1n - t0n); } 
local_irq_enable(); current_thread_info()->status |= TS_POLLING; -- cgit v1.2.3 From 431ef7a2a486201967304fcc9cfc33e945626fed Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 1 Apr 2008 19:41:50 +0400 Subject: x86: debug Store - call kfree if only we really need it We should call for kfree if only we really need it. Though it's safe to call kfree with NULL pointer passed in this code we've already tested the pointer and can eliminate the call Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index dcd918c1580d..11c11b8ec48d 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -220,11 +220,11 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes) int ds_free(void **dsp) { - if (*dsp) + if (*dsp) { kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - + kfree(*dsp); + *dsp = NULL; + } return 0; } -- cgit v1.2.3 From af926a5830079bf36253dcf3a1b75b1497fc7fd1 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:40:32 +0400 Subject: x86: move x86_bios_cpu_apicid to io_apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 3 +++ arch/x86/kernel/genapic_64.c | 2 -- arch/x86/kernel/mpparse_64.c | 2 -- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 8b0fad47a5d2..274ebabf49a2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -91,6 +91,9 @@ unsigned long mp_lapic_addr; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version diff --git 
a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index c9eabe36ee36..7df38c4c8575 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -31,8 +31,6 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; #endif -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 4840a846904e..378c4ba80b47 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -60,8 +60,6 @@ u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = {[0 ... NR_CPUS - 1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; #endif -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP -- cgit v1.2.3 From 76eb41319d6ab98d17c81a8001a6d7ed9f8359ee Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:40:41 +0400 Subject: x86: move x86_cpu_to_apicid to setup.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 ++++ arch/x86/kernel/smpboot.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index dc7940955b7a..01119d9b013e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,6 +9,10 @@ #include #include #include +#include + +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP) /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 22bf6c29454f..412061a0bf2b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,8 +77,6 @@ u16 
x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; -- cgit v1.2.3 From 0fc0906e59df1427d194b78376d15ca48079f6bf Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:40:48 +0400 Subject: x86: move phys_cpu_present_map to setup.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 5 ----- arch/x86/kernel/mpparse_64.c | 5 ----- arch/x86/kernel/setup.c | 4 ++++ arch/x86/kernel/smpboot.c | 3 --- 4 files changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 7b7e008496e0..4f4cfad9ae57 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -69,11 +69,6 @@ unsigned int boot_cpu_physical_apicid = -1U; #endif #endif -/* Make it easy to share the UP and SMP code: */ -#ifndef CONFIG_X86_SMP -physid_mask_t phys_cpu_present_map; -#endif - /* * Intel MP BIOS table parsing routines: */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 378c4ba80b47..8d7365511ac0 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -70,11 +70,6 @@ unsigned int boot_cpu_physical_apicid = -1U; #endif #endif -/* Make it easy to share the UP and SMP code: */ -#ifndef CONFIG_X86_SMP -physid_mask_t phys_cpu_present_map; -#endif - /* * Intel MP BIOS table parsing routines: */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 01119d9b013e..011fcdd213ff 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,11 +9,15 @@ #include #include #include +#include #include DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); +/* Bitmask of physically 
existing CPUs */ +physid_mask_t phys_cpu_present_map; + #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP) /* * Copy data used in early init routines from the initial arrays to the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 412061a0bf2b..7e6aa1c790a2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,9 +88,6 @@ u8 apicid_2_node[MAX_APICID]; /* Internal processor count */ unsigned int num_processors; -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; - /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -- cgit v1.2.3 From 708650afe98a50d0b280bea9dcf5f160b94ee9fb Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:40:54 +0400 Subject: x86: move x86_cpu_to_apicid_init to smpboot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 6 ------ arch/x86/kernel/smpboot.c | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 7df38c4c8575..9546ef408b92 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -25,12 +25,6 @@ #include #endif -/* which logical CPU number maps to which CPU (physical APIC ID) */ -#ifdef CONFIG_SMP -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata - = { [0 ... 
NR_CPUS-1] = BAD_APICID }; -void *x86_cpu_to_apicid_early_ptr; -#endif DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7e6aa1c790a2..e3ea074ba6a4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -72,12 +72,13 @@ * integrate apic between arches, we can probably do a better job, but * right now, they'll stay here -- glommer */ -#ifdef CONFIG_X86_32 + /* which logical CPU number maps to which CPU (physical APIC ID) */ u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; +#ifdef CONFIG_X86_32 u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; -- cgit v1.2.3 From 16ecf7a47cf4f1c97189a551b001195aed550cc2 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:00 +0400 Subject: x86: move x86_bios_cpu_apicid_init to smpboot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 6 ------ arch/x86/kernel/smpboot.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8d7365511ac0..3196c2318640 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -55,12 +55,6 @@ int mp_irq_entries; int nr_ioapics; -#ifdef CONFIG_SMP -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = {[0 ... NR_CPUS - 1] = BAD_APICID }; -void *x86_bios_cpu_apicid_early_ptr; -#endif - /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP unsigned int num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e3ea074ba6a4..abf63767cd46 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -78,11 +78,11 @@ u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... 
NR_CPUS-1] = BAD_APICID }; void *x86_cpu_to_apicid_early_ptr; -#ifdef CONFIG_X86_32 u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; void *x86_bios_cpu_apicid_early_ptr; +#ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; #endif -- cgit v1.2.3 From 61048c6328819b0973ef662f6d46f2e2bc753ceb Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:07 +0400 Subject: x86: don't set IO APIC features if IO APIC is not enabled Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 4f4cfad9ae57..09cb77813680 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -221,6 +221,8 @@ static void __init MP_bus_info (struct mpc_config_bus *m) } } +#ifdef CONFIG_X86_IO_APIC + static int bad_ioapic(unsigned long address) { if (nr_ioapics >= MAX_IO_APICS) { @@ -263,6 +265,8 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m) panic("Max # of irq sources exceeded!!\n"); } +#endif + static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) { Dprintk("Lint: type %d, pol %d, trig %d, bus %d," @@ -421,21 +425,25 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) } case MP_IOAPIC: { +#ifdef CONFIG_X86_IO_APIC struct mpc_config_ioapic *m= (struct mpc_config_ioapic *)mpt; MP_ioapic_info(m); mpt+=sizeof(*m); count+=sizeof(*m); +#endif break; } case MP_INTSRC: { +#ifdef CONFIG_X86_IO_APIC struct mpc_config_intsrc *m= (struct mpc_config_intsrc *)mpt; MP_intsrc_info(m); mpt+=sizeof(*m); count+=sizeof(*m); +#endif break; } case MP_LINTSRC: @@ -463,6 +471,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) return num_processors; } +#ifdef CONFIG_X86_IO_APIC + static int __init ELCR_trigger(unsigned int irq) { unsigned int port; @@ -537,11 
+547,15 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) MP_intsrc_info(&intsrc); } +#endif + static inline void __init construct_default_ISA_mptable(int mpc_default_type) { struct mpc_config_processor processor; struct mpc_config_bus bus; +#ifdef CONFIG_X86_IO_APIC struct mpc_config_ioapic ioapic; +#endif struct mpc_config_lintsrc lintsrc; int linttypes[2] = { mp_ExtINT, mp_NMI }; int i; @@ -597,6 +611,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) MP_bus_info(&bus); } +#ifdef CONFIG_X86_IO_APIC ioapic.mpc_type = MP_IOAPIC; ioapic.mpc_apicid = 2; ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; @@ -608,7 +623,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) * We set up most of the low 16 IO-APIC pins according to MPS rules. */ construct_default_ioirq_mptable(mpc_default_type); - +#endif lintsrc.mpc_type = MP_LINTSRC; lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; @@ -670,6 +685,8 @@ void __init get_smp_config (void) printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); return; } + +#ifdef CONFIG_X86_IO_APIC /* * If there are no explicit MP IRQ entries, then we are * broken. We set up most of the low 16 IO-APIC pins to @@ -687,7 +704,7 @@ void __init get_smp_config (void) construct_default_ioirq_mptable(0); } - +#endif } else BUG(); @@ -967,8 +984,9 @@ void __init mp_config_acpi_legacy_irqs (void) intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqflag = 0; /* Conforming */ intsrc.mpc_srcbus = MP_ISA_BUS; +#ifdef CONFIG_X86_IO_APIC intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - +#endif /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. 
-- cgit v1.2.3 From ba1ce61ff226bddebd2101a29fe56b4664ef7cec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Apr 2008 13:11:09 +0200 Subject: x86: don't set io apic features if io-apic is not enabled, fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 09cb77813680..44f52f623fd6 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -429,9 +429,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) struct mpc_config_ioapic *m= (struct mpc_config_ioapic *)mpt; MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); #endif + mpt+=sizeof(struct mpc_config_ioapic); + count+=sizeof(struct mpc_config_ioapic); break; } case MP_INTSRC: @@ -441,9 +441,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) (struct mpc_config_intsrc *)mpt; MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); #endif + mpt+=sizeof(struct mpc_config_intsrc); + count+=sizeof(struct mpc_config_intsrc); break; } case MP_LINTSRC: -- cgit v1.2.3 From 9f640ccbc67b7c306206502bca420a80ad15c965 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:13 +0400 Subject: x86: move mp_ioapics to io_apic_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 4 ++++ arch/x86/kernel/mpparse_32.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index bfebe7a1966d..2e01b69a46e0 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -71,6 +71,10 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + static int disable_timer_pin_1 __initdata; /* diff --git 
a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 44f52f623fd6..302253cbfc6a 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -47,17 +47,12 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; static int mp_current_pci_id; -/* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* MP IRQ source entries */ int mp_irq_entries; -int nr_ioapics; - int pic_mode; /* Make it easy to share the UP and SMP code: */ -- cgit v1.2.3 From 9c7408f3c491b6fe990cd2dacd5471ca21760551 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:19 +0400 Subject: x86: move mp_ioapics to io_apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 4 ++++ arch/x86/kernel/mpparse_64.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 0ac92d6acf57..e9217edab434 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -103,6 +103,10 @@ DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 3196c2318640..f1015bf53cf0 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -44,8 +44,6 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... 
MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id = 0; -/* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -53,8 +51,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* MP IRQ source entries */ int mp_irq_entries; -int nr_ioapics; - /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP unsigned int num_processors; -- cgit v1.2.3 From 9e5c5f1dd29c86307e6b3cfa75e85d0efccc1f6b Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:26 +0400 Subject: x86: move mp_ioapic_routing to boot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 ++ arch/x86/kernel/mpparse_32.c | 7 +------ arch/x86/kernel/mpparse_64.c | 7 +------ 3 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b33ebf6ea4f1..9cf575184536 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -320,6 +320,8 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC +struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 302253cbfc6a..b6f1e4e235e3 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -837,12 +837,7 @@ void __cpuinit mp_register_lapic (int id, u8 enabled) #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -static struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; +extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic (int gsi) { diff --git a/arch/x86/kernel/mpparse_64.c 
b/arch/x86/kernel/mpparse_64.c index f1015bf53cf0..813057cb2ddb 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -630,12 +630,7 @@ void __cpuinit mp_register_lapic(int id, u8 enabled) #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -static struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; +extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic(int gsi) { -- cgit v1.2.3 From 584f734d035db5c5c07f938e464ddeeefde7ec31 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:32 +0400 Subject: x86: move mp_irqs to io_apics_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 6 ++++++ arch/x86/kernel/mpparse_32.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 2e01b69a46e0..db1b1f30b650 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -75,6 +75,12 @@ int nr_ioapic_registers[MAX_IO_APICS]; struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; +/* MP IRQ source entries */ +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + static int disable_timer_pin_1 __initdata; /* diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index b6f1e4e235e3..2b16e5c71a64 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -47,12 +47,6 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; static int mp_current_pci_id; -/* # of MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* MP IRQ source entries */ -int mp_irq_entries; - int pic_mode; /* Make it easy to share the UP and SMP code: */ -- cgit v1.2.3 From 350bae1d3f0d0c763c5bb9cc5fb5c363bd0086db Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:38 +0400 Subject: x86: move mp_irqs to io_apic_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 6 ++++++ arch/x86/kernel/mpparse_64.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index e9217edab434..65b6840e1820 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -107,6 +107,12 @@ int nr_ioapic_registers[MAX_IO_APICS]; struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; +/* MP IRQ source entries */ +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 813057cb2ddb..07c98dbd468a 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -45,12 +45,6 @@ int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... 
MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id = 0; -/* # of MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* MP IRQ source entries */ -int mp_irq_entries; - /* Make it easy to share the UP and SMP code: */ #ifndef CONFIG_X86_SMP unsigned int num_processors; -- cgit v1.2.3 From 2fe60147570231cde0d1f14711d2e34ccdf54b65 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:44 +0400 Subject: x86: move up & smp variables to setup.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 --- arch/x86/kernel/apic_64.c | 4 ---- arch/x86/kernel/mpparse_32.c | 13 ------------- arch/x86/kernel/mpparse_64.c | 9 --------- arch/x86/kernel/setup.c | 8 ++++++++ arch/x86/kernel/smpboot.c | 5 ----- 6 files changed, 8 insertions(+), 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4905a11b30e3..687208190b06 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -52,9 +52,6 @@ unsigned long mp_lapic_addr; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; - DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 274ebabf49a2..9e8e5c050c55 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -87,10 +87,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); - DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 2b16e5c71a64..ed4b3bc0e97a 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -49,15 +49,6 @@ static int 
mp_current_pci_id; int pic_mode; -/* Make it easy to share the UP and SMP code: */ -#ifndef CONFIG_X86_SMP -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -#ifndef CONFIG_X86_LOCAL_APIC -unsigned int boot_cpu_physical_apicid = -1U; -#endif -#endif - /* * Intel MP BIOS table parsing routines: */ @@ -93,9 +84,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) int apicid; if (!(m->mpc_cpuflag & CPU_ENABLED)) { -#ifdef CONFIG_X86_SMP disabled_cpus++; -#endif return; } @@ -817,9 +806,7 @@ void __cpuinit mp_register_lapic (int id, u8 enabled) } if (!enabled) { -#ifdef CONFIG_X86_SMP ++disabled_cpus; -#endif return; } diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 07c98dbd468a..f860727e9151 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -45,15 +45,6 @@ int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id = 0; -/* Make it easy to share the UP and SMP code: */ -#ifndef CONFIG_X86_SMP -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -#ifndef CONFIG_X86_LOCAL_APIC -unsigned int boot_cpu_physical_apicid = -1U; -#endif -#endif - /* * Intel MP BIOS table parsing routines: */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 011fcdd213ff..ed157c90412e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -12,6 +12,14 @@ #include #include +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); + +physid_mask_t phys_cpu_present_map; + DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index abf63767cd46..21ad3f396a05 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -86,14 +86,9 @@ void 
*x86_bios_cpu_apicid_early_ptr; u8 apicid_2_node[MAX_APICID]; #endif -/* Internal processor count */ -unsigned int num_processors; - /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -unsigned disabled_cpus __cpuinitdata; - /* Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality * for idle threads. -- cgit v1.2.3 From dfac2189c2e1fbb90ee83f15b5e404425754e9f4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:50 +0400 Subject: x86: move mp_register_lapic to boot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 21 ++++++++++++++++----- arch/x86/kernel/mpparse_32.c | 16 ---------------- arch/x86/kernel/mpparse_64.c | 10 ---------- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9cf575184536..11bd11847b19 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC # include @@ -239,6 +240,16 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) return 0; } +static void __cpuinit acpi_register_lapic(int id, u8 enabled) +{ + if (!enabled) { + ++disabled_cpus; + return; + } + + generic_processor_info(id, 0); +} + static int __init acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -258,8 +269,8 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * to not preallocating memory for all NR_CPUS * when we use CPU hotplug. */ - mp_register_lapic(processor->id, /* APIC ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? 
*/ + acpi_register_lapic(processor->id, /* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -276,8 +287,8 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); - mp_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */ + acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -554,7 +565,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) buffer.pointer = NULL; tmp_map = cpu_present_map; - mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index ed4b3bc0e97a..ebec70a14198 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -797,22 +797,6 @@ void __init mp_register_lapic_address(u64 address) Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } -void __cpuinit mp_register_lapic (int id, u8 enabled) -{ - if (MAX_APICS - id <= 0) { - printk(KERN_WARNING "Processor #%d invalid (max %d)\n", - id, MAX_APICS); - return; - } - - if (!enabled) { - ++disabled_cpus; - return; - } - - generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); -} - #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index f860727e9151..03c19a2e6e9e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -601,16 +601,6 @@ void __init mp_register_lapic_address(u64 address) if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } -void __cpuinit mp_register_lapic(int id, u8 enabled) -{ - if (!enabled) { - ++disabled_cpus; - return; - } - - 
generic_processor_info(id, 0); -} - #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -- cgit v1.2.3 From 31d2092eb0c23636b73d2c24c0c11b66470cef58 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:41:57 +0400 Subject: x86: move mp_register_lapic_address to boot.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 12 +++++++++++- arch/x86/kernel/mpparse_32.c | 12 ------------ arch/x86/kernel/mpparse_64.c | 8 -------- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 11bd11847b19..057ccf1d5ad4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -765,6 +765,16 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) * Parse LAPIC entries in MADT * returns 0 on success, < 0 on error */ + +static void __init acpi_register_lapic_address(unsigned long address) +{ + mp_lapic_addr = address; + + set_fixmap_nocache(FIX_APIC_BASE, address); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); +} + static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -786,7 +796,7 @@ static int __init acpi_parse_madt_lapic_entries(void) return count; } - mp_register_lapic_address(acpi_lapic_addr); + acpi_register_lapic_address(acpi_lapic_addr); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_sapic, MAX_APICS); diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index ebec70a14198..052043ed6499 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -785,18 +785,6 @@ void __init find_smp_config (void) #ifdef CONFIG_ACPI -void __init mp_register_lapic_address(u64 address) -{ - mp_lapic_addr = (unsigned long) address; - - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = 
GET_APIC_ID(read_apic_id()); - - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); -} - #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 03c19a2e6e9e..1c3bf80b3ba6 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -594,14 +594,6 @@ void __init find_smp_config(void) #ifdef CONFIG_ACPI -void __init mp_register_lapic_address(u64 address) -{ - mp_lapic_addr = (unsigned long)address; - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); -} - #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 -- cgit v1.2.3 From 4ef81297f72655c4f4c1ae9c371453f9ca796aad Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:03 +0400 Subject: x86: lindent mpparse_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 420 ++++++++++++++++++++++--------------------- 1 file changed, 214 insertions(+), 206 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 052043ed6499..f1c896ab8275 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -41,10 +41,10 @@ int smp_found_config; * MP-table. */ #if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type [MAX_MP_BUSSES]; +int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id; int pic_mode; @@ -53,7 +53,6 @@ int pic_mode; * Intel MP BIOS table parsing routines: */ - /* * Checksum an MP configuration block. */ @@ -75,8 +74,9 @@ static int __init mpf_checksum(unsigned char *mp, int len) * doing this .... 
*/ -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; +static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] + __cpuinitdata; #endif static void __cpuinit MP_processor_info(struct mpc_config_processor *m) @@ -87,66 +87,63 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) disabled_cpus++; return; } - #ifdef CONFIG_X86_NUMAQ apicid = mpc_apic_id(m, translation_table[mpc_record]); #else Dprintk("Processor #%d %u:%u APIC version %d\n", m->mpc_apicid, (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, m->mpc_apicver); apicid = m->mpc_apicid; #endif - if (m->mpc_featureflag&(1<<0)) + if (m->mpc_featureflag & (1 << 0)) Dprintk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) + if (m->mpc_featureflag & (1 << 7)) Dprintk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) + if (m->mpc_featureflag & (1 << 8)) Dprintk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) + if (m->mpc_featureflag & (1 << 9)) Dprintk(" Internal APIC present.\n"); - if (m->mpc_featureflag&(1<<11)) + if (m->mpc_featureflag & (1 << 11)) Dprintk(" SEP present.\n"); - if (m->mpc_featureflag&(1<<12)) + if (m->mpc_featureflag & (1 << 12)) Dprintk(" MTRR present.\n"); - if (m->mpc_featureflag&(1<<13)) + if (m->mpc_featureflag & (1 << 13)) Dprintk(" PGE present.\n"); - if (m->mpc_featureflag&(1<<14)) + if (m->mpc_featureflag & (1 << 14)) Dprintk(" MCA present.\n"); - if (m->mpc_featureflag&(1<<15)) + if (m->mpc_featureflag & (1 << 15)) Dprintk(" CMOV present.\n"); - if (m->mpc_featureflag&(1<<16)) + if (m->mpc_featureflag & (1 << 16)) Dprintk(" PAT present.\n"); - if (m->mpc_featureflag&(1<<17)) + if (m->mpc_featureflag & (1 << 17)) Dprintk(" PSE present.\n"); - if (m->mpc_featureflag&(1<<18)) + if (m->mpc_featureflag & 
(1 << 18)) Dprintk(" PSN present.\n"); - if (m->mpc_featureflag&(1<<19)) + if (m->mpc_featureflag & (1 << 19)) Dprintk(" Cache Line Flush Instruction present.\n"); /* 20 Reserved */ - if (m->mpc_featureflag&(1<<21)) + if (m->mpc_featureflag & (1 << 21)) Dprintk(" Debug Trace and EMON Store present.\n"); - if (m->mpc_featureflag&(1<<22)) + if (m->mpc_featureflag & (1 << 22)) Dprintk(" ACPI Thermal Throttle Registers present.\n"); - if (m->mpc_featureflag&(1<<23)) + if (m->mpc_featureflag & (1 << 23)) Dprintk(" MMX present.\n"); - if (m->mpc_featureflag&(1<<24)) + if (m->mpc_featureflag & (1 << 24)) Dprintk(" FXSR present.\n"); - if (m->mpc_featureflag&(1<<25)) + if (m->mpc_featureflag & (1 << 25)) Dprintk(" XMM present.\n"); - if (m->mpc_featureflag&(1<<26)) + if (m->mpc_featureflag & (1 << 26)) Dprintk(" Willamette New Instructions present.\n"); - if (m->mpc_featureflag&(1<<27)) + if (m->mpc_featureflag & (1 << 27)) Dprintk(" Self Snoop present.\n"); - if (m->mpc_featureflag&(1<<28)) + if (m->mpc_featureflag & (1 << 28)) Dprintk(" HT present.\n"); - if (m->mpc_featureflag&(1<<29)) + if (m->mpc_featureflag & (1 << 29)) Dprintk(" Thermal Monitor present.\n"); /* 30, 31 Reserved */ - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { Dprintk(" Bootup CPU\n"); boot_cpu_physical_apicid = m->mpc_apicid; @@ -155,7 +152,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) generic_processor_info(apicid, m->mpc_apicver); } -static void __init MP_bus_info (struct mpc_config_bus *m) +static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; @@ -171,14 +168,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m) #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. supported is %d\n", - m->mpc_busid, str, MAX_MP_BUSSES - 1); + " is too large, max. 
supported is %d\n", + m->mpc_busid, str, MAX_MP_BUSSES - 1); return; } #endif set_bit(m->mpc_busid, mp_bus_not_pci); - if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { + if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { #ifdef CONFIG_X86_NUMAQ mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif @@ -187,11 +184,11 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - } else if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + } else if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; } else { printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); @@ -216,13 +213,13 @@ static int bad_ioapic(unsigned long address) return 0; } -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +static void __init MP_ioapic_info(struct mpc_config_ioapic *m) { if (!(m->mpc_flags & MPC_APIC_USABLE)) return; printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); if (bad_ioapic(m->mpc_apicaddr)) return; @@ -231,38 +228,41 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) nr_ioapics++; } -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs [mp_irq_entries] = *m; + mp_irqs[mp_irq_entries] = *m; Dprintk("Int: type %d, pol %d, trig %d, bus %d," " IRQ %02x, APIC ID %x, APIC INT 
%02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } #endif -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { Dprintk("Lint: type %d, pol %d, trig %d, bus %d," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); } #ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info (struct mpc_config_translation *m) +static void __init MP_translation_info(struct mpc_config_translation *m) { - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); + printk(KERN_INFO + "Translation: record %d, type %d, quad %d, global %d, local %d\n", + mpc_record, m->trans_type, m->trans_quad, m->trans_global, + m->trans_local); - if (mpc_record >= MAX_MPC_ENTRY) + if (mpc_record >= MAX_MPC_ENTRY) printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); else - translation_table[mpc_record] = m; /* stash this for later */ + translation_table[mpc_record] = m; /* stash this for later */ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) node_set_online(m->trans_quad); } @@ -271,59 +271,59 @@ static void __init MP_translation_info (struct mpc_config_translation *m) * Read/parse the MPC oem tables */ -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ - unsigned short oemsize) +static void __init 
smp_read_mpc_oem(struct mp_config_oemtable *oemtable, + unsigned short oemsize) { - int count = sizeof (*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable)+count; - + int count = sizeof(*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable) + count; + mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) - { - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], - oemtable->oem_signature[1], - oemtable->oem_signature[2], - oemtable->oem_signature[3]); + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", + oemtable); + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { + printk(KERN_WARNING + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], oemtable->oem_signature[1], + oemtable->oem_signature[2], oemtable->oem_signature[3]); return; } - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) - { + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); return; } while (count < oemtable->oem_length) { switch (*oemptr) { - case MP_TRANSLATION: + case MP_TRANSLATION: { - struct mpc_config_translation *m= - (struct mpc_config_translation *)oemptr; + struct mpc_config_translation *m = + (struct mpc_config_translation *)oemptr; MP_translation_info(m); oemptr += sizeof(*m); count += sizeof(*m); ++mpc_record; break; } - default: + default: { - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); + printk(KERN_WARNING + "Unrecognised OEM table entry type! - %d\n", + (int)*oemptr); return; } } - } + } } static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) + char *productid) { if (strncmp(oem, "IBM NUMA", 8)) printk("Warning! 
May not be a NUMA-Q system!\n"); if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, - mpc->mpc_oemsize); + smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, + mpc->mpc_oemsize); } -#endif /* CONFIG_X86_NUMAQ */ +#endif /* CONFIG_X86_NUMAQ */ /* * Read/parse the MPC @@ -333,34 +333,34 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) { char str[16]; char oem[10]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { + if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", - *(u32 *)mpc->mpc_signature); + *(u32 *) mpc->mpc_signature); return 0; } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { + if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { printk(KERN_ERR "SMP mptable: checksum error!\n"); return 0; } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", - mpc->mpc_spec); + mpc->mpc_spec); return 0; } if (!mpc->mpc_lapic) { printk(KERN_ERR "SMP mptable: null local APIC address!\n"); return 0; } - memcpy(oem,mpc->mpc_oem,8); - oem[8]=0; - printk(KERN_INFO "OEM ID: %s ",oem); + memcpy(oem, mpc->mpc_oem, 8); + oem[8] = 0; + printk(KERN_INFO "OEM ID: %s ", oem); - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); + memcpy(str, mpc->mpc_productid, 12); + str[12] = 0; + printk("Product ID: %s ", str); mps_oem_check(mpc, oem, str); @@ -374,17 +374,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) mp_lapic_addr = mpc->mpc_lapic; /* - * Now process the configuration blocks. + * Now process the configuration blocks. 
*/ #ifdef CONFIG_X86_NUMAQ mpc_record = 0; #endif while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: + switch (*mpt) { + case MP_PROCESSOR: { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; /* ACPI may have already provided this data */ if (!acpi_lapic) MP_processor_info(m); @@ -392,48 +392,48 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) count += sizeof(*m); break; } - case MP_BUS: + case MP_BUS: { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; MP_bus_info(m); mpt += sizeof(*m); count += sizeof(*m); break; } - case MP_IOAPIC: + case MP_IOAPIC: { #ifdef CONFIG_X86_IO_APIC - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; + struct mpc_config_ioapic *m = + (struct mpc_config_ioapic *)mpt; MP_ioapic_info(m); #endif - mpt+=sizeof(struct mpc_config_ioapic); - count+=sizeof(struct mpc_config_ioapic); + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); break; } - case MP_INTSRC: + case MP_INTSRC: { #ifdef CONFIG_X86_IO_APIC - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; MP_intsrc_info(m); #endif - mpt+=sizeof(struct mpc_config_intsrc); - count+=sizeof(struct mpc_config_intsrc); + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); break; } - case MP_LINTSRC: + case MP_LINTSRC: { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); + mpt += sizeof(*m); + count += sizeof(*m); break; } - default: + default: { count = mpc->mpc_length; break; @@ -466,7 +466,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) int ELCR_fallback = 0; 
intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_irqflag = 0; /* conforming */ intsrc.mpc_srcbus = 0; intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; @@ -481,12 +481,16 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * If it does, we assume it's valid. */ if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + printk(KERN_INFO + "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) + || ELCR_trigger(13)) + printk(KERN_WARNING + "ELCR contains invalid data... not using ELCR\n"); else { - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); + printk(KERN_INFO + "Using ELCR to identify PCI interrupts\n"); ELCR_fallback = 1; } } @@ -515,13 +519,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ MP_intsrc_info(&intsrc); } intsrc.mpc_irqtype = mp_ExtINT; intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ MP_intsrc_info(&intsrc); } @@ -551,8 +555,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; processor.mpc_cpuflag = CPU_ENABLED; processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | - boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; processor.mpc_reserved[0] = 0; processor.mpc_reserved[1] = 0; @@ -564,23 +567,23 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) bus.mpc_type = MP_BUS; bus.mpc_busid = 0; switch (mpc_default_type) { - default: - printk("???\n"); - printk(KERN_ERR "Unknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); + default: + printk("???\n"); + printk(KERN_ERR "Unknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); } MP_bus_info(&bus); if (mpc_default_type > 4) { @@ -603,7 +606,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); #endif lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; lintsrc.mpc_srcbusirq = 0; lintsrc.mpc_destapic = MP_APIC_ALL; @@ -619,23 +622,25 @@ static struct intel_mp_floating *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ -void __init get_smp_config (void) +void __init get_smp_config(void) { struct intel_mp_floating *mpf = mpf_found; /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical + * ACPI supports both logical (e.g. 
Hyper-Threading) and physical * processors, where MPS only supports physical. */ if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); + printk(KERN_INFO + "Using ACPI (MADT) for SMP configuration information\n"); return; - } - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); + } else if (acpi_lapic) + printk(KERN_INFO + "Using ACPI for processor (LAPIC) configuration information\n"); - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->mpf_specification); + if (mpf->mpf_feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -648,7 +653,8 @@ void __init get_smp_config (void) */ if (mpf->mpf_feature1 != 0) { - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); + printk(KERN_INFO "Default MP configuration #%d\n", + mpf->mpf_feature1); construct_default_ISA_mptable(mpf->mpf_feature1); } else if (mpf->mpf_physptr) { @@ -659,8 +665,10 @@ void __init get_smp_config (void) */ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); + printk(KERN_ERR + "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR + "... disabling SMP support. (tell your hw vendor)\n"); return; } @@ -673,7 +681,8 @@ void __init get_smp_config (void) if (!mp_irq_entries) { struct mpc_config_bus bus; - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + printk(KERN_ERR + "BIOS bug, no explicit IRQ entries, using default mptable. 
(tell your hw vendor)\n"); bus.mpc_type = MP_BUS; bus.mpc_busid = 0; @@ -692,26 +701,26 @@ void __init get_smp_config (void) */ } -static int __init smp_scan_config (unsigned long base, unsigned long length) +static int __init smp_scan_config(unsigned long base, unsigned long length) { unsigned long *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length); + printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp, length); if (sizeof(*mpf) != 16) printk("Error: MPF size\n"); while (length > 0) { mpf = (struct intel_mp_floating *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4))) { smp_found_config = 1; printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); + mpf, virt_to_phys(mpf)); reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); if (mpf->mpf_physptr) { @@ -741,7 +750,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) return 0; } -void __init find_smp_config (void) +void __init find_smp_config(void) { unsigned int address; @@ -753,9 +762,9 @@ void __init find_smp_config (void) * 2) Scan the top 1K of base RAM * 3) Scan the 64K of bios */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) + if (smp_scan_config(0x0, 0x400) || + smp_scan_config(639 * 0x400, 0x400) || + smp_scan_config(0xF0000, 0x10000)) return; /* * If it is an SMP machine we should know now, unless the @@ -792,14 +801,14 @@ void __init find_smp_config (void) extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; -static int mp_find_ioapic (int gsi) +static int mp_find_ioapic(int gsi) { int i = 0; /* Find the IOAPIC that 
manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) + && (gsi <= mp_ioapic_routing[i].gsi_end)) return i; } @@ -833,34 +842,32 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); - - /* + + /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); + io_apic_get_redir_entries(idx); printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; } -void __init -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; + int ioapic = -1; + int pin = -1; - /* + /* * Convert 'gsi' to 'ioapic.pin'. */ ioapic = mp_find_ioapic(gsi); @@ -870,7 +877,7 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) /* * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE + * erroneously sets the trigger to level, resulting in a HUGE * increase of timer interrupts! 
*/ if ((bus_irq == 0) && (trigger == 3)) @@ -880,13 +887,13 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) intsrc.mpc_irqtype = mp_INT; intsrc.mpc_irqflag = (trigger << 2) | polarity; intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + intsrc.mpc_dstirq = pin; /* INTIN# */ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); mp_irqs[mp_irq_entries] = intsrc; @@ -896,14 +903,14 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) int es7000_plat; -void __init mp_config_acpi_legacy_irqs (void) +void __init mp_config_acpi_legacy_irqs(void) { struct mpc_config_intsrc intsrc; int i = 0; int ioapic = -1; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* + /* * Fabricate the legacy ISA bus (bus #31). */ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; @@ -917,20 +924,20 @@ void __init mp_config_acpi_legacy_irqs (void) if (es7000_plat == 1) return; - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). */ ioapic = mp_find_ioapic(0); if (ioapic < 0) return; intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ + intsrc.mpc_irqflag = 0; /* Conforming */ intsrc.mpc_srcbus = MP_ISA_BUS; #ifdef CONFIG_X86_IO_APIC intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; #endif - /* + /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. 
*/ @@ -941,28 +948,29 @@ void __init mp_config_acpi_legacy_irqs (void) struct mpc_config_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) + if (irq->mpc_srcbus == MP_ISA_BUS + && irq->mpc_srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) + (irq->mpc_dstirq == i)) break; } if (idx != mp_irq_entries) { printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ + continue; /* IRQ already used */ } intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ + intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, + "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); mp_irqs[mp_irq_entries] = intsrc; @@ -985,7 +993,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * represent all possible interrupts, and IRQs * assigned to actual devices. */ - static int gsi_to_irq[MAX_GSI_NUM]; + static int gsi_to_irq[MAX_GSI_NUM]; /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) @@ -1002,8 +1010,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) if (ioapic_renumber_irq) gsi = ioapic_renumber_irq(ioapic, gsi); - /* - * Avoid pin reprogramming. PRTs typically include entries + /* + * Avoid pin reprogramming. PRTs typically include entries * with redundant pin->gsi mappings (but unique PCI devices); * we only program the IOAPIC on the first. 
*/ @@ -1011,23 +1019,23 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); if (idx > 3) { printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); return gsi; } - if ((1<= 64, use IRQ compression */ if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { + && (triggering == ACPI_LEVEL_SENSITIVE)) { /* * For PCI devices assign IRQs in order, avoiding gaps * due to unused I/O APIC pins. @@ -1058,8 +1066,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) } io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; } -- cgit v1.2.3 From 888032cd23f0244fdefdcbe265952e7831a9cacc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:09 +0400 Subject: x86: add early flags to mpparse_32.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 57 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index f1c896ab8275..6c9c29621900 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -329,7 +329,7 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, * Read/parse the MPC */ -static int __init smp_read_mpc(struct mp_config_table *mpc) +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { char str[16]; char oem[10]; @@ -373,6 +373,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; + if (early) + return 1; + /* * Now process the configuration blocks. 
*/ @@ -622,10 +625,13 @@ static struct intel_mp_floating *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ -void __init get_smp_config(void) +static void __init __get_smp_config(unsigned early) { struct intel_mp_floating *mpf = mpf_found; + if (acpi_lapic && early) + return; + /* * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. @@ -652,6 +658,13 @@ void __init get_smp_config(void) * Now see if we need to read further. */ if (mpf->mpf_feature1 != 0) { + if (early) { + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + return; + } printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); @@ -663,7 +676,7 @@ void __init get_smp_config(void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); @@ -672,6 +685,8 @@ void __init get_smp_config(void) return; } + if (early) + return; #ifdef CONFIG_X86_IO_APIC /* * If there are no explicit MP IRQ entries, then we are @@ -695,13 +710,25 @@ void __init get_smp_config(void) } else BUG(); - printk(KERN_INFO "Processors: %d\n", num_processors); + if (!early) + printk(KERN_INFO "Processors: %d\n", num_processors); /* * Only use the first configuration found. 
*/ } -static int __init smp_scan_config(unsigned long base, unsigned long length) +void __init early_get_smp_config(void) +{ + __get_smp_config(1); +} + +void __init get_smp_config(void) +{ + __get_smp_config(0); +} + +static int __init smp_scan_config(unsigned long base, unsigned long length, + unsigned reserve) { unsigned long *bp = phys_to_virt(base); struct intel_mp_floating *mpf; @@ -750,7 +777,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) return 0; } -void __init find_smp_config(void) +static void __init __find_smp_config(unsigned reserve) { unsigned int address; @@ -762,9 +789,9 @@ void __init find_smp_config(void) * 2) Scan the top 1K of base RAM * 3) Scan the 64K of bios */ - if (smp_scan_config(0x0, 0x400) || - smp_scan_config(639 * 0x400, 0x400) || - smp_scan_config(0xF0000, 0x10000)) + if (smp_scan_config(0x0, 0x400, reserve) || + smp_scan_config(639 * 0x400, 0x400, reserve) || + smp_scan_config(0xF0000, 0x10000, reserve)) return; /* * If it is an SMP machine we should know now, unless the @@ -785,7 +812,17 @@ void __init find_smp_config(void) address = get_bios_ebda(); if (address) - smp_scan_config(address, 0x400); + smp_scan_config(address, 0x400, reserve); +} + +void __init early_find_smp_config(void) +{ + __find_smp_config(0); +} + +void __init find_smp_config(void) +{ + __find_smp_config(1); } /* -------------------------------------------------------------------------- -- cgit v1.2.3 From 746f2244065ddfbe0c5d339e309db4d2b48f185b Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:15 +0400 Subject: x86: unify arch/x86/kernel/mpparse_64.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 56 +++----------------------------------------- arch/x86/kernel/mpparse_64.c | 8 ++++++- 2 files changed, 10 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 
6c9c29621900..9a9819b2c5f5 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -82,6 +82,7 @@ static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int apicid; + char *bootup_cpu = ""; if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; @@ -90,65 +91,14 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) #ifdef CONFIG_X86_NUMAQ apicid = mpc_apic_id(m, translation_table[mpc_record]); #else - Dprintk("Processor #%d %u:%u APIC version %d\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, m->mpc_apicver); apicid = m->mpc_apicid; #endif - - if (m->mpc_featureflag & (1 << 0)) - Dprintk(" Floating point unit present.\n"); - if (m->mpc_featureflag & (1 << 7)) - Dprintk(" Machine Exception supported.\n"); - if (m->mpc_featureflag & (1 << 8)) - Dprintk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag & (1 << 9)) - Dprintk(" Internal APIC present.\n"); - if (m->mpc_featureflag & (1 << 11)) - Dprintk(" SEP present.\n"); - if (m->mpc_featureflag & (1 << 12)) - Dprintk(" MTRR present.\n"); - if (m->mpc_featureflag & (1 << 13)) - Dprintk(" PGE present.\n"); - if (m->mpc_featureflag & (1 << 14)) - Dprintk(" MCA present.\n"); - if (m->mpc_featureflag & (1 << 15)) - Dprintk(" CMOV present.\n"); - if (m->mpc_featureflag & (1 << 16)) - Dprintk(" PAT present.\n"); - if (m->mpc_featureflag & (1 << 17)) - Dprintk(" PSE present.\n"); - if (m->mpc_featureflag & (1 << 18)) - Dprintk(" PSN present.\n"); - if (m->mpc_featureflag & (1 << 19)) - Dprintk(" Cache Line Flush Instruction present.\n"); - /* 20 Reserved */ - if (m->mpc_featureflag & (1 << 21)) - Dprintk(" Debug Trace and EMON Store present.\n"); - if (m->mpc_featureflag & (1 << 22)) - Dprintk(" ACPI Thermal Throttle Registers present.\n"); - if (m->mpc_featureflag & (1 << 23)) - Dprintk(" MMX present.\n"); - if 
(m->mpc_featureflag & (1 << 24)) - Dprintk(" FXSR present.\n"); - if (m->mpc_featureflag & (1 << 25)) - Dprintk(" XMM present.\n"); - if (m->mpc_featureflag & (1 << 26)) - Dprintk(" Willamette New Instructions present.\n"); - if (m->mpc_featureflag & (1 << 27)) - Dprintk(" Self Snoop present.\n"); - if (m->mpc_featureflag & (1 << 28)) - Dprintk(" HT present.\n"); - if (m->mpc_featureflag & (1 << 29)) - Dprintk(" Thermal Monitor present.\n"); - /* 30, 31 Reserved */ - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - Dprintk(" Bootup CPU\n"); + bootup_cpu = " (Bootup-CPU)"; boot_cpu_physical_apicid = m->mpc_apicid; } + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); generic_processor_info(apicid, m->mpc_apicver); } diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 1c3bf80b3ba6..5c916383bb71 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -65,19 +65,25 @@ static int __init mpf_checksum(unsigned char *mp, int len) static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { + int apicid; char *bootup_cpu = ""; if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; return; } +#ifdef CONFIG_X86_NUMAQ + apicid = mpc_apic_id(m, translation_table[mpc_record]); +#else + apicid = m->mpc_apicid; +#endif if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { bootup_cpu = " (Bootup-CPU)"; boot_cpu_physical_apicid = m->mpc_apicid; } printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); - generic_processor_info(m->mpc_apicid, 0); + generic_processor_info(apicid, m->mpc_apicver); } static void __init MP_bus_info(struct mpc_config_bus *m) -- cgit v1.2.3 From f8924e770e048429ae13bfabe1ddad9bf1e64df7 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:21 +0400 Subject: x86: unify mp_bus_info Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 15 ++++++++------- arch/x86/kernel/mpparse_64.c | 38 
++++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 9a9819b2c5f5..9120573e2616 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -124,8 +124,12 @@ static void __init MP_bus_info(struct mpc_config_bus *m) } #endif - set_bit(m->mpc_busid, mp_bus_not_pci); - if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; +#endif + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { #ifdef CONFIG_X86_NUMAQ mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif @@ -134,16 +138,13 @@ static void __init MP_bus_info(struct mpc_config_bus *m) mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - } else if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else { - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); #endif - } + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 5c916383bb71..831097f2022a 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -92,17 +92,43 @@ static void __init MP_bus_info(struct mpc_config_bus *m) memcpy(str, m->mpc_bustype, 6); str[6] = 0; + +#ifdef CONFIG_X86_NUMAQ + mpc_oem_bus_info(m, str, translation_table[mpc_record]); +#else 
Dprintk("Bus #%d is %s\n", m->mpc_busid, str); +#endif + +#if MAX_MP_BUSSES < 256 + if (m->mpc_busid >= MAX_MP_BUSSES) { + printk(KERN_WARNING "MP table busid value (%d) for bustype %s " + " is too large, max. supported is %d\n", + m->mpc_busid, str, MAX_MP_BUSSES - 1); + return; + } +#endif - if (strncmp(str, "ISA", 3) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); - } else if (strncmp(str, "PCI", 3) == 0) { + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; +#endif + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { +#ifdef CONFIG_X86_NUMAQ + mpc_oem_pci_bus(m, translation_table[mpc_record]); +#endif clear_bit(m->mpc_busid, mp_bus_not_pci); mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; mp_current_pci_id++; - } else { - printk(KERN_ERR "Unknown bustype %s\n", str); - } +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; +#endif + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } static int bad_ioapic(unsigned long address) -- cgit v1.2.3 From e950bea8bff23c14eb38dc706aadf197ed81abf4 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:27 +0400 Subject: x86: unify smp_read_mpc Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 25 +++++++++++++------------ arch/x86/kernel/mpparse_64.c | 27 ++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 9120573e2616..c185065c3ebc 100644 
--- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -288,39 +288,40 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) unsigned char *mpt = ((unsigned char *)mpc) + count; if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", - *(u32 *) mpc->mpc_signature); + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], mpc->mpc_signature[1], + mpc->mpc_signature[2], mpc->mpc_signature[3]); return 0; } if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { - printk(KERN_ERR "SMP mptable: checksum error!\n"); + printk(KERN_ERR "MPTABLE: checksum error!\n"); return 0; } if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", mpc->mpc_spec); return 0; } if (!mpc->mpc_lapic) { - printk(KERN_ERR "SMP mptable: null local APIC address!\n"); + printk(KERN_ERR "MPTABLE: null local APIC address!\n"); return 0; } memcpy(oem, mpc->mpc_oem, 8); oem[8] = 0; - printk(KERN_INFO "OEM ID: %s ", oem); + printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); memcpy(str, mpc->mpc_productid, 12); str[12] = 0; printk("Product ID: %s ", str); +#ifdef CONFIG_X86_32 mps_oem_check(mpc, oem, str); +#endif + printk(KERN_INFO "MPTABLE: Product ID: %s ", str); - printk("APIC at: 0x%X\n", mpc->mpc_lapic); + printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); - /* - * Save the local APIC address (it might be non-default) -- but only - * if we're not using ACPI. 
- */ + /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -399,7 +400,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } setup_apic_routing(); if (!num_processors) - printk(KERN_ERR "SMP mptable: no processors registered!\n"); + printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; } diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 831097f2022a..d188848a893e 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -188,13 +188,13 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { char str[16]; + char oem[10]; int count = sizeof(*mpc); unsigned char *mpt = ((unsigned char *)mpc) + count; if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], + mpc->mpc_signature[0], mpc->mpc_signature[1], mpc->mpc_signature[2], mpc->mpc_signature[3]); return 0; } @@ -211,12 +211,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) printk(KERN_ERR "MPTABLE: null local APIC address!\n"); return 0; } - memcpy(str, mpc->mpc_oem, 8); - str[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", str); + memcpy(oem, mpc->mpc_oem, 8); + oem[8] = 0; + printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); memcpy(str, mpc->mpc_productid, 12); str[12] = 0; + printk("Product ID: %s ", str); + +#ifdef CONFIG_X86_32 + mps_oem_check(mpc, oem, str); +#endif printk(KERN_INFO "MPTABLE: Product ID: %s ", str); printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); @@ -231,12 +236,16 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) /* * Now process the configuration blocks. 
*/ +#ifdef CONFIG_X86_NUMAQ + mpc_record = 0; +#endif while (count < mpc->mpc_length) { switch (*mpt) { case MP_PROCESSOR: { struct mpc_config_processor *m = (struct mpc_config_processor *)mpt; + /* ACPI may have already provided this data */ if (!acpi_lapic) MP_processor_info(m); mpt += sizeof(*m); @@ -280,7 +289,15 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) count += sizeof(*m); break; } + default: + { + count = mpc->mpc_length; + break; + } } +#ifdef CONFIG_X86_NUMAQ + ++mpc_record; +#endif } setup_apic_routing(); if (!num_processors) -- cgit v1.2.3 From 62441bf1e0d5153dfb0cf8497df16deacff90789 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:34 +0400 Subject: x86: unify construct_default_ioirq_mptable Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 15 +++++++-------- arch/x86/kernel/mpparse_64.c | 18 ++++++++++++++---- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index c185065c3ebc..e92c29e5fd4f 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -436,13 +436,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * If it does, we assume it's valid. */ if (mpc_default_type == 5) { - printk(KERN_INFO - "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " + "falling back to ELCR\n"); - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) - || ELCR_trigger(13)) - printk(KERN_WARNING - "ELCR contains invalid data... not using ELCR\n"); + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || + ELCR_trigger(13)) + printk(KERN_ERR "ELCR contains invalid data... 
" + "not using ELCR\n"); else { printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); @@ -523,8 +523,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) bus.mpc_busid = 0; switch (mpc_default_type) { default: - printk("???\n"); - printk(KERN_ERR "Unknown standard configuration %d\n", + printk(KERN_ERR "???\nUnknown standard configuration %d\n", mpc_default_type); /* fall through */ case 1: diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index d188848a893e..11b74c9b8e01 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -401,10 +401,12 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) * 2 CPUs, numbered 0 & 1. */ processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicver = 0; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = 0; - processor.mpc_featureflag = 0; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; processor.mpc_reserved[0] = 0; processor.mpc_reserved[1] = 0; for (i = 0; i < 2; i++) { @@ -423,6 +425,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) case 5: memcpy(bus.mpc_bustype, "ISA ", 6); break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); } MP_bus_info(&bus); if (mpc_default_type > 4) { @@ -433,7 +443,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) ioapic.mpc_type = MP_IOAPIC; ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = 0; + ioapic.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; ioapic.mpc_flags = MPC_APIC_USABLE; ioapic.mpc_apicaddr = 0xFEC00000; MP_ioapic_info(&ioapic); -- cgit v1.2.3 From 4421b1c8b9f5da24f8c737ede2c05d399dea2015 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:40 +0400 Subject: x86: unify get_smp_config Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 20 +++++++++++--------- arch/x86/kernel/mpparse_64.c | 10 +++++++++- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index e92c29e5fd4f..bc2000ee0391 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -588,15 +588,16 @@ static void __init __get_smp_config(unsigned early) * processors, where MPS only supports physical. */ if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO - "Using ACPI (MADT) for SMP configuration information\n"); + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " + "information\n"); return; } else if (acpi_lapic) - printk(KERN_INFO - "Using ACPI for processor (LAPIC) configuration information\n"); + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); +#ifdef CONFIG_X86_32 if (mpf->mpf_feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; @@ -604,7 +605,7 @@ static void __init __get_smp_config(unsigned early) printk(KERN_INFO " Virtual Wire compatibility mode.\n"); pic_mode = 0; } - +#endif /* * Now see if we need to read further. */ @@ -631,8 +632,8 @@ static void __init __get_smp_config(unsigned early) smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR - "... disabling SMP support. (tell your hw vendor)\n"); + printk(KERN_ERR "... disabling SMP support. 
" + "(tell your hw vendor)\n"); return; } @@ -647,8 +648,9 @@ static void __init __get_smp_config(unsigned early) if (!mp_irq_entries) { struct mpc_config_bus bus; - printk(KERN_ERR - "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " + "using default mptable. " + "(tell your hw vendor)\n"); bus.mpc_type = MP_BUS; bus.mpc_busid = 0; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 11b74c9b8e01..8c7af5b7ddd4 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -490,7 +490,15 @@ static void __init __get_smp_config(unsigned early) printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - +#ifdef CONFIG_X86_32 + if (mpf->mpf_feature2 & (1 << 7)) { + printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(KERN_INFO " Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } +#endif /* * Now see if we need to read further. 
*/ -- cgit v1.2.3 From 92fd4b7abdb2b5b85d73ca0adbb6ad3f8b79f805 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:46 +0400 Subject: x86: unify smp_scan_config Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 21 ++++++++++++++++----- arch/x86/kernel/mpparse_64.c | 26 +++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index bc2000ee0391..7feafa5040d8 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -683,12 +683,13 @@ void __init get_smp_config(void) static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { - unsigned long *bp = phys_to_virt(base); + extern void __bad_mpf_size(void); + unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp, length); + Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); + __bad_mpf_size(); while (length > 0) { mpf = (struct intel_mp_floating *)bp; @@ -699,6 +700,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, || (mpf->mpf_specification == 4))) { smp_found_config = 1; + mpf_found = mpf; +#ifdef CONFIG_X86_32 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", mpf, virt_to_phys(mpf)); reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, @@ -721,8 +724,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, BOOTMEM_DEFAULT); } - mpf_found = mpf; - return 1; +#else + if (!reserve) + return 1; + + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem_generic(mpf->mpf_physptr, + PAGE_SIZE); +#endif + return 1; } bp += 4; length -= 16; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8c7af5b7ddd4..9a9610089910 
100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -593,7 +593,30 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, smp_found_config = 1; mpf_found = mpf; +#ifdef CONFIG_X86_32 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", + mpf, virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, + BOOTMEM_DEFAULT); + if (mpf->mpf_physptr) { + /* + * We cannot access to MPC table to compute + * table size yet, as only few megabytes from + * the bottom is mapped now. + * PC-9800's MPC table places on the very last + * of physical memory; so that simply reserving + * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * in reserve_bootmem. + */ + unsigned long size = PAGE_SIZE; + unsigned long end = max_low_pfn * PAGE_SIZE; + if (mpf->mpf_physptr + size > end) + size = end - mpf->mpf_physptr; + reserve_bootmem(mpf->mpf_physptr, size, + BOOTMEM_DEFAULT); + } +#else if (!reserve) return 1; @@ -601,7 +624,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, if (mpf->mpf_physptr) reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE); - return 1; +#endif + return 1; } bp += 4; length -= 16; -- cgit v1.2.3 From 22cbb4bd12a86b80125accb77515482894ee670f Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:42:53 +0400 Subject: x86: unify uniq_io_apic_id Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 14 +++++++++++++- arch/x86/kernel/mpparse_64.c | 8 ++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 7feafa5040d8..a50a31331f97 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -814,17 +814,29 @@ static int mp_find_ioapic(int gsi) } printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; } static u8 uniq_ioapic_id(u8 id) { +#ifdef CONFIG_X86_32 if 
((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) return io_apic_get_unique_id(nr_ioapics, id); else return id; +#else + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mpc_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif } void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 9a9610089910..de64a89434c6 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -707,6 +707,13 @@ static int mp_find_ioapic(int gsi) static u8 uniq_ioapic_id(u8 id) { +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else int i; DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); @@ -717,6 +724,7 @@ static u8 uniq_ioapic_id(u8 id) if (!test_bit(id, used)) return id; return find_first_zero_bit(used, 256); +#endif } void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -- cgit v1.2.3 From 57b733e902f179dc453609db6334f8e9801cbb7a Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:43:00 +0400 Subject: x86: unify mp_register_ioapic Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 10 ++++++---- arch/x86/kernel/mpparse_64.c | 16 +++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index a50a31331f97..1ad8a5d4e3ee 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -854,8 +854,11 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); - +#else + mp_ioapics[idx].mpc_apicver = 0; +#endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). @@ -865,10 +868,9 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, - mp_ioapics[idx].mpc_apicaddr, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index de64a89434c6..5f1f7fdcd627 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -742,11 +742,14 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); +#else mp_ioapics[idx].mpc_apicver = 0; - - /* - * Build basic IRQ lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI IRQs). +#endif + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
*/ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; @@ -755,9 +758,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; } -- cgit v1.2.3 From c769bfee5731f2614983bd7a9079eb90514e4b9f Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:43:06 +0400 Subject: x86: unify mp_config_acpi_legacy_irqs Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_64.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 5f1f7fdcd627..8ad365136a6b 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -804,19 +804,31 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) panic("Max # of irq sources exceeded!\n"); } +int es7000_plat; + void __init mp_config_acpi_legacy_irqs(void) { struct mpc_config_intsrc intsrc; int i = 0; int ioapic = -1; - /* +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* * Fabricate the legacy ISA bus (bus #31). */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif set_bit(MP_ISA_BUS, mp_bus_not_pci); + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). + /* + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) + return; + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). 
*/ ioapic = mp_find_ioapic(0); if (ioapic < 0) @@ -827,7 +839,7 @@ void __init mp_config_acpi_legacy_irqs(void) intsrc.mpc_srcbus = MP_ISA_BUS; intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - /* + /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. */ -- cgit v1.2.3 From cfa08d6cc3421bb08ebaa34467107e58999a7c28 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:43:12 +0400 Subject: x86: unify mp_register_gsi Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse_32.c | 22 ++++++++++---- arch/x86/kernel/mpparse_64.c | 70 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 1ad8a5d4e3ee..6376791cffe5 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -994,14 +994,15 @@ void __init mp_config_acpi_legacy_irqs(void) } } -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 - int mp_register_gsi(u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + static int pci_irq = IRQ_COMPRESSION_START; /* * Mapping between Global System Interrupts, which @@ -1009,6 +1010,11 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * assigned to actual devices. */ static int gsi_to_irq[MAX_GSI_NUM]; +#else + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +#endif /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) @@ -1022,8 +1028,10 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; +#ifdef CONFIG_X86_32 if (ioapic_renumber_irq) gsi = ioapic_renumber_irq(ioapic, gsi); +#endif /* * Avoid pin reprogramming. 
PRTs typically include entries @@ -1041,11 +1049,15 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else + return gsi; +#endif } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); - +#ifdef CONFIG_X86_32 /* * For GSI >= 64, use IRQ compression */ @@ -1079,7 +1091,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } } - +#endif io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8ad365136a6b..7d742577d68a 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -685,6 +685,8 @@ void __init find_smp_config(void) #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_IO_APIC + #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 @@ -770,7 +772,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) int ioapic = -1; int pin = -1; - /* + /* * Convert 'gsi' to 'ioapic.pin'. */ ioapic = mp_find_ioapic(gsi); @@ -780,7 +782,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) /* * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE + * erroneously sets the trigger to level, resulting in a HUGE * increase of timer interrupts! 
*/ if ((bus_irq == 0) && (trigger == 3)) @@ -886,9 +888,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + + static int pci_irq = IRQ_COMPRESSION_START; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; +#else if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; +#endif /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) @@ -902,8 +917,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - /* - * Avoid pin reprogramming. PRTs typically include entries +#ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) + gsi = ioapic_renumber_irq(ioapic, gsi); +#endif + + /* + * Avoid pin reprogramming. PRTs typically include entries * with redundant pin->gsi mappings (but unique PCI devices); * we only program the IOAPIC on the first. */ @@ -918,14 +938,54 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else return gsi; +#endif } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); - +#ifdef CONFIG_X86_32 + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. 
+ */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } +#endif io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; } + +#endif /* CONFIG_X86_IO_APIC */ #endif /* CONFIG_ACPI */ -- cgit v1.2.3 From 85bdddec5eaeb2464bf1cad6a17225416e65a8d6 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 4 Apr 2008 23:43:18 +0400 Subject: x86: merge mpparse_{32,64}.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/mpparse.c | 1102 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/mpparse_32.c | 1102 ------------------------------------------ arch/x86/kernel/mpparse_64.c | 991 ------------------------------------- 4 files changed, 1103 insertions(+), 2094 deletions(-) create mode 100644 arch/x86/kernel/mpparse.c delete mode 100644 arch/x86/kernel/mpparse_32.c delete mode 100644 arch/x86/kernel/mpparse_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0bf2fb55aa74..fdd8395e0ed3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o 
smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o -obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o +obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c new file mode 100644 index 000000000000..6e5e4547981c --- /dev/null +++ b/arch/x86/kernel/mpparse.c @@ -0,0 +1,1102 @@ +/* + * Intel Multiprocessor Specification 1.1 and 1.4 + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 2008 Alexey Starikovskiy + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef CONFIG_X86_32 +#include +#include +#endif + +/* Have we found an MP table */ +int smp_found_config; + +/* + * Various Linux-internal data structures created from the + * MP-table. + */ +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); +int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; + +static int mp_current_pci_id; + +int pic_mode; + +/* + * Intel MP BIOS table parsing routines: + */ + +/* + * Checksum an MP configuration block. + */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +#ifdef CONFIG_X86_NUMAQ +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... 
+ */ + +static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] + __cpuinitdata; +#endif + +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +{ + int apicid; + char *bootup_cpu = ""; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; + return; + } +#ifdef CONFIG_X86_NUMAQ + apicid = mpc_apic_id(m, translation_table[mpc_record]); +#else + apicid = m->mpc_apicid; +#endif + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + bootup_cpu = " (Bootup-CPU)"; + boot_cpu_physical_apicid = m->mpc_apicid; + } + + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); + generic_processor_info(apicid, m->mpc_apicver); +} + +static void __init MP_bus_info(struct mpc_config_bus *m) +{ + char str[7]; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + +#ifdef CONFIG_X86_NUMAQ + mpc_oem_bus_info(m, str, translation_table[mpc_record]); +#else + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); +#endif + +#if MAX_MP_BUSSES < 256 + if (m->mpc_busid >= MAX_MP_BUSSES) { + printk(KERN_WARNING "MP table busid value (%d) for bustype %s " + " is too large, max. 
supported is %d\n", + m->mpc_busid, str, MAX_MP_BUSSES - 1); + return; + } +#endif + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; +#endif + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { +#ifdef CONFIG_X86_NUMAQ + mpc_oem_pci_bus(m, translation_table[mpc_record]); +#endif + clear_bit(m->mpc_busid, mp_bus_not_pci); + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; +#endif + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); +} + +#ifdef CONFIG_X86_IO_APIC + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +static void __init MP_ioapic_info(struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + + if (bad_ioapic(m->mpc_apicaddr)) + return; + + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +{ + mp_irqs[mp_irq_entries] = *m; + Dprintk("Int: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + 
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +#endif + +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) +{ + Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); +} + +#ifdef CONFIG_X86_NUMAQ +static void __init MP_translation_info(struct mpc_config_translation *m) +{ + printk(KERN_INFO + "Translation: record %d, type %d, quad %d, global %d, local %d\n", + mpc_record, m->trans_type, m->trans_quad, m->trans_global, + m->trans_local); + + if (mpc_record >= MAX_MPC_ENTRY) + printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); + else + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) + node_set_online(m->trans_quad); +} + +/* + * Read/parse the MPC oem tables + */ + +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, + unsigned short oemsize) +{ + int count = sizeof(*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable) + count; + + mpc_record = 0; + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... 
\n", + oemtable); + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { + printk(KERN_WARNING + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], oemtable->oem_signature[1], + oemtable->oem_signature[2], oemtable->oem_signature[3]); + return; + } + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { + printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); + return; + } + while (count < oemtable->oem_length) { + switch (*oemptr) { + case MP_TRANSLATION: + { + struct mpc_config_translation *m = + (struct mpc_config_translation *)oemptr; + MP_translation_info(m); + oemptr += sizeof(*m); + count += sizeof(*m); + ++mpc_record; + break; + } + default: + { + printk(KERN_WARNING + "Unrecognised OEM table entry type! - %d\n", + (int)*oemptr); + return; + } + } + } +} + +static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (strncmp(oem, "IBM NUMA", 8)) + printk("Warning! May not be a NUMA-Q system!\n"); + if (mpc->mpc_oemptr) + smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, + mpc->mpc_oemsize); +} +#endif /* CONFIG_X86_NUMAQ */ + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +{ + char str[16]; + char oem[10]; + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], mpc->mpc_signature[1], + mpc->mpc_signature[2], mpc->mpc_signature[3]); + return 0; + } + if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { + printk(KERN_ERR "MPTABLE: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { + printk(KERN_ERR "MPTABLE: null local APIC address!\n"); + return 0; + } + memcpy(oem, 
mpc->mpc_oem, 8); + oem[8] = 0; + printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); + + memcpy(str, mpc->mpc_productid, 12); + str[12] = 0; + printk("Product ID: %s ", str); + +#ifdef CONFIG_X86_32 + mps_oem_check(mpc, oem, str); +#endif + printk(KERN_INFO "MPTABLE: Product ID: %s ", str); + + printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) + mp_lapic_addr = mpc->mpc_lapic; + + if (early) + return 1; + + /* + * Now process the configuration blocks. + */ +#ifdef CONFIG_X86_NUMAQ + mpc_record = 0; +#endif + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; + /* ACPI may have already provided this data */ + if (!acpi_lapic) + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_ioapic *m = + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); +#endif + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + } + case MP_INTSRC: + { +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); +#endif + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + default: + { + count = mpc->mpc_length; + break; + } + } +#ifdef CONFIG_X86_NUMAQ + ++mpc_record; +#endif + } + setup_apic_routing(); + if (!num_processors) + printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; +} + +#ifdef CONFIG_X86_IO_APIC + 
+static int __init ELCR_trigger(unsigned int irq) +{ + unsigned int port; + + port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; +} + +static void __init construct_default_ioirq_mptable(int mpc_default_type) +{ + struct mpc_config_intsrc intsrc; + int i; + int ELCR_fallback = 0; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_srcbus = 0; + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + + intsrc.mpc_irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries + * in the MP table. To prevent the PCI interrupts from being set up + * incorrectly, we try to use the ELCR. The sanity check to see if + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can + * never be level sensitive, so we simply see if the ELCR agrees. + * If it does, we assume it's valid. + */ + if (mpc_default_type == 5) { + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " + "falling back to ELCR\n"); + + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || + ELCR_trigger(13)) + printk(KERN_ERR "ELCR contains invalid data... " + "not using ELCR\n"); + else { + printk(KERN_INFO + "Using ELCR to identify PCI interrupts\n"); + ELCR_fallback = 1; + } + } + + for (i = 0; i < 16; i++) { + switch (mpc_default_type) { + case 2: + if (i == 0 || i == 13) + continue; /* IRQ0 & IRQ13 not connected */ + /* fall through */ + default: + if (i == 2) + continue; /* IRQ2 is never connected */ + } + + if (ELCR_fallback) { + /* + * If the ELCR indicates a level-sensitive interrupt, we + * copy that information over to the MP table in the + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) + intsrc.mpc_irqflag = 13; + else + intsrc.mpc_irqflag = 0; + } + + intsrc.mpc_srcbusirq = i; + intsrc.mpc_dstirq = i ? 
i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + + intsrc.mpc_irqtype = mp_ExtINT; + intsrc.mpc_srcbusirq = 0; + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); +} + +#endif + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_bus bus; +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_ioapic ioapic; +#endif + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + switch (mpc_default_type) { + default: + printk(KERN_ERR "???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { + bus.mpc_busid = 1; + memcpy(bus.mpc_bustype, "PCI ", 6); + MP_bus_info(&bus); + } + +#ifdef CONFIG_X86_IO_APIC + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; + ioapic.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* + * We set up most of the low 16 IO-APIC pins according to MPS rules. + */ + construct_default_ioirq_mptable(mpc_default_type); +#endif + lintsrc.mpc_type = MP_LINTSRC; + lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_srcbusid = 0; + lintsrc.mpc_srcbusirq = 0; + lintsrc.mpc_destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { + lintsrc.mpc_irqtype = linttypes[i]; + lintsrc.mpc_destapiclint = i; + MP_lintsrc_info(&lintsrc); + } +} + +static struct intel_mp_floating *mpf_found; + +/* + * Scan the memory blocks for an SMP configuration block. + */ +static void __init __get_smp_config(unsigned early) +{ + struct intel_mp_floating *mpf = mpf_found; + + if (acpi_lapic && early) + return; + /* + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. + */ + if (acpi_lapic && acpi_ioapic) { + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " + "information\n"); + return; + } else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); + + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", + mpf->mpf_specification); +#ifdef CONFIG_X86_32 + if (mpf->mpf_feature2 & (1 << 7)) { + printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(KERN_INFO " Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } +#endif + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + if (early) { + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + return; + } + + printk(KERN_INFO "Default MP configuration #%d\n", + mpf->mpf_feature1); + construct_default_ISA_mptable(mpf->mpf_feature1); + + } else if (mpf->mpf_physptr) { + + /* + * Read the physical hardware table. Anything here will + * override the defaults. 
+ */ + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + smp_found_config = 0; + printk(KERN_ERR + "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. " + "(tell your hw vendor)\n"); + return; + } + + if (early) + return; +#ifdef CONFIG_X86_IO_APIC + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_config_bus bus; + + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " + "using default mptable. " + "(tell your hw vendor)\n"); + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + memcpy(bus.mpc_bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } +#endif + } else + BUG(); + + if (!early) + printk(KERN_INFO "Processors: %d\n", num_processors); + /* + * Only use the first configuration found. + */ +} + +void __init early_get_smp_config(void) +{ + __get_smp_config(1); +} + +void __init get_smp_config(void) +{ + __get_smp_config(0); +} + +static int __init smp_scan_config(unsigned long base, unsigned long length, + unsigned reserve) +{ + extern void __bad_mpf_size(void); + unsigned int *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); + if (sizeof(*mpf) != 16) + __bad_mpf_size(); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4))) { + + smp_found_config = 1; + mpf_found = mpf; +#ifdef CONFIG_X86_32 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", + mpf, virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, + BOOTMEM_DEFAULT); + if (mpf->mpf_physptr) { + /* + * We cannot access to MPC table to compute + * table size yet, as only few megabytes from + * the bottom is 
mapped now. + * PC-9800's MPC table places on the very last + * of physical memory; so that simply reserving + * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * in reserve_bootmem. + */ + unsigned long size = PAGE_SIZE; + unsigned long end = max_low_pfn * PAGE_SIZE; + if (mpf->mpf_physptr + size > end) + size = end - mpf->mpf_physptr; + reserve_bootmem(mpf->mpf_physptr, size, + BOOTMEM_DEFAULT); + } + +#else + if (!reserve) + return 1; + + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem_generic(mpf->mpf_physptr, + PAGE_SIZE); +#endif + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +static void __init __find_smp_config(unsigned reserve) +{ + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... + * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0, 0x400, reserve) || + smp_scan_config(639 * 0x400, 0x400, reserve) || + smp_scan_config(0xF0000, 0x10000, reserve)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There are Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. These loaders are buggy and + * should be fixed. + * + * MP1.4 SPEC states to only scan first 1K of 4K EBDA. 
+ */ + + address = get_bios_ebda(); + if (address) + smp_scan_config(address, 0x400, reserve); +} + +void __init early_find_smp_config(void) +{ + __find_smp_config(0); +} + +void __init find_smp_config(void) +{ + __find_smp_config(1); +} + +/* -------------------------------------------------------------------------- + ACPI-based MP Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI + +#ifdef CONFIG_X86_IO_APIC + +#define MP_ISA_BUS 0 +#define MP_MAX_IOAPIC_PIN 127 + +extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; + +static int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_ioapic_routing[i].gsi_base) + && (gsi <= mp_ioapic_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +static u8 uniq_ioapic_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mpc_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].mpc_type = MP_IOAPIC; + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mpc_apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); +#else + mp_ioapics[idx].mpc_apicver = 0; +#endif + /* + * Build 
basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].gsi_base = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; +} + +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +{ + struct mpc_config_intsrc intsrc; + int ioapic = -1; + int pin = -1; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! 
+ */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_irqflag = (trigger << 2) | polarity; + intsrc.mpc_srcbus = MP_ISA_BUS; + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + intsrc.mpc_dstirq = pin; /* INTIN# */ + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", + intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); + + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); +} + +int es7000_plat; + +void __init mp_config_acpi_legacy_irqs(void) +{ + struct mpc_config_intsrc intsrc; + int i = 0; + int ioapic = -1; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif + set_bit(MP_ISA_BUS, mp_bus_not_pci); + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + + /* + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) + return; + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* Conforming */ + intsrc.mpc_srcbus = MP_ISA_BUS; +#ifdef CONFIG_X86_IO_APIC + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; +#endif + /* + * Use the default configuration for the IRQs 0-15. Unless + * overridden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + int idx; + + for (idx = 0; idx < mp_irq_entries; idx++) { + struct mpc_config_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? 
*/ + if (irq->mpc_srcbus == MP_ISA_BUS + && irq->mpc_srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ + if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && + (irq->mpc_dstirq == i)) + break; + } + + if (idx != mp_irq_entries) { + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); + continue; /* IRQ already used */ + } + + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_srcbusirq = i; /* Identity mapped */ + intsrc.mpc_dstirq = i; + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " + "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, + intsrc.mpc_dstirq); + + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + } +} + +int mp_register_gsi(u32 gsi, int triggering, int polarity) +{ + int ioapic = -1; + int ioapic_pin = 0; + int idx, bit = 0; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + + static int pci_irq = IRQ_COMPRESSION_START; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; +#else + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +#endif + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + +#ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) + gsi = ioapic_renumber_irq(ioapic, gsi); +#endif + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + bit = ioapic_pin % 32; + idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); + if (idx > 3) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + return gsi; + } + if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else + return gsi; +#endif + } + + mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); +#ifdef CONFIG_X86_32 + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } +#endif + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); + return gsi; +} + +#endif /* CONFIG_X86_IO_APIC */ +#endif /* CONFIG_ACPI */ diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c deleted file mode 100644 index 6376791cffe5..000000000000 --- a/arch/x86/kernel/mpparse_32.c +++ /dev/null @@ -1,1102 +0,0 @@ -/* - * Intel Multiprocessor Specification 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Fixes - * Erich Boleyn : MP v1.4 and additional changes. - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Maciej W. Rozycki: Bits for default MP configurations - * Paul Diefenbaugh: Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; -static int mp_current_pci_id; - -int pic_mode; - -/* - * Intel MP BIOS table parsing routines: - */ - -/* - * Checksum an MP configuration block. - */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -#ifdef CONFIG_X86_NUMAQ -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... 
- */ - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] - __cpuinitdata; -#endif - -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) -{ - int apicid; - char *bootup_cpu = ""; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) { - disabled_cpus++; - return; - } -#ifdef CONFIG_X86_NUMAQ - apicid = mpc_apic_id(m, translation_table[mpc_record]); -#else - apicid = m->mpc_apicid; -#endif - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - bootup_cpu = " (Bootup-CPU)"; - boot_cpu_physical_apicid = m->mpc_apicid; - } - - printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); - generic_processor_info(apicid, m->mpc_apicver); -} - -static void __init MP_bus_info(struct mpc_config_bus *m) -{ - char str[7]; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - -#ifdef CONFIG_X86_NUMAQ - mpc_oem_bus_info(m, str, translation_table[mpc_record]); -#else - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); -#endif - -#if MAX_MP_BUSSES < 256 - if (m->mpc_busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. 
supported is %d\n", - m->mpc_busid, str, MAX_MP_BUSSES - 1); - return; - } -#endif - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; -#endif - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { -#ifdef CONFIG_X86_NUMAQ - mpc_oem_pci_bus(m, translation_table[mpc_record]); -#endif - clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; -#endif - } else - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); -} - -#ifdef CONFIG_X86_IO_APIC - -static int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); - return 1; - } - return 0; -} - -static void __init MP_ioapic_info(struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - - if (bad_ioapic(m->mpc_apicaddr)) - return; - - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) -{ - mp_irqs[mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - 
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -#endif - -static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) -{ - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); -} - -#ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info(struct mpc_config_translation *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, - unsigned short oemsize) -{ - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... 
\n", - oemtable); - if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], oemtable->oem_signature[1], - oemtable->oem_signature[2], oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m = - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } - } - } -} - -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! May not be a NUMA-Q system!\n"); - if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, - mpc->mpc_oemsize); -} -#endif /* CONFIG_X86_NUMAQ */ - -/* - * Read/parse the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) -{ - char str[16]; - char oem[10]; - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; - - if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], mpc->mpc_signature[1], - mpc->mpc_signature[2], mpc->mpc_signature[3]); - return 0; - } - if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { - printk(KERN_ERR "MPTABLE: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk(KERN_ERR "MPTABLE: null local APIC address!\n"); - return 0; - } - memcpy(oem, 
mpc->mpc_oem, 8); - oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); - - memcpy(str, mpc->mpc_productid, 12); - str[12] = 0; - printk("Product ID: %s ", str); - -#ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); -#endif - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); - - printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); - - /* save the local APIC address, it might be non-default */ - if (!acpi_lapic) - mp_lapic_addr = mpc->mpc_lapic; - - if (early) - return 1; - - /* - * Now process the configuration blocks. - */ -#ifdef CONFIG_X86_NUMAQ - mpc_record = 0; -#endif - while (count < mpc->mpc_length) { - switch (*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m = - (struct mpc_config_processor *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m = - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { -#ifdef CONFIG_X86_IO_APIC - struct mpc_config_ioapic *m = - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); -#endif - mpt += sizeof(struct mpc_config_ioapic); - count += sizeof(struct mpc_config_ioapic); - break; - } - case MP_INTSRC: - { -#ifdef CONFIG_X86_IO_APIC - struct mpc_config_intsrc *m = - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); -#endif - mpt += sizeof(struct mpc_config_intsrc); - count += sizeof(struct mpc_config_intsrc); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m = - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - default: - { - count = mpc->mpc_length; - break; - } - } -#ifdef CONFIG_X86_NUMAQ - ++mpc_record; -#endif - } - setup_apic_routing(); - if (!num_processors) - printk(KERN_ERR "MPTABLE: no processors registered!\n"); - return num_processors; -} - -#ifdef CONFIG_X86_IO_APIC - 
-static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " - "falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || - ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... " - "not using ELCR\n"); - else { - printk(KERN_INFO - "Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? 
i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); -} - -#endif - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_config_processor processor; - struct mpc_config_bus bus; -#ifdef CONFIG_X86_IO_APIC - struct mpc_config_ioapic ioapic; -#endif - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - -#ifdef CONFIG_X86_IO_APIC - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); -#endif - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct intel_mp_floating *mpf_found; - -/* - * Scan the memory blocks for an SMP configuration block. - */ -static void __init __get_smp_config(unsigned early) -{ - struct intel_mp_floating *mpf = mpf_found; - - if (acpi_lapic && early) - return; - - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. - */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); - return; - } else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); - - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); -#ifdef CONFIG_X86_32 - if (mpf->mpf_feature2 & (1 << 7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } -#endif - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - if (early) { - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - return; - } - - printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. 
- */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { - smp_found_config = 0; - printk(KERN_ERR - "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. " - "(tell your hw vendor)\n"); - return; - } - - if (early) - return; -#ifdef CONFIG_X86_IO_APIC - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. " - "(tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } -#endif - } else - BUG(); - - if (!early) - printk(KERN_INFO "Processors: %d\n", num_processors); - /* - * Only use the first configuration found. - */ -} - -void __init early_get_smp_config(void) -{ - __get_smp_config(1); -} - -void __init get_smp_config(void) -{ - __get_smp_config(0); -} - -static int __init smp_scan_config(unsigned long base, unsigned long length, - unsigned reserve) -{ - extern void __bad_mpf_size(void); - unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); - if (sizeof(*mpf) != 16) - __bad_mpf_size(); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { - - smp_found_config = 1; - mpf_found = mpf; -#ifdef CONFIG_X86_32 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, - BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { - /* - * We cannot access to MPC table to compute - * table size yet, as only few megabytes from - * the bottom is 
mapped now. - * PC-9800's MPC table places on the very last - * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() - * in reserve_bootmem. - */ - unsigned long size = PAGE_SIZE; - unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size, - BOOTMEM_DEFAULT); - } - -#else - if (!reserve) - return 1; - - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, - PAGE_SIZE); -#endif - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -static void __init __find_smp_config(unsigned reserve) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0, 0x400, reserve) || - smp_scan_config(639 * 0x400, 0x400, reserve) || - smp_scan_config(0xF0000, 0x10000, reserve)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - * - * MP1.4 SPEC states to only scan first 1K of 4K EBDA. 
- */ - - address = get_bios_ebda(); - if (address) - smp_scan_config(address, 0x400, reserve); -} - -void __init early_find_smp_config(void) -{ - __find_smp_config(0); -} - -void __init find_smp_config(void) -{ - __find_smp_config(1); -} - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_IO_APIC - -#define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 - -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic(int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -static u8 uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].mpc_apicver = 0; -#endif - /* - * Build 
basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). - */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} - -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! 
- */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); -} - -int es7000_plat; - -void __init mp_config_acpi_legacy_irqs(void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; -#endif - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? 
*/ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } -} - -int mp_register_gsi(u32 gsi, int triggering, int polarity) -{ - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 - - static int pci_irq = IRQ_COMPRESSION_START; - /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; -#endif - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - -#ifdef CONFIG_X86_32 - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); -#endif - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); - if (idx > 3) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; - } - if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif - } - - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); -#ifdef CONFIG_X86_32 - /* - * For GSI >= 64, use IRQ compression - */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - return gsi; -} - -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c deleted file mode 100644 index 7d742577d68a..000000000000 --- a/arch/x86/kernel/mpparse_64.c +++ /dev/null @@ -1,991 +0,0 @@ -/* - * Intel Multiprocessor Specification 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Fixes - * Erich Boleyn : MP v1.4 and additional changes. - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Maciej W. Rozycki: Bits for default MP configurations - * Paul Diefenbaugh: Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; - -static int mp_current_pci_id = 0; - -/* - * Intel MP BIOS table parsing routines: - */ - -/* - * Checksum an MP configuration block. 
- */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) -{ - int apicid; - char *bootup_cpu = ""; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) { - disabled_cpus++; - return; - } -#ifdef CONFIG_X86_NUMAQ - apicid = mpc_apic_id(m, translation_table[mpc_record]); -#else - apicid = m->mpc_apicid; -#endif - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - bootup_cpu = " (Bootup-CPU)"; - boot_cpu_physical_apicid = m->mpc_apicid; - } - - printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); - generic_processor_info(apicid, m->mpc_apicver); -} - -static void __init MP_bus_info(struct mpc_config_bus *m) -{ - char str[7]; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - -#ifdef CONFIG_X86_NUMAQ - mpc_oem_bus_info(m, str, translation_table[mpc_record]); -#else - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); -#endif - -#if MAX_MP_BUSSES < 256 - if (m->mpc_busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. 
supported is %d\n", - m->mpc_busid, str, MAX_MP_BUSSES - 1); - return; - } -#endif - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; -#endif - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { -#ifdef CONFIG_X86_NUMAQ - mpc_oem_pci_bus(m, translation_table[mpc_record]); -#endif - clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; -#endif - } else - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); -} - -static int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); - return 1; - } - return 0; -} - -static void __init MP_ioapic_info(struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk(KERN_INFO "I/O APIC #%d at 0x%X.\n", m->mpc_apicid, - m->mpc_apicaddr); - - if (bad_ioapic(m->mpc_apicaddr)) - return; - - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) -{ - mp_irqs[mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, 
m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries >= MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) -{ - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); -} - -/* - * Read/parse the MPC - */ -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) -{ - char str[16]; - char oem[10]; - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; - - if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], mpc->mpc_signature[1], - mpc->mpc_signature[2], mpc->mpc_signature[3]); - return 0; - } - if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { - printk(KERN_ERR "MPTABLE: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk(KERN_ERR "MPTABLE: null local APIC address!\n"); - return 0; - } - memcpy(oem, mpc->mpc_oem, 8); - oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); - - memcpy(str, mpc->mpc_productid, 12); - str[12] = 0; - printk("Product ID: %s ", str); - -#ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); -#endif - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); - - printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); - - /* save the local APIC address, it might be non-default */ - if (!acpi_lapic) - mp_lapic_addr = mpc->mpc_lapic; - - if (early) - return 1; - - /* - * Now process the configuration blocks. 
- */ -#ifdef CONFIG_X86_NUMAQ - mpc_record = 0; -#endif - while (count < mpc->mpc_length) { - switch (*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m = - (struct mpc_config_processor *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m = - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m = - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m = - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m = - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - default: - { - count = mpc->mpc_length; - break; - } - } -#ifdef CONFIG_X86_NUMAQ - ++mpc_record; -#endif - } - setup_apic_routing(); - if (!num_processors) - printk(KERN_ERR "MPTABLE: no processors registered!\n"); - return num_processors; -} - -static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. 
The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " - "falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || - ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... " - "not using ELCR\n"); - else { - printk(KERN_INFO - "Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); -} - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_config_processor processor; - struct mpc_config_bus bus; - struct mpc_config_ioapic ioapic; - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); - - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct intel_mp_floating *mpf_found; - -/* - * Scan the memory blocks for an SMP configuration block. - */ -static void __init __get_smp_config(unsigned early) -{ - struct intel_mp_floating *mpf = mpf_found; - - if (acpi_lapic && early) - return; - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. 
- */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); - return; - } else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); - - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); -#ifdef CONFIG_X86_32 - if (mpf->mpf_feature2 & (1 << 7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } -#endif - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - if (early) { - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - return; - } - - printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { - smp_found_config = 0; - printk(KERN_ERR - "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. " - "(tell your hw vendor)\n"); - return; - } - - if (early) - return; - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. " - "(tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } - - } else - BUG(); - - if (!early) - printk(KERN_INFO "Processors: %d\n", num_processors); - /* - * Only use the first configuration found. 
- */ -} - -void __init early_get_smp_config(void) -{ - __get_smp_config(1); -} - -void __init get_smp_config(void) -{ - __get_smp_config(0); -} - -static int __init smp_scan_config(unsigned long base, unsigned long length, - unsigned reserve) -{ - extern void __bad_mpf_size(void); - unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); - if (sizeof(*mpf) != 16) - __bad_mpf_size(); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { - - smp_found_config = 1; - mpf_found = mpf; -#ifdef CONFIG_X86_32 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, - BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { - /* - * We cannot access to MPC table to compute - * table size yet, as only few megabytes from - * the bottom is mapped now. - * PC-9800's MPC table places on the very last - * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() - * in reserve_bootmem. - */ - unsigned long size = PAGE_SIZE; - unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size, - BOOTMEM_DEFAULT); - } - -#else - if (!reserve) - return 1; - - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, - PAGE_SIZE); -#endif - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -static void __init __find_smp_config(unsigned reserve) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... 
- * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0, 0x400, reserve) || - smp_scan_config(639 * 0x400, 0x400, reserve) || - smp_scan_config(0xF0000, 0x10000, reserve)) - return; - /* - * If it is an SMP machine we should know now. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - * - * MP1.4 SPEC states to only scan first 1K of 4K EBDA. - */ - - address = get_bios_ebda(); - if (address) - smp_scan_config(address, 0x400, reserve); -} - -void __init early_find_smp_config(void) -{ - __find_smp_config(0); -} - -void __init find_smp_config(void) -{ - __find_smp_config(1); -} - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_IO_APIC - -#define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 - -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic(int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. 
*/ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -static u8 uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].mpc_apicver = 0; -#endif - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
- */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} - -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); -} - -int es7000_plat; - -void __init mp_config_acpi_legacy_irqs(void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). 
- */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } -} - -int mp_register_gsi(u32 gsi, int triggering, int polarity) -{ - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 - - static int pci_irq = IRQ_COMPRESSION_START; - /* - * Mapping between Global System 
Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; -#endif - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - -#ifdef CONFIG_X86_32 - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); -#endif - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); - if (idx > 3) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; - } - if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif - } - - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); -#ifdef CONFIG_X86_32 - /* - * For GSI >= 64, use IRQ compression - */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. 
The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} - -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ -- cgit v1.2.3 From 9d25cb0811fd0bca2cfd80095ee7663147363f68 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:04 +0900 Subject: x86: avoid redundant loop in io_apic_level_ack_pending() If one can find an ack pending pin, there is no need to check the rest of them. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 65b6840e1820..cd01f3aa17ba 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -167,11 +167,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int value) writel(value, &io_apic->data); } -static int io_apic_level_ack_pending(unsigned int irq) +static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; unsigned long flags; - int pending = 0; spin_lock_irqsave(&ioapic_lock, flags); entry = irq_2_pin + irq; @@ -184,13 +183,17 @@ static int io_apic_level_ack_pending(unsigned int irq) break; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? 
*/ - pending |= (reg >> 14) & 1; + if ((reg >> 14) & 1) { + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } if (!entry->next) break; entry = irq_2_pin + entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); - return pending; + + return false; } /* -- cgit v1.2.3 From a2249cba2f1d7d06633de09c71353ae6b1567206 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:05 +0900 Subject: x86: use ioapic_read_entry() and ioapic_write_entry() Remove duplicate code by using ioapic_read_entry() and ioapic_write_entry() in io_apic_{32,64}.c Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 10 ++-------- arch/x86/kernel/io_apic_64.c | 25 ++++++------------------- 2 files changed, 8 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index db1b1f30b650..6d69de716b5e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1228,7 +1228,6 @@ static void __init setup_IO_APIC_irqs(void) { struct IO_APIC_route_entry entry; int apic, pin, idx, irq, first_notcon = 1, vector; - unsigned long flags; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1294,9 +1293,7 @@ static void __init setup_IO_APIC_irqs(void) if (!apic && (irq < 16)) disable_8259A_irq(irq); } - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, entry); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); } } @@ -2760,7 +2757,6 @@ int __init io_apic_get_redir_entries (int ioapic) int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; - unsigned long flags; if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", @@ -2801,9 +2797,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a if (!ioapic && (irq < 16)) disable_8259A_irq(irq); - 
spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(ioapic, pin, entry); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(ioapic, pin, entry); return 0; } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index cd01f3aa17ba..6e383e126db2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -917,9 +917,8 @@ static void __init setup_IO_APIC_irqs(void) static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - unsigned long flags; - memset(&entry,0,sizeof(entry)); + memset(&entry, 0, sizeof(entry)); disable_8259A_irq(0); @@ -947,10 +946,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); enable_8259A_irq(0); } @@ -1611,17 +1607,14 @@ static inline void unlock_ExtINT_logic(void) int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; - unsigned long flags; pin = find_isa_irq_pin(8, mp_INT); apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry0 = ioapic_read_entry(apic, pin); + clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -1634,10 +1627,7 @@ static inline void unlock_ExtINT_logic(void) entry1.trigger = 0; entry1.vector = 0; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); - 
spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry1); save_control = CMOS_READ(RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); @@ -1656,10 +1646,7 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); clear_IO_APIC_pin(apic, pin); - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry0); } /* -- cgit v1.2.3 From 07004b12a1199f82c016eb976f493e5b70820a1d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:06 +0900 Subject: x86: remove unnecessary memset() No need to clear the memory allocated by alloc_bootmem(). It is already filled with zero. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 6e383e126db2..616c53afb711 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2315,7 +2315,6 @@ static struct resource * __init ioapic_setup_resources(void) res = (void *)mem; if (mem != NULL) { - memset(mem, 0, n); mem += sizeof(struct resource) * nr_ioapics; for (i = 0; i < nr_ioapics; i++) { -- cgit v1.2.3 From addfc66bb55234c154bb43f0f7606bc5c9fc511d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:07 +0900 Subject: x86: remove unnecessary tmp local variable There is no reason to use obscurer name. 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 3 +-- arch/x86/kernel/io_apic_64.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 6d69de716b5e..2e2f42074e18 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1996,8 +1996,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < NR_IRQS ; irq++) { - int tmp = irq; - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { + if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 616c53afb711..09ea0ec39de6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1527,8 +1527,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < NR_IRQS ; irq++) { - int tmp = irq; - if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) { + if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 -- cgit v1.2.3 From 7281c96f37f784d64b0241a7b082bb13bab9580b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:08 +0900 Subject: x86: use cpumask_of_cpu() Use cpumask_of_cpu() rather than the pair of cpus_clear() and cpu_set(). 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 5 +---- arch/x86/kernel/io_apic_64.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 6a44e8dace37..1a9c68845ee8 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -138,12 +138,9 @@ static cpumask_t physflat_target_cpus(void) static cpumask_t physflat_vector_allocation_domain(int cpu) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + return cpumask_of_cpu(cpu); } - static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 09ea0ec39de6..b54464b26658 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1362,9 +1362,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - cpus_clear(mask); - cpu_set(first_cpu(cfg->domain), mask); - + mask = cpumask_of_cpu(first_cpu(cfg->domain)); send_IPI_mask(mask, cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); -- cgit v1.2.3 From 6107a7c4e2a871c37bb6c49e5e8286079f0968f9 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 5 Apr 2008 22:39:09 +0900 Subject: x86: use cpu_online() Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 66cd4afc1e57..9692202d3bfb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -411,12 +411,12 @@ static void native_machine_shutdown(void) #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - 
cpu_isset(reboot_cpu, cpu_online_map)) + cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_isset(reboot_cpu_id, cpu_online_map)) + if (!cpu_online(reboot_cpu_id)) reboot_cpu_id = smp_processor_id(); /* Make certain I only run on the appropriate processor */ -- cgit v1.2.3 From 711554dbc4d5402338ce115dca0df38e9f633330 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 7 Apr 2008 11:36:39 -0700 Subject: x86: print out buggy mptable print out buggy mptable, instead of skipping it quietly Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6e5e4547981c..70744e344fa1 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -390,10 +390,13 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) break; } default: - { - count = mpc->mpc_length; - break; - } + /* wrong mptable */ + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, + 1, mpc, mpc->mpc_length, 1); + count = mpc->mpc_length; + break; } #ifdef CONFIG_X86_NUMAQ ++mpc_record; -- cgit v1.2.3 From 63d38198a0f57dca87e6cb79931c7bedbb7ab069 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 7 Apr 2008 11:38:33 -0700 Subject: x86: fix paranoia about using BIOS quickboot mechanism. > > Make sure that we clear the "shutdown status flag" in the CMOS > > register after each CPU is brought up. This fixes a problem where the > > "shutdown status flag" may remain set when a CPU is brought up after > > booting. > > btw., what problem does this result in, exactly? The shutdown status flag set to "0xA", corresponds to "JMP double word request without INT init". 
This JMP at reboot time is at an unintended location, and results in triple faults in our case. Though this error at reboot can be safely ignored in a VM environment, I am not sure what the effect would be on a physical system. Maybe it will result in a triple fault and an eventual hardware reset, thus masking this BUG in the kernel. This fix just makes sure that we reset that status flag after initialization is done. Fix paranoia about using BIOS quickboot mechanism. Make sure that we clear the "shutdown status flag" in the CMOS register after each CPU is brought up. This fixes a problem where the "shutdown status flag" may remain set when a CPU is brought up after booting. Signed-off-by: Alok N Kataria Signed-off-by: Dan Arai Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 21ad3f396a05..4517d1c01eb5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1001,6 +1001,11 @@ do_rest: /* mark "stuck" area as not stuck */ *((volatile unsigned long *)trampoline_base) = 0; + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + return boot_error; } @@ -1254,11 +1259,6 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { - /* - * Cleanup possible dangling ends...
- */ - smpboot_restore_warm_reset_vector(); - Dprintk("Boot done.\n"); impress_friends(); -- cgit v1.2.3 From df96323dfaebdf7e17cdf0656096e6ab2158ec76 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Fri, 11 Apr 2008 13:28:37 +0200 Subject: x86: section mismatch fixes, #1 This patch fixes mismatch warnings in smp_checks() (in arch/x86/kernel/smpboot.c): WARNING: arch/x86/kernel/built-in.o(.text+0x11922): Section mismatch in reference from the function smp_checks() to the variable .cpuinit.data:smp_b_stepping The function smp_checks() references the variable __cpuinitdata smp_b_stepping. This is often because smp_checks lacks a __cpuinitdata annotation or the annotation of smp_b_stepping is wrong. Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4517d1c01eb5..ca3929b16049 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -437,7 +437,7 @@ valid_k7: #endif } -void smp_checks(void) +void __cpuinit smp_checks(void) { if (smp_b_stepping) printk(KERN_WARNING "WARNING: SMP operation may be unreliable" -- cgit v1.2.3 From e223f162a1d37aeeaa6c1ee37d81cc084aa2b004 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Thu, 10 Apr 2008 21:16:41 +0200 Subject: x86: setup_trampoline() - fix section mismatch warning this patch fixes section mismatch warnings (on x86_64 host) in setup_trampoline(), which was referencing __initdata variables trampoline_data and trampoline_end. Warning messages: WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2b6a): Section mismatch in reference from the function setup_trampoline() to the variable .init.data:trampoline_data The function __cpuinit setup_trampoline() references a variable __initdata trampoline_data. If trampoline_data is only used by setup_trampoline then annotate trampoline_data with a matching annotation. 
WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2b71): Section mismatch in reference from the function setup_trampoline() to the variable .init.data:trampoline_end The function __cpuinit setup_trampoline() references a variable __initdata trampoline_end. If trampoline_end is only used by setup_trampoline then annotate trampoline_end with a matching annotation. Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 4aedd0bcee4c..2a07e67d6697 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -32,7 +32,7 @@ /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU -.section .init.data, "aw", @progbits +.section .cpuinit.data, "aw", @progbits #else .section .rodata, "a", @progbits #endif -- cgit v1.2.3 From f49688d459c5eaa62db3597cbfd3cb13e361d415 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Fri, 22 Feb 2008 23:11:39 +0100 Subject: x86: coding style fixes to arch/x86/kernel/acpi/sleep.c Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 6bc815cd8cb3..dd78326ae47c 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -13,7 +13,7 @@ #include /* address in low memory of the wakeup routine. 
*/ -unsigned long acpi_wakeup_address = 0; +unsigned long acpi_wakeup_address; unsigned long acpi_realmode_flags; extern char wakeup_start, wakeup_end; -- cgit v1.2.3 From e44b7b7525ad9d43163ab5e60c784325419e0ea6 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 10 Apr 2008 23:28:10 +0200 Subject: x86: move suspend wakeup code to C Move wakeup code to .c, so that video mode setting code can be shared between boot and wakeup. Remove nasty assembly code in 64-bit case by re-using trampoline code. Stack setup was fixed to clear the high 16 bits of %esp, maybe that fixes some machines. .c code sharing and morse code was done by H. Peter Anvin, Sam Ravnborg reviewed kbuild related stuff, and it seems okay to him. Rafael did some cleanups. [rjw: * Made the patch stop breaking compilation on x86-32 * Added arch/x86/kernel/acpi/sleep.h * Got rid of compiler warnings in arch/x86/kernel/acpi/sleep.c * Fixed 32-bit compilation on x86-64 systems * Added include/asm-x86/trampoline.h and fixed the non-SMP compilation on 64-bit x86 * Removed arch/x86/kernel/acpi/sleep_32.c which was not used * Fixed some breakage caused by the integration of smpboot.c done under us in the meantime] Signed-off-by: Pavel Machek Signed-off-by: H. Peter Anvin Reviewed-by: Sam Ravnborg Signed-off-by: Rafael J.
Wysocki Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/Makefile | 9 +- arch/x86/kernel/acpi/realmode/Makefile | 57 ++++++ arch/x86/kernel/acpi/realmode/copy.S | 1 + arch/x86/kernel/acpi/realmode/video-bios.c | 1 + arch/x86/kernel/acpi/realmode/video-mode.c | 1 + arch/x86/kernel/acpi/realmode/video-vesa.c | 1 + arch/x86/kernel/acpi/realmode/video-vga.c | 1 + arch/x86/kernel/acpi/realmode/wakemain.c | 81 ++++++++ arch/x86/kernel/acpi/realmode/wakeup.S | 113 +++++++++++ arch/x86/kernel/acpi/realmode/wakeup.h | 36 ++++ arch/x86/kernel/acpi/realmode/wakeup.lds.S | 61 ++++++ arch/x86/kernel/acpi/sleep.c | 71 +++++-- arch/x86/kernel/acpi/sleep.h | 16 ++ arch/x86/kernel/acpi/sleep_32.c | 40 ---- arch/x86/kernel/acpi/wakeup_32.S | 247 ++--------------------- arch/x86/kernel/acpi/wakeup_64.S | 313 +---------------------------- arch/x86/kernel/acpi/wakeup_rm.S | 10 + arch/x86/kernel/e820_64.c | 5 +- arch/x86/kernel/head_64.S | 4 - arch/x86/kernel/setup_32.c | 4 +- arch/x86/kernel/setup_64.c | 1 + arch/x86/kernel/smpboot.c | 6 +- arch/x86/kernel/trampoline_64.S | 5 - 23 files changed, 484 insertions(+), 600 deletions(-) create mode 100644 arch/x86/kernel/acpi/realmode/Makefile create mode 100644 arch/x86/kernel/acpi/realmode/copy.S create mode 100644 arch/x86/kernel/acpi/realmode/video-bios.c create mode 100644 arch/x86/kernel/acpi/realmode/video-mode.c create mode 100644 arch/x86/kernel/acpi/realmode/video-vesa.c create mode 100644 arch/x86/kernel/acpi/realmode/video-vga.c create mode 100644 arch/x86/kernel/acpi/realmode/wakemain.c create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.S create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.h create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.lds.S create mode 100644 arch/x86/kernel/acpi/sleep.h delete mode 100644 arch/x86/kernel/acpi/sleep_32.c create mode 100644 arch/x86/kernel/acpi/wakeup_rm.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 
19d3d6e9d09b..7335959b6aff 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,7 +1,14 @@ +subdir- := realmode + obj-$(CONFIG_ACPI) += boot.o -obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o +obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o processor.o endif +$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin + +$(obj)/realmode/wakeup.bin: FORCE + $(Q)$(MAKE) $(build)=$(obj)/realmode $@ + diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile new file mode 100644 index 000000000000..092900854acc --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -0,0 +1,57 @@ +# +# arch/x86/kernel/acpi/realmode/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# + +targets := wakeup.bin wakeup.elf + +wakeup-y += wakeup.o wakemain.o video-mode.o copy.o + +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +wakeup-y += video-vga.o +wakeup-y += video-vesa.o +wakeup-y += video-bios.o + +targets += $(wakeup-y) + +bootsrc := $(src)/../../../boot + +# --------------------------------------------------------------------------- + +# How to compile the 16-bit code. Note we always compile for -march=i386, +# that way we can complain to the user if the CPU is insufficient. +# Compile with _SETUP since this is similar to the boot-time setup code. 
+KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ + -I$(srctree)/$(bootsrc) \ + $(cflags-y) \ + -Wall -Wstrict-prototypes \ + -march=i386 -mregparm=3 \ + -include $(srctree)/$(bootsrc)/code16gcc.h \ + -fno-strict-aliasing -fomit-frame-pointer \ + $(call cc-option, -ffreestanding) \ + $(call cc-option, -fno-toplevel-reorder,\ + $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-stack-protector) \ + $(call cc-option, -mpreferred-stack-boundary=2) +KBUILD_CFLAGS += $(call cc-option, -m32) +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ + +WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) + +LDFLAGS_wakeup.elf := -T + +CPPFLAGS_wakeup.lds += -P -C + +$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_wakeup.bin := -O binary + +$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE + $(call if_changed,objcopy) diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S new file mode 100644 index 000000000000..dc59ebee69d8 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/copy.S @@ -0,0 +1 @@ +#include "../../../boot/copy.S" diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c new file mode 100644 index 000000000000..7deabc144a27 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/video-bios.c @@ -0,0 +1 @@ +#include "../../../boot/video-bios.c" diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c new file mode 100644 index 000000000000..328ad209f113 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/video-mode.c @@ -0,0 +1 @@ +#include "../../../boot/video-mode.c" diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c new file mode 100644 index 000000000000..9dbb9672226a --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/video-vesa.c @@ -0,0 +1 @@ +#include "../../../boot/video-vesa.c" diff --git 
a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c new file mode 100644 index 000000000000..bcc81255f374 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/video-vga.c @@ -0,0 +1 @@ +#include "../../../boot/video-vga.c" diff --git a/arch/x86/kernel/acpi/realmode/wakemain.c b/arch/x86/kernel/acpi/realmode/wakemain.c new file mode 100644 index 000000000000..883962d9eef2 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/wakemain.c @@ -0,0 +1,81 @@ +#include "wakeup.h" +#include "boot.h" + +static void udelay(int loops) +{ + while (loops--) + io_delay(); /* Approximately 1 us */ +} + +static void beep(unsigned int hz) +{ + u8 enable; + + if (!hz) { + enable = 0x00; /* Turn off speaker */ + } else { + u16 div = 1193181/hz; + + outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ + io_delay(); + outb(div, 0x42); /* LSB of counter */ + io_delay(); + outb(div >> 8, 0x42); /* MSB of counter */ + io_delay(); + + enable = 0x03; /* Turn on speaker */ + } + inb(0x61); /* Dummy read of System Control Port B */ + io_delay(); + outb(enable, 0x61); /* Enable timer 2 output to speaker */ + io_delay(); +} + +#define DOT_HZ 880 +#define DASH_HZ 587 +#define US_PER_DOT 125000 + +/* Okay, this is totally silly, but it's kind of fun. 
*/ +static void send_morse(const char *pattern) +{ + char s; + + while ((s = *pattern++)) { + switch (s) { + case '.': + beep(DOT_HZ); + udelay(US_PER_DOT); + beep(0); + udelay(US_PER_DOT); + break; + case '-': + beep(DASH_HZ); + udelay(US_PER_DOT * 3); + beep(0); + udelay(US_PER_DOT); + break; + default: /* Assume it's a space */ + udelay(US_PER_DOT * 3); + break; + } + } +} + +void main(void) +{ + /* Kill machine if structures are wrong */ + if (wakeup_header.real_magic != 0x12345678) + while (1); + + if (wakeup_header.realmode_flags & 4) + send_morse("...-"); + + if (wakeup_header.realmode_flags & 1) + asm volatile("lcallw $0xc000,$3"); + + if (wakeup_header.realmode_flags & 2) { + /* Need to call BIOS */ + probe_cards(0); + set_mode(wakeup_header.video_mode); + } +} diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S new file mode 100644 index 000000000000..f9b77fb37e5b --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -0,0 +1,113 @@ +/* + * ACPI wakeup real mode startup stub + */ +#include +#include +#include +#include + + .code16 + .section ".header", "a" + +/* This should match the structure in wakeup.h */ + .globl wakeup_header +wakeup_header: +video_mode: .short 0 /* Video mode number */ +pmode_return: .byte 0x66, 0xea /* ljmpl */ + .long 0 /* offset goes here */ + .short __KERNEL_CS +pmode_cr0: .long 0 /* Saved %cr0 */ +pmode_cr3: .long 0 /* Saved %cr3 */ +pmode_cr4: .long 0 /* Saved %cr4 */ +pmode_efer: .quad 0 /* Saved EFER */ +pmode_gdt: .quad 0 +realmode_flags: .long 0 +real_magic: .long 0 +trampoline_segment: .word 0 +signature: .long 0x51ee1111 + + .text + .globl _start + .code16 +wakeup_code: +_start: + cli + cld + + /* Set up segments */ + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + movl $wakeup_stack_end, %esp + + /* Clear the EFLAGS */ + pushl $0 + popfl + + /* Check header signature... 
*/ + movl signature, %eax + cmpl $0x51ee1111, %eax + jne bogus_real_magic + + /* Check we really have everything... */ + movl end_signature, %eax + cmpl $0x65a22c82, %eax + jne bogus_real_magic + + /* Call the C code */ + calll main + + /* Do any other stuff... */ + +#ifndef CONFIG_64BIT + /* This could also be done in C code... */ + movl pmode_cr3, %eax + movl %eax, %cr3 + + movl pmode_cr4, %ecx + jecxz 1f + movl %ecx, %cr4 +1: + movl pmode_efer, %eax + movl pmode_efer + 4, %edx + movl %eax, %ecx + orl %edx, %ecx + jz 1f + movl $0xc0000080, %ecx + wrmsr +1: + + lgdtl pmode_gdt + + /* This really couldn't... */ + movl pmode_cr0, %eax + movl %eax, %cr0 + jmp pmode_return +#else + pushw $0 + pushw trampoline_segment + pushw $0 + lret +#endif + +bogus_real_magic: +1: + hlt + jmp 1b + + .data + .balign 4 + .globl HEAP, heap_end +HEAP: + .long wakeup_heap +heap_end: + .long wakeup_stack + + .bss +wakeup_heap: + .space 2048 +wakeup_stack: + .space 2048 +wakeup_stack_end: diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h new file mode 100644 index 000000000000..ef8166fe8020 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -0,0 +1,36 @@ +/* + * Definitions for the wakeup data structure at the head of the + * wakeup code. 
+ */ + +#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H +#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H + +#ifndef __ASSEMBLY__ +#include + +/* This must match data at wakeup.S */ +struct wakeup_header { + u16 video_mode; /* Video mode number */ + u16 _jmp1; /* ljmpl opcode, 32-bit only */ + u32 pmode_entry; /* Protected mode resume point, 32-bit only */ + u16 _jmp2; /* CS value, 32-bit only */ + u32 pmode_cr0; /* Protected mode cr0 */ + u32 pmode_cr3; /* Protected mode cr3 */ + u32 pmode_cr4; /* Protected mode cr4 */ + u32 pmode_efer_low; /* Protected mode EFER */ + u32 pmode_efer_high; + u64 pmode_gdt; + u32 realmode_flags; + u32 real_magic; + u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ + u32 signature; /* To check we have correct structure */ +} __attribute__((__packed__)); + +extern struct wakeup_header wakeup_header; +#endif + +#define HEADER_OFFSET 0x3f00 +#define WAKEUP_SIZE 0x4000 + +#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S new file mode 100644 index 000000000000..22fab6c4be15 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -0,0 +1,61 @@ +/* + * wakeup.ld + * + * Linker script for the real-mode wakeup code + */ +#undef i386 +#include "wakeup.h" + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + . = HEADER_OFFSET; + .header : { + *(.header) + } + + . = 0; + .text : { + *(.text*) + } + + . = ALIGN(16); + .rodata : { + *(.rodata*) + } + + .videocards : { + video_cards = .; + *(.videocards) + video_cards_end = .; + } + + . = ALIGN(16); + .data : { + *(.data*) + } + + .signature : { + end_signature = .; + LONG(0x65a22c82) + } + + . = ALIGN(16); + .bss : { + __bss_start = .; + *(.bss) + __bss_end = .; + } + + . = ALIGN(16); + _end = .; + + /DISCARD/ : { + *(.note*) + } + + . 
= ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); +} diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index dd78326ae47c..afc25ee9964b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -10,30 +10,72 @@ #include #include -#include +#include "realmode/wakeup.h" +#include "sleep.h" -/* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address; unsigned long acpi_realmode_flags; -extern char wakeup_start, wakeup_end; -extern unsigned long acpi_copy_wakeup_routine(unsigned long); +/* address in low memory of the wakeup routine. */ +static unsigned long acpi_realmode; + +#ifdef CONFIG_64BIT +static char temp_stack[10240]; +#endif /** * acpi_save_state_mem - save kernel state * * Create an identity mapped page table and copy the wakeup routine to * low memory. + * + * Note that this is too late to change acpi_wakeup_address. */ int acpi_save_state_mem(void) { - if (!acpi_wakeup_address) { - printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n"); + struct wakeup_header *header; + + if (!acpi_realmode) { + printk(KERN_ERR "Could not allocate memory during boot, " + "S3 disabled\n"); return -ENOMEM; } - memcpy((void *)acpi_wakeup_address, &wakeup_start, - &wakeup_end - &wakeup_start); - acpi_copy_wakeup_routine(acpi_wakeup_address); + memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE); + + header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET); + if (header->signature != 0x51ee1111) { + printk(KERN_ERR "wakeup header does not match\n"); + return -EINVAL; + } + + header->video_mode = saved_video_mode; + +#ifndef CONFIG_64BIT + store_gdt((struct desc_ptr *)&header->pmode_gdt); + + header->pmode_efer_low = nx_enabled; + if (header->pmode_efer_low & 1) { + /* This is strange, why not save efer, always? 
*/ + rdmsr(MSR_EFER, header->pmode_efer_low, + header->pmode_efer_high); + } +#endif /* !CONFIG_64BIT */ + + header->pmode_cr0 = read_cr0(); + header->pmode_cr4 = read_cr4(); + header->realmode_flags = acpi_realmode_flags; + header->real_magic = 0x12345678; + +#ifndef CONFIG_64BIT + header->pmode_entry = (u32)&wakeup_pmode_return; + header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET); + saved_magic = 0x12345678; +#else /* CONFIG_64BIT */ + header->trampoline_segment = setup_trampoline() >> 4; + init_rsp = (unsigned long)temp_stack + 4096; + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +#endif /* CONFIG_64BIT */ return 0; } @@ -56,15 +98,20 @@ void acpi_restore_state_mem(void) */ void __init acpi_reserve_bootmem(void) { - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) { + if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n"); return; } - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); - if (!acpi_wakeup_address) + acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); + + if (!acpi_realmode) { printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); + return; + } + + acpi_wakeup_address = acpi_realmode; } diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h new file mode 100644 index 000000000000..adbcbaa6f1df --- /dev/null +++ b/arch/x86/kernel/acpi/sleep.h @@ -0,0 +1,16 @@ +/* + * Variables and functions used by the code in sleep.c + */ + +#include + +extern char wakeup_code_start, wakeup_code_end; + +extern unsigned long saved_video_mode; +extern long saved_magic; + +extern int wakeup_pmode_return; +extern char swsusp_pg_dir[PAGE_SIZE]; + +extern unsigned long acpi_copy_wakeup_routine(unsigned long); +extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c deleted file mode 100644 index 63fe5525e026..000000000000 --- 
a/arch/x86/kernel/acpi/sleep_32.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * sleep.c - x86-specific ACPI sleep support. - * - * Copyright (C) 2001-2003 Patrick Mochel - * Copyright (C) 2001-2003 Pavel Machek - */ - -#include -#include -#include -#include - -#include - -/* Ouch, we want to delete this. We already have better version in userspace, in - s2ram from suspend.sf.net project */ -static __init int reset_videomode_after_s3(const struct dmi_system_id *d) -{ - acpi_realmode_flags |= 2; - return 0; -} - -static __initdata struct dmi_system_id acpisleep_dmi_table[] = { - { /* Reset video mode after returning from ACPI S3 sleep */ - .callback = reset_videomode_after_s3, - .ident = "Toshiba Satellite 4030cdt", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - }, - }, - {} -}; - -static int __init acpisleep_dmi_init(void) -{ - dmi_check_system(acpisleep_dmi_table); - return 0; -} - -core_initcall(acpisleep_dmi_init); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index f53e3277f8e5..a12e6a9fb659 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -3,178 +3,12 @@ #include #include -# -# wakeup_code runs in real mode, and at unknown address (determined at run-time). -# Therefore it must only use relative jumps/calls. -# -# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled -# -# If physical address of wakeup_code is 0x12345, BIOS should call us with -# cs = 0x1234, eip = 0x05 -# - -#define BEEP \ - inb $97, %al; \ - outb %al, $0x80; \ - movb $3, %al; \ - outb %al, $97; \ - outb %al, $0x80; \ - movb $-74, %al; \ - outb %al, $67; \ - outb %al, $0x80; \ - movb $-119, %al; \ - outb %al, $66; \ - outb %al, $0x80; \ - movb $15, %al; \ - outb %al, $66; - -ALIGN - .align 4096 -ENTRY(wakeup_start) -wakeup_code: - wakeup_code_start = . 
- .code16 - - cli - cld - - # setup data segment - movw %cs, %ax - movw %ax, %ds # Make ds:0 point to wakeup_start - movw %ax, %ss - - testl $4, realmode_flags - wakeup_code - jz 1f - BEEP -1: - mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board - - pushl $0 # Kill any dangerous flags - popfl - - movl real_magic - wakeup_code, %eax - cmpl $0x12345678, %eax - jne bogus_real_magic - - testl $1, realmode_flags - wakeup_code - jz 1f - lcall $0xc000,$3 - movw %cs, %ax - movw %ax, %ds # Bios might have played with that - movw %ax, %ss -1: - - testl $2, realmode_flags - wakeup_code - jz 1f - mov video_mode - wakeup_code, %ax - call mode_set -1: - - # set up page table - movl $swsusp_pg_dir-__PAGE_OFFSET, %eax - movl %eax, %cr3 - - testl $1, real_efer_save_restore - wakeup_code - jz 4f - # restore efer setting - movl real_save_efer_edx - wakeup_code, %edx - movl real_save_efer_eax - wakeup_code, %eax - mov $0xc0000080, %ecx - wrmsr -4: - # make sure %cr4 is set correctly (features, etc) - movl real_save_cr4 - wakeup_code, %eax - movl %eax, %cr4 - - # need a gdt -- use lgdtl to force 32-bit operands, in case - # the GDT is located past 16 megabytes. - lgdtl real_save_gdt - wakeup_code - - movl real_save_cr0 - wakeup_code, %eax - movl %eax, %cr0 - jmp 1f -1: - movl real_magic - wakeup_code, %eax - cmpl $0x12345678, %eax - jne bogus_real_magic - - testl $8, realmode_flags - wakeup_code - jz 1f - BEEP -1: - ljmpl $__KERNEL_CS, $wakeup_pmode_return - -real_save_gdt: .word 0 - .long 0 -real_save_cr0: .long 0 -real_save_cr3: .long 0 -real_save_cr4: .long 0 -real_magic: .long 0 -video_mode: .long 0 -realmode_flags: .long 0 -real_efer_save_restore: .long 0 -real_save_efer_edx: .long 0 -real_save_efer_eax: .long 0 - -bogus_real_magic: - jmp bogus_real_magic - -/* This code uses an extended set of video mode numbers. 
These include: - * Aliases for standard modes - * NORMAL_VGA (-1) - * EXTENDED_VGA (-2) - * ASK_VGA (-3) - * Video modes numbered by menu position -- NOT RECOMMENDED because of lack - * of compatibility when extending the table. These are between 0x00 and 0xff. - */ -#define VIDEO_FIRST_MENU 0x0000 - -/* Standard BIOS video modes (BIOS number + 0x0100) */ -#define VIDEO_FIRST_BIOS 0x0100 - -/* VESA BIOS video modes (VESA number + 0x0200) */ -#define VIDEO_FIRST_VESA 0x0200 - -/* Video7 special modes (BIOS number + 0x0900) */ -#define VIDEO_FIRST_V7 0x0900 - -# Setting of user mode (AX=mode ID) => CF=success - -# For now, we only handle VESA modes (0x0200..0x03ff). To handle other -# modes, we should probably compile in the video code from the boot -# directory. -mode_set: - movw %ax, %bx - subb $VIDEO_FIRST_VESA>>8, %bh - cmpb $2, %bh - jb check_vesa - -setbad: - clc - ret - -check_vesa: - orw $0x4000, %bx # Use linear frame buffer - movw $0x4f02, %ax # VESA BIOS mode set call - int $0x10 - cmpw $0x004f, %ax # AL=4f if implemented - jnz setbad # AH=0 if OK - - stc - ret +# Copyright 2003, 2008 Pavel Machek , distribute under GPLv2 .code32 ALIGN -.org 0x800 -wakeup_stack_begin: # Stack grows down - -.org 0xff0 # Just below end of page -wakeup_stack: -ENTRY(wakeup_end) - -.org 0x1000 - +ENTRY(wakeup_pmode_return) wakeup_pmode_return: movw $__KERNEL_DS, %ax movw %ax, %ss @@ -187,7 +21,7 @@ wakeup_pmode_return: lgdt saved_gdt lidt saved_idt lldt saved_ldt - ljmp $(__KERNEL_CS),$1f + ljmp $(__KERNEL_CS), $1f 1: movl %cr3, %eax movl %eax, %cr3 @@ -201,82 +35,41 @@ wakeup_pmode_return: jne bogus_magic # jump to place where we left off - movl saved_eip,%eax + movl saved_eip, %eax jmp *%eax bogus_magic: jmp bogus_magic -## -# acpi_copy_wakeup_routine -# -# Copy the above routine to low memory. 
-# -# Parameters: -# %eax: place to copy wakeup routine to -# -# Returned address is location of code in low memory (past data and stack) -# -ENTRY(acpi_copy_wakeup_routine) - pushl %ebx +save_registers: sgdt saved_gdt sidt saved_idt sldt saved_ldt str saved_tss - movl nx_enabled, %edx - movl %edx, real_efer_save_restore - wakeup_start (%eax) - testl $1, real_efer_save_restore - wakeup_start (%eax) - jz 2f - # save efer setting - pushl %eax - movl %eax, %ebx - mov $0xc0000080, %ecx - rdmsr - movl %edx, real_save_efer_edx - wakeup_start (%ebx) - movl %eax, real_save_efer_eax - wakeup_start (%ebx) - popl %eax -2: - - movl %cr3, %edx - movl %edx, real_save_cr3 - wakeup_start (%eax) - movl %cr4, %edx - movl %edx, real_save_cr4 - wakeup_start (%eax) - movl %cr0, %edx - movl %edx, real_save_cr0 - wakeup_start (%eax) - sgdt real_save_gdt - wakeup_start (%eax) - - movl saved_videomode, %edx - movl %edx, video_mode - wakeup_start (%eax) - movl acpi_realmode_flags, %edx - movl %edx, realmode_flags - wakeup_start (%eax) - movl $0x12345678, real_magic - wakeup_start (%eax) - movl $0x12345678, saved_magic - popl %ebx - ret - -save_registers: leal 4(%esp), %eax movl %eax, saved_context_esp - movl %ebx, saved_context_ebx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags - - movl $ret_point, saved_eip + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl + popl saved_context_eflags + + movl $ret_point, saved_eip ret restore_registers: - movl saved_context_ebp, %ebp - movl saved_context_ebx, %ebx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - pushl saved_context_eflags ; popfl - ret + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + pushl saved_context_eflags + popfl + ret ENTRY(do_suspend_lowlevel) call save_processor_state diff --git 
a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 2e1b9e0d0767..bcc293423a70 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -7,191 +7,18 @@ #include # Copyright 2003 Pavel Machek , distribute under GPLv2 -# -# wakeup_code runs in real mode, and at unknown address (determined at run-time). -# Therefore it must only use relative jumps/calls. -# -# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled -# -# If physical address of wakeup_code is 0x12345, BIOS should call us with -# cs = 0x1234, eip = 0x05 -# - -#define BEEP \ - inb $97, %al; \ - outb %al, $0x80; \ - movb $3, %al; \ - outb %al, $97; \ - outb %al, $0x80; \ - movb $-74, %al; \ - outb %al, $67; \ - outb %al, $0x80; \ - movb $-119, %al; \ - outb %al, $66; \ - outb %al, $0x80; \ - movb $15, %al; \ - outb %al, $66; - - -ALIGN - .align 16 -ENTRY(wakeup_start) -wakeup_code: - wakeup_code_start = . - .code16 - -# Running in *copy* of this code, somewhere in low 1MB. - - cli - cld - # setup data segment - movw %cs, %ax - movw %ax, %ds # Make ds:0 point to wakeup_start - movw %ax, %ss - - # Data segment must be set up before we can see whether to beep. 
- testl $4, realmode_flags - wakeup_code - jz 1f - BEEP -1: - - # Private stack is needed for ASUS board - mov $(wakeup_stack - wakeup_code), %sp - - pushl $0 # Kill any dangerous flags - popfl - - movl real_magic - wakeup_code, %eax - cmpl $0x12345678, %eax - jne bogus_real_magic - - testl $1, realmode_flags - wakeup_code - jz 1f - lcall $0xc000,$3 - movw %cs, %ax - movw %ax, %ds # Bios might have played with that - movw %ax, %ss -1: - - testl $2, realmode_flags - wakeup_code - jz 1f - mov video_mode - wakeup_code, %ax - call mode_set -1: - - mov %ds, %ax # Find 32bit wakeup_code addr - movzx %ax, %esi # (Convert %ds:gdt to a liner ptr) - shll $4, %esi - # Fix up the vectors - addl %esi, wakeup_32_vector - wakeup_code - addl %esi, wakeup_long64_vector - wakeup_code - addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer - - lidtl %ds:idt_48a - wakeup_code - lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is - # appropriate - - movl $1, %eax # protected mode (PE) bit - lmsw %ax # This is it! - jmp 1f -1: - - ljmpl *(wakeup_32_vector - wakeup_code) - - .balign 4 -wakeup_32_vector: - .long wakeup_32 - wakeup_code - .word __KERNEL32_CS, 0 - - .code32 -wakeup_32: -# Running in this code, but at low address; paging is not yet turned on. - - movl $__KERNEL_DS, %eax - movl %eax, %ds - - /* - * Prepare for entering 64bits mode - */ - - /* Enable PAE */ - xorl %eax, %eax - btsl $5, %eax - movl %eax, %cr4 - - /* Setup early boot stage 4 level pagetables */ - leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax - movl %eax, %cr3 - - /* Check if nx is implemented */ - movl $0x80000001, %eax - cpuid - movl %edx,%edi - - /* Enable Long Mode */ - xorl %eax, %eax - btsl $_EFER_LME, %eax - - /* No Execute supported? 
*/ - btl $20,%edi - jnc 1f - btsl $_EFER_NX, %eax - - /* Make changes effective */ -1: movl $MSR_EFER, %ecx - xorl %edx, %edx - wrmsr - - xorl %eax, %eax - btsl $31, %eax /* Enable paging and in turn activate Long Mode */ - btsl $0, %eax /* Enable protected mode */ - - /* Make changes effective */ - movl %eax, %cr0 - - /* At this point: - CR4.PAE must be 1 - CS.L must be 0 - CR3 must point to PML4 - Next instruction must be a branch - This must be on identity-mapped page - */ - /* - * At this point we're in long mode but in 32bit compatibility mode - * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn - * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load - * the new gdt/idt that has __KERNEL_CS with CS.L = 1. - */ - - /* Finally jump in 64bit mode */ - ljmp *(wakeup_long64_vector - wakeup_code)(%esi) - - .balign 4 -wakeup_long64_vector: - .long wakeup_long64 - wakeup_code - .word __KERNEL_CS, 0 .code64 - - /* Hooray, we are in Long 64-bit mode (but still running in - * low memory) - */ -wakeup_long64: /* - * We must switch to a new descriptor in kernel space for the GDT - * because soon the kernel won't have access anymore to the userspace - * addresses where we're currently running on. We have to do that here - * because in 32bit we couldn't load a 64bit linear address. + * Hooray, we are in Long 64-bit mode (but still running in low memory) */ - lgdt cpu_gdt_descr - - movq saved_magic, %rax - movq $0x123456789abcdef0, %rdx - cmpq %rdx, %rax - jne bogus_64_magic +ENTRY(wakeup_long64) +wakeup_long64: + movq saved_magic, %rax + movq $0x123456789abcdef0, %rdx + cmpq %rdx, %rax + jne bogus_64_magic - nop - nop movw $__KERNEL_DS, %ax movw %ax, %ss movw %ax, %ds @@ -208,130 +35,8 @@ wakeup_long64: movq saved_rip, %rax jmp *%rax -.code32 - - .align 64 -gdta: - /* Its good to keep gdt in sync with one in trampoline.S */ - .word 0, 0, 0, 0 # dummy - /* ??? Why I need the accessed bit set in order for this to work? 
*/ - .quad 0x00cf9b000000ffff # __KERNEL32_CS - .quad 0x00af9b000000ffff # __KERNEL_CS - .quad 0x00cf93000000ffff # __KERNEL_DS - -idt_48a: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - -gdt_48a: - .word 0x800 # gdt limit=2048, - # 256 GDT entries - .long gdta - wakeup_code # gdt base (relocated in later) - -real_magic: .quad 0 -video_mode: .quad 0 -realmode_flags: .quad 0 - -.code16 -bogus_real_magic: - jmp bogus_real_magic - -.code64 bogus_64_magic: - jmp bogus_64_magic - -/* This code uses an extended set of video mode numbers. These include: - * Aliases for standard modes - * NORMAL_VGA (-1) - * EXTENDED_VGA (-2) - * ASK_VGA (-3) - * Video modes numbered by menu position -- NOT RECOMMENDED because of lack - * of compatibility when extending the table. These are between 0x00 and 0xff. - */ -#define VIDEO_FIRST_MENU 0x0000 - -/* Standard BIOS video modes (BIOS number + 0x0100) */ -#define VIDEO_FIRST_BIOS 0x0100 - -/* VESA BIOS video modes (VESA number + 0x0200) */ -#define VIDEO_FIRST_VESA 0x0200 - -/* Video7 special modes (BIOS number + 0x0900) */ -#define VIDEO_FIRST_V7 0x0900 - -# Setting of user mode (AX=mode ID) => CF=success - -# For now, we only handle VESA modes (0x0200..0x03ff). To handle other -# modes, we should probably compile in the video code from the boot -# directory. 
-.code16 -mode_set: - movw %ax, %bx - subb $VIDEO_FIRST_VESA>>8, %bh - cmpb $2, %bh - jb check_vesa - -setbad: - clc - ret - -check_vesa: - orw $0x4000, %bx # Use linear frame buffer - movw $0x4f02, %ax # VESA BIOS mode set call - int $0x10 - cmpw $0x004f, %ax # AL=4f if implemented - jnz setbad # AH=0 if OK - - stc - ret - -wakeup_stack_begin: # Stack grows down - -.org 0xff0 -wakeup_stack: # Just below end of page - -.org 0x1000 -ENTRY(wakeup_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 510,8,0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE - -ENTRY(wakeup_end) - -## -# acpi_copy_wakeup_routine -# -# Copy the above routine to low memory. -# -# Parameters: -# %rdi: place to copy wakeup routine to -# -# Returned address is location of code in low memory (past data and stack) -# - .code64 -ENTRY(acpi_copy_wakeup_routine) - pushq %rax - pushq %rdx - - movl saved_video_mode, %edx - movl %edx, video_mode - wakeup_start (,%rdi) - movl acpi_realmode_flags, %edx - movl %edx, realmode_flags - wakeup_start (,%rdi) - movq $0x12345678, real_magic - wakeup_start (,%rdi) - movq $0x123456789abcdef0, %rdx - movq %rdx, saved_magic - - movq saved_magic, %rax - movq $0x123456789abcdef0, %rdx - cmpq %rdx, %rax - jne bogus_64_magic - - # restore the regs we used - popq %rdx - popq %rax -ENTRY(do_suspend_lowlevel_s4bios) - ret + jmp bogus_64_magic .align 2 .p2align 4,,15 @@ -414,7 +119,7 @@ do_suspend_lowlevel: jmp restore_processor_state .LFE5: .Lfe5: - .size do_suspend_lowlevel,.Lfe5-do_suspend_lowlevel + .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel .data ALIGN diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S new file mode 100644 index 000000000000..6ff3b5730575 --- /dev/null +++ b/arch/x86/kernel/acpi/wakeup_rm.S @@ -0,0 +1,10 @@ +/* + * Wrapper script for the realmode binary as a transport object + * before copying to low memory. 
+ */ + .section ".rodata","a" + .globl wakeup_code_start, wakeup_code_end +wakeup_code_start: + .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" +wakeup_code_end: + .size wakeup_code_start, .-wakeup_code_start diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index a720f3d5ed9d..7f6c0c85c8f6 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -27,6 +27,7 @@ #include #include #include +#include struct e820map e820; @@ -58,8 +59,8 @@ struct early_res { }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_SMP - { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" }, +#ifdef CONFIG_X86_TRAMPOLINE + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, #endif {} }; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c1d7a877d814..10a1955bb1d1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -132,10 +132,6 @@ ident_complete: addq %rbp, trampoline_level4_pgt + 0(%rip) addq %rbp, trampoline_level4_pgt + (511*8)(%rip) #endif -#ifdef CONFIG_ACPI_SLEEP - addq %rbp, wakeup_level4_pgt + 0(%rip) - addq %rbp, wakeup_level4_pgt + (511*8)(%rip) -#endif /* Due to ENTRY(), sometimes the empty space gets filled with * zeros. 
Better take a jmp than relying on empty space being diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 4b198d9d0de3..5b0bffb7fcc9 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -192,7 +192,7 @@ EXPORT_SYMBOL(ist_info); extern void early_cpu_init(void); extern int root_mountflags; -unsigned long saved_videomode; +unsigned long saved_video_mode; #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 @@ -763,7 +763,7 @@ void __init setup_arch(char **cmdline_p) edid_info = boot_params.edid_info; apm_info.bios = boot_params.apm_bios_info; ist_info = boot_params.ist_info; - saved_videomode = boot_params.hdr.vid_mode; + saved_video_mode = boot_params.hdr.vid_mode; if( boot_params.sys_desc_table.length != 0 ) { set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); machine_id = boot_params.sys_desc_table.table[0]; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b80300710c08..674ef3510cdf 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ca3929b16049..424600e671bd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -140,7 +141,7 @@ static atomic_t init_deasserted; static int boot_cpu_logical_apicid; /* ready for x86_64, no harm for x86, since it will overwrite after alloc */ -unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); +unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -554,8 +555,7 @@ cpumask_t cpu_coregroup_map(int cpu) * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. 
*/ - -unsigned long __cpuinit setup_trampoline(void) +unsigned long setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 2a07e67d6697..894293c598db 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -30,12 +30,7 @@ #include #include -/* We can free up trampoline after bootup if cpu hotplug is not supported. */ -#ifndef CONFIG_HOTPLUG_CPU -.section .cpuinit.data, "aw", @progbits -#else .section .rodata, "a", @progbits -#endif .code16 -- cgit v1.2.3 From 77ad386e596c6b0930cc2e09e3cce485e3ee7f72 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Mar 2008 15:23:19 +0100 Subject: x86: standalone trampoline code move the trampoline setup code out of smpboot.c - UP kernels can have suspend support too. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/smpboot.c | 15 --------------- arch/x86/kernel/trampoline.c | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 15 deletions(-) create mode 100644 arch/x86/kernel/trampoline.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fdd8395e0ed3..530ed6a4a031 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,6 +28,7 @@ obj-y += alternative.o i8253.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o +obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += i387.o obj-y += ptrace.o obj-y += ds.o diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 424600e671bd..e6abe8a49b1f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -140,9 +140,6 @@ static atomic_t init_deasserted; static int boot_cpu_logical_apicid; -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ -unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); - /* representing cpus for which 
sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -550,18 +547,6 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ -unsigned long setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, - trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} - #ifdef CONFIG_X86_32 /* * We are called very early to get the low memory for the diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c new file mode 100644 index 000000000000..abbf199adebb --- /dev/null +++ b/arch/x86/kernel/trampoline.c @@ -0,0 +1,18 @@ +#include + +#include + +/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ +unsigned long setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, + trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} -- cgit v1.2.3 From 82da3ff89dc2a1842cff9b0d4cbc345cb90b59e1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Apr 2008 20:05:37 +0200 Subject: x86: kgdb support simplified and streamlined kgdb support on x86, both 32-bit and 64-bit, based on patch from: Subject: kgdb: core-lite From: Jason Wessel [ and countless other authors - see the patch for details. 
] Signed-off-by: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Jan Kiszka Reviewed-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kgdb.c | 417 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 418 insertions(+) create mode 100644 arch/x86/kernel/kgdb.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4eb5ce841106..4a4260c7f672 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_ACPI_SRAT) += srat_32.o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c new file mode 100644 index 000000000000..37194d6374d8 --- /dev/null +++ b/arch/x86/kernel/kgdb.c @@ -0,0 +1,417 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2004 Amit S. Kale + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * Copyright (C) 2002 Andi Kleen, SuSE Labs + * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd. + * Copyright (C) 2007 MontaVista Software, Inc. + * Copyright (C) 2007-2008 Jason Wessel, Wind River Systems, Inc. 
+ */ +/**************************************************************************** + * Contributor: Lake Stevens Instrument Division$ + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * Updated by: Tom Rini + * Updated by: Jason Wessel + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by + * David Grothe + * Integrated into 2.2.5 kernel by Tigran Aivazian + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +/* + * Put the error code here just in case the user cares: + */ +static int gdb_x86errcode; + +/* + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific): + */ +static int gdb_x86vector = -1; + +/** + * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs + * @gdb_regs: A pointer to hold the registers in the order GDB wants. + * @regs: The &struct pt_regs of the current process. + * + * Convert the pt_regs in @regs into the format for registers that + * GDB expects, stored in @gdb_regs. 
+ */ +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[GDB_AX] = regs->ax; + gdb_regs[GDB_BX] = regs->bx; + gdb_regs[GDB_CX] = regs->cx; + gdb_regs[GDB_DX] = regs->dx; + gdb_regs[GDB_SI] = regs->si; + gdb_regs[GDB_DI] = regs->di; + gdb_regs[GDB_BP] = regs->bp; + gdb_regs[GDB_PS] = regs->flags; + gdb_regs[GDB_PC] = regs->ip; +#ifdef CONFIG_X86_32 + gdb_regs[GDB_DS] = regs->ds; + gdb_regs[GDB_ES] = regs->es; + gdb_regs[GDB_CS] = regs->cs; + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_FS] = 0xFFFF; + gdb_regs[GDB_GS] = 0xFFFF; +#else + gdb_regs[GDB_R8] = regs->r8; + gdb_regs[GDB_R9] = regs->r9; + gdb_regs[GDB_R10] = regs->r10; + gdb_regs[GDB_R11] = regs->r11; + gdb_regs[GDB_R12] = regs->r12; + gdb_regs[GDB_R13] = regs->r13; + gdb_regs[GDB_R14] = regs->r14; + gdb_regs[GDB_R15] = regs->r15; +#endif + gdb_regs[GDB_SP] = regs->sp; +} + +/** + * sleeping_thread_to_gdb_regs - Convert ptrace regs to GDB regs + * @gdb_regs: A pointer to hold the registers in the order GDB wants. + * @p: The &struct task_struct of the desired process. + * + * Convert the register values of the sleeping process in @p to + * the format that GDB expects. + * This function is called when kgdb does not have access to the + * &struct pt_regs and therefore it should fill the gdb registers + * @gdb_regs with what has been saved in &struct thread_struct + * thread field during switch_to. 
+ */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + gdb_regs[GDB_AX] = 0; + gdb_regs[GDB_BX] = 0; + gdb_regs[GDB_CX] = 0; + gdb_regs[GDB_DX] = 0; + gdb_regs[GDB_SI] = 0; + gdb_regs[GDB_DI] = 0; + gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; +#ifdef CONFIG_X86_32 + gdb_regs[GDB_DS] = __KERNEL_DS; + gdb_regs[GDB_ES] = __KERNEL_DS; + gdb_regs[GDB_PS] = 0; + gdb_regs[GDB_CS] = __KERNEL_CS; + gdb_regs[GDB_PC] = p->thread.ip; + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_FS] = 0xFFFF; + gdb_regs[GDB_GS] = 0xFFFF; +#else + gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs[GDB_PC] = 0; + gdb_regs[GDB_R8] = 0; + gdb_regs[GDB_R9] = 0; + gdb_regs[GDB_R10] = 0; + gdb_regs[GDB_R11] = 0; + gdb_regs[GDB_R12] = 0; + gdb_regs[GDB_R13] = 0; + gdb_regs[GDB_R14] = 0; + gdb_regs[GDB_R15] = 0; +#endif + gdb_regs[GDB_SP] = p->thread.sp; +} + +/** + * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs. + * @gdb_regs: A pointer to hold the registers we've received from GDB. + * @regs: A pointer to a &struct pt_regs to hold these values in. + * + * Convert the GDB regs in @gdb_regs into the pt_regs, and store them + * in @regs. 
+ */ +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->ax = gdb_regs[GDB_AX]; + regs->bx = gdb_regs[GDB_BX]; + regs->cx = gdb_regs[GDB_CX]; + regs->dx = gdb_regs[GDB_DX]; + regs->si = gdb_regs[GDB_SI]; + regs->di = gdb_regs[GDB_DI]; + regs->bp = gdb_regs[GDB_BP]; + regs->flags = gdb_regs[GDB_PS]; + regs->ip = gdb_regs[GDB_PC]; +#ifdef CONFIG_X86_32 + regs->ds = gdb_regs[GDB_DS]; + regs->es = gdb_regs[GDB_ES]; + regs->cs = gdb_regs[GDB_CS]; +#else + regs->r8 = gdb_regs[GDB_R8]; + regs->r9 = gdb_regs[GDB_R9]; + regs->r10 = gdb_regs[GDB_R10]; + regs->r11 = gdb_regs[GDB_R11]; + regs->r12 = gdb_regs[GDB_R12]; + regs->r13 = gdb_regs[GDB_R13]; + regs->r14 = gdb_regs[GDB_R14]; + regs->r15 = gdb_regs[GDB_R15]; +#endif +} + +/** + * kgdb_post_primary_code - Save error vector/code numbers. + * @regs: Original pt_regs. + * @e_vector: Original error vector. + * @err_code: Original error code. + * + * This is needed on architectures which support SMP and KGDB. + * This function is called after all the slave cpus have been put + * to a know spin state and the primary CPU has control over KGDB. + */ +void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) +{ + /* primary processor is completely in the debugger */ + gdb_x86vector = e_vector; + gdb_x86errcode = err_code; +} + +#ifdef CONFIG_SMP +/** + * kgdb_roundup_cpus - Get other CPUs into a holding pattern + * @flags: Current IRQ state + * + * On SMP systems, we need to get the attention of the other CPUs + * and get them be in a known state. This should do what is needed + * to get the other CPUs to call kgdb_wait(). Note that on some arches, + * the NMI approach is not used for rounding up all the CPUs. For example, + * in case of MIPS, smp_call_function() is used to roundup CPUs. In + * this case, we have to make sure that interrupts are enabled before + * calling smp_call_function(). 
The argument to this function is + * the flags that will be used when restoring the interrupts. There is + * local_irq_save() call before kgdb_roundup_cpus(). + * + * On non-SMP systems, this is not called. + */ +void kgdb_roundup_cpus(unsigned long flags) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + +/** + * kgdb_arch_handle_exception - Handle architecture specific GDB packets. + * @vector: The error vector of the exception that happened. + * @signo: The signal number of the exception that happened. + * @err_code: The error code of the exception that happened. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + * @regs: The &struct pt_regs of the current process. + * + * This function MUST handle the 'c' and 's' command packets, + * as well packets to set / remove a hardware breakpoint, if used. + * If there are additional packets which the hardware needs to handle, + * they are handled here. The code should return -1 if it wants to + * process more packets, and a %0 or %1 if it wants to exit from the + * kgdb callback. 
+ */ +int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, + char *remcomInBuffer, char *remcomOutBuffer, + struct pt_regs *linux_regs) +{ + unsigned long addr; + char *ptr; + int newPC; + + switch (remcomInBuffer[0]) { + case 'c': + case 's': + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->ip = addr; + newPC = linux_regs->ip; + + /* clear the trace bit */ + linux_regs->flags &= ~TF_MASK; + atomic_set(&kgdb_cpu_doing_single_step, -1); + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') { + linux_regs->flags |= TF_MASK; + kgdb_single_step = 1; + if (kgdb_contthread) { + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } + } + + return 0; + } + + /* this means that we do not want to exit from the handler: */ + return -1; +} + +static inline int +single_step_cont(struct pt_regs *regs, struct die_args *args) +{ + /* + * Single step exception from kernel space to user space so + * eat the exception and continue the process: + */ + printk(KERN_ERR "KGDB: trap/step from kernel to user space, " + "resuming...\n"); + kgdb_arch_handle_exception(args->trapnr, args->signr, + args->err, "c", "", regs); + + return NOTIFY_STOP; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + switch (cmd) { + case DIE_NMI: + if (atomic_read(&kgdb_active) != -1) { + /* KGDB CPU roundup */ + kgdb_nmicallback(raw_smp_processor_id(), regs); + return NOTIFY_STOP; + } + return NOTIFY_DONE; + + case DIE_NMI_IPI: + if (atomic_read(&kgdb_active) != -1) { + /* KGDB CPU roundup: */ + if (kgdb_nmicallback(raw_smp_processor_id(), regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; + } + return NOTIFY_DONE; + + case DIE_NMIWATCHDOG: + if (atomic_read(&kgdb_active) != -1) { + /* KGDB CPU roundup: */ + kgdb_nmicallback(raw_smp_processor_id(), regs); + return NOTIFY_STOP; + } + /* Enter 
debugger: */ + break; + + case DIE_DEBUG: + if (atomic_read(&kgdb_cpu_doing_single_step) == + raw_smp_processor_id() && + user_mode(regs)) + return single_step_cont(regs, args); + /* fall through */ + default: + if (user_mode(regs)) + return NOTIFY_DONE; + } + + if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs)) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + + /* + * Lowest-prio notifier priority, we want to be notified last: + */ + .priority = -INT_MAX, +}; + +/** + * kgdb_arch_init - Perform any architecture specific initalization. + * + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + return register_die_notifier(&kgdb_notifier); +} + +/** + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +/** + * + * kgdb_skipexception - Bail out of KGDB when we've been triggered. + * @exception: Exception vector number + * @regs: Current &struct pt_regs. + * + * On some architectures we need to skip a breakpoint exception when + * it occurs after a breakpoint has been removed. + * + * Skip an int3 exception when it occurs after a breakpoint has been + * removed. Backtrack eip by 1 since the int3 would have caused it to + * increment by 1. 
+ */ +int kgdb_skipexception(int exception, struct pt_regs *regs) +{ + if (exception == 3 && kgdb_isremovedbreak(regs->ip - 1)) { + regs->ip -= 1; + return 1; + } + return 0; +} + +unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) +{ + if (exception == 3) + return instruction_pointer(regs) - 1; + return instruction_pointer(regs); +} + +struct kgdb_arch arch_kgdb_ops = { + /* Breakpoint instruction: */ + .gdb_bpt_instr = { 0xcc }, +}; -- cgit v1.2.3 From d359752407f8916c29ad53a5c30ac73e338f2797 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 15 Feb 2008 14:55:53 -0600 Subject: kgdb: fix NMI hangs This patch fixes the hang regression with kgdb when the NMI interrupt comes in while the master core is returning from an exception. Adjust the NMI logic such that KGDB will not stop NMI exceptions from occurring by in general returning NOTIFY_DONE. It is not possible to distinguish the debug NMI sync vs the normal NMI apic interrupt so kgdb needs to catch the unknown NMI if it the debugger was previously active on one of the cpus. 
Signed-off-by: Jason Wessel Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 18 +++++++++++++++--- arch/x86/kernel/traps_32.c | 2 ++ arch/x86/kernel/traps_64.c | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 37194d6374d8..5d7a21119bf8 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -290,6 +291,8 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) return NOTIFY_STOP; } +static int was_in_debug_nmi[NR_CPUS]; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -299,15 +302,24 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); + was_in_debug_nmi[raw_smp_processor_id()] = 1; + touch_nmi_watchdog(); return NOTIFY_STOP; } return NOTIFY_DONE; case DIE_NMI_IPI: if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup: */ - if (kgdb_nmicallback(raw_smp_processor_id(), regs)) - return NOTIFY_DONE; + /* KGDB CPU roundup */ + kgdb_nmicallback(raw_smp_processor_id(), regs); + was_in_debug_nmi[raw_smp_processor_id()] = 1; + touch_nmi_watchdog(); + } + return NOTIFY_DONE; + + case DIE_NMIUNKNOWN: + if (was_in_debug_nmi[raw_smp_processor_id()]) { + was_in_debug_nmi[raw_smp_processor_id()] = 0; return NOTIFY_STOP; } return NOTIFY_DONE; diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index b22c01e05a18..c5421f30d678 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -708,6 +708,8 @@ io_check_error(unsigned char reason, struct pt_regs * regs) static __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + return; #ifdef CONFIG_MCA /* 
Might actually be able to figure out what the guilty party * is. */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 045466681911..055b1650c69d 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -806,6 +806,8 @@ io_check_error(unsigned char reason, struct pt_regs * regs) static __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + return; printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); -- cgit v1.2.3 From 64e9ee3095b61d0300ea548216a57d2536611309 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 15 Feb 2008 14:55:56 -0600 Subject: kgdb: add x86 HW breakpoints Add HW breakpoints into the arch specific portion of x86 kgdb. In the current x86 kernel.org kernels HW breakpoints are changed out in lazy fashion because there is no infrastructure around changing them when changing to a kernel task or entering the kernel mode via a system call. This lazy approach means that if a user process uses HW breakpoints the kgdb will lose out. This is an acceptable trade off because the developer debugging the kernel is assumed to know what is going on system wide and would be aware of this trade off. There is a minor bug fix to the kgdb core so as to correctly call the hw breakpoint functions with a valid value from the enum. There is also a minor change to the x86_64 startup code when using early HW breakpoints. When the debugger is connected, the cpu startup code must not zero out the HW breakpoint registers or you cannot hit the breakpoints you are interested in, in the first place.
Signed-off-by: Jason Wessel Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup64.c | 16 ++++++ 2 files changed, 154 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5d7a21119bf8..7d651adcb222 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -182,6 +182,122 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) #endif } +static struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned long addr; +} breakinfo[4]; + +static void kgdb_correct_hw_break(void) +{ + unsigned long dr7; + int correctit = 0; + int breakbit; + int breakno; + + get_debugreg(dr7, 7); + for (breakno = 0; breakno < 4; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= ((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << + ((breakno << 2) + 16); + if (breakno >= 0 && breakno <= 3) + set_debugreg(breakinfo[breakno].addr, breakno); + + } else { + if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + } + if (correctit) + set_debugreg(dr7, 7); +} + +static int +kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + + for (i = 0; i < 4; i++) + if (breakinfo[i].addr == addr && breakinfo[i].enabled) + break; + if (i == 4) + return -1; + + breakinfo[i].enabled = 0; + + return 0; +} + +static void kgdb_remove_all_hw_break(void) +{ + int i; + + for (i = 0; i < 4; i++) + memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); +} + +static int +kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + unsigned type; + int i; + + for (i = 0; i < 4; i++) + if (!breakinfo[i].enabled) + break; + if (i == 4) + return -1; + + 
switch (bptype) { + case BP_HARDWARE_BREAKPOINT: + type = 0; + len = 1; + break; + case BP_WRITE_WATCHPOINT: + type = 1; + break; + case BP_ACCESS_WATCHPOINT: + type = 3; + break; + default: + return -1; + } + + if (len == 1 || len == 2 || len == 4) + breakinfo[i].len = len - 1; + else + return -1; + + breakinfo[i].enabled = 1; + breakinfo[i].addr = addr; + breakinfo[i].type = type; + + return 0; +} + +/** + * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. + * @regs: Current &struct pt_regs. + * + * This function will be called if the particular architecture must + * disable hardware debugging while it is processing gdb packets or + * handling exception. + */ +void kgdb_disable_hw_debug(struct pt_regs *regs) +{ + /* Disable hardware debugging while we are in kgdb: */ + set_debugreg(0UL, 7); +} + /** * kgdb_post_primary_code - Save error vector/code numbers. * @regs: Original pt_regs. @@ -243,6 +359,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, struct pt_regs *linux_regs) { unsigned long addr; + unsigned long dr6; char *ptr; int newPC; @@ -269,6 +386,22 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, } } + get_debugreg(dr6, 6); + if (!(dr6 & 0x4000)) { + int breakno; + + for (breakno = 0; breakno < 4; breakno++) { + if (dr6 & (1 << breakno) && + breakinfo[breakno].type == 0) { + /* Set restore flag: */ + linux_regs->flags |= X86_EFLAGS_RF; + break; + } + } + } + set_debugreg(0UL, 6); + kgdb_correct_hw_break(); + return 0; } @@ -426,4 +559,9 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, + .flags = KGDB_HW_BREAKPOINT, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, }; diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 
e24c45677094..143aa78c566b 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -327,6 +328,17 @@ void __cpuinit cpu_init (void) load_TR_desc(); load_LDT(&init_mm.context); +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif /* * Clear all 6 debug registers: */ @@ -337,6 +349,10 @@ void __cpuinit cpu_init (void) set_debugreg(0UL, 3); set_debugreg(0UL, 6); set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif fpu_init(); -- cgit v1.2.3 From 737a460f21febe551ff1d2299b63bae9b154078f Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 7 Mar 2008 16:34:16 -0600 Subject: kgdb: fix several kgdb regressions kgdb core fixes: - Check to see that mm->mmap_cache is not null before calling flush_cache_range(), else on arch=ARM it will cause a fatal fault. - Breakpoints should only be restored if they are in the BP_ACTIVE state. - Fix a typo in comments to "kgdb_register_io_module" x86 kgdb fixes: - Fix the x86 arch handler such that on a kill or detach that the appropriate cleanup on the single stepping flags gets run. - Add in the DIE_NMIWATCHDOG call for x86_64 - Touch the nmi watchdog before returning the system to normal operation after performing any kind of kgdb operation, else the possibility exists to trigger the watchdog. 
Signed-off-by: Jason Wessel Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 4 ++++ arch/x86/kernel/traps_64.c | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 7d651adcb222..8c7e555f6d39 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -370,6 +370,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->ip = addr; + case 'D': + case 'k': newPC = linux_regs->ip; /* clear the trace bit */ @@ -480,6 +482,8 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs)) return NOTIFY_DONE; + /* Must touch watchdog before return to normal operation */ + touch_nmi_watchdog(); return NOTIFY_STOP; } diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 055b1650c69d..4e073320e70a 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,8 +600,13 @@ void die(const char * str, struct pt_regs * regs, long err) void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - unsigned long flags = oops_begin(); + unsigned long flags; + + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == + NOTIFY_STOP) + return; + flags = oops_begin(); /* * We are in trouble anyway, lets at least try * to get a message out. -- cgit v1.2.3 From fda31d7d4b5a9c663ac3ed1ba244018a88deecaf Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 18 Apr 2008 09:54:38 -0700 Subject: x86: kgdb build fix TF_MASK is no longer defined, use X86_EFLAGS_TF. 
Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/x86/kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8c7e555f6d39..24362ecf5f9a 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -375,12 +375,12 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, newPC = linux_regs->ip; /* clear the trace bit */ - linux_regs->flags &= ~TF_MASK; + linux_regs->flags &= ~X86_EFLAGS_TF; atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { - linux_regs->flags |= TF_MASK; + linux_regs->flags |= X86_EFLAGS_TF; kgdb_single_step = 1; if (kgdb_contthread) { atomic_set(&kgdb_cpu_doing_single_step, -- cgit v1.2.3 From 950e4da32426859ee4b37b2c95026d4f1efa5d05 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 26 Feb 2008 09:55:29 -0500 Subject: arch: Remove unnecessary inclusions of asm/semaphore.h None of these files use any of the functionality promised by asm/semaphore.h. It's possible that they rely on it dragging in some unrelated header file, but I can't build all these files, so we'll have to fix any build failures as they come up.
Signed-off-by: Matthew Wilcox --- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/proc.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d999d7833bc2..35b4f6a9c8ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0978a4a39418..0d0d9057e7c0 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From f8dfd5ed149ae340451f25847b434297c20d4645 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Apr 2008 19:19:54 +0200 Subject: x86: KGDB build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 24362ecf5f9a..f47f0eb886b8 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,11 +46,7 @@ #include #include -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif +#include /* * Put the error code here just in case the user cares: -- cgit v1.2.3 From 4a3575fd436aa98957184afd745e4ada8f1542d8 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Mon, 25 Feb 2008 15:18:37 +0800 Subject: x86: EFI_PAGE_SHIFT fix Make x86 EFI code work when EFI_PAGE_SHIFT != PAGE_SHIFT. The memrange_efi_to_native() provided in this patch can be used on other EFI platforms such as IA64 too. This patch has been tested on Intel x86_64 platform with EFI 64/32 firmware.
Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 18 +++++++++++++----- arch/x86/kernel/efi_64.c | 12 ++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 759e02bec070..77d424cf68b3 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; + u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - set_memory_x(md->virt_addr, md->num_pages); + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); } } @@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + if (PFN_UP(end) <= max_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); @@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) - set_memory_uc(md->virt_addr, md->num_pages); + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d143a1e76b30..d0060fdcccac 100644 --- 
a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void) void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped; + static unsigned pages_mapped __initdata; unsigned i, pages; + unsigned long offset; - /* phys_addr and size must be page aligned */ - if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) - return NULL; + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; - pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; @@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) } return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)); + (pages_mapped - pages)) + offset; } -- cgit v1.2.3 From 2b8106a0a3d3c1e5b69f091192bc99019ff4d81d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:51:22 -0700 Subject: x86_64: do not reserve ramdisk two times ramdisk is reserved via reserve_early in x86_64_start_kernel, later early_res_to_bootmem() will convert to reservation in bootmem. so don't need to reserve that again. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/setup_64.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d6d54faa84df..993c76773256 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +#ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data) unsigned long ramdisk_end = ramdisk_image + ramdisk_size; reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } +#endif reserve_ebda_region(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 674ef3510cdf..0aa291bff4e0 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -420,11 +420,14 @@ void __init setup_arch(char **cmdline_p) unsigned long end_of_mem = end_pfn << PAGE_SHIFT; if (ramdisk_end <= end_of_mem) { - reserve_bootmem_generic(ramdisk_image, ramdisk_size); + /* + * don't need to reserve again, already reserved early + * in x86_64_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; } else { - /* Assumes everything on node 0 */ free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", -- cgit v1.2.3 From 8ce116e5993cf64729a4d2b3dc2c0f072852654b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:16 +0100 Subject: x86: clean up cpu capabilities accesses, p4-clockmod.c Signed-off-by: Ingo Molnar 
Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 14791ec55cfd..199e4e05e5dc 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) + if (!test_cpu_cap(c, X86_FEATURE_ACPI) || + !test_cpu_cap(c, X86_FEATURE_ACC)) return -ENODEV; ret = cpufreq_register_driver(&p4clockmod_driver); -- cgit v1.2.3 From cf9b111c170733dde39139e8989b676ec8b81573 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 8 Mar 2008 18:15:06 +0800 Subject: x86: remove pointless comments Remove old comments that include the old arch/i386 directory. Signed-off-by: WANG Cong Acked-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/cstate.c | 2 -- arch/x86/kernel/acpi/processor.c | 2 -- arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 - arch/x86/kernel/entry_32.S | 1 - arch/x86/kernel/head_32.S | 1 - 5 files changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d59..9366fb68d8d8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/cstate.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for SMP C-states on Intel CPUs diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 324eb0cab19c..de2d2e4ebad9 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/processor.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for platforms with Intel CPUs diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 9b7e01daa1ca..1f4cc48c14c6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9ba49a26dff8..f0f8934fc303 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 826988a6e964..90f038af3adc 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. * * Copyright (C) 1991, 1992 Linus Torvalds * -- cgit v1.2.3 From 120d5bf128906c790df810e159d2e1239d08fef1 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Wed, 9 Apr 2008 22:53:50 +0200 Subject: x86: remove vm86.h inclusion from process_32.c I've made a small investigation about vm86.h inclusion rules and it looks like everything is more or less ok. Files that rely on asm/vm86.h symbols are: - kprobes.c - process_32.c - signal_32.c - traps_32.c - vm86_32.c File process_32.c includes vm86.h explicitly. We can remove that include and it won't break anything. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3903a8f2eb97..91e147b486dd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -45,7 +45,6 @@ #include #include #include -#include #ifdef CONFIG_MATH_EMULATION #include #endif -- cgit v1.2.3 From 1a7a34af78923f8807d054a15133a8fcf47e385e Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Thu, 10 Apr 2008 13:40:57 +0200 Subject: x86: e820_64, fix section mismatch warning fix section mismatch warnings which occurs on my x86_64 box while compiling linux-next-20080410: Warning messages: WARNING: arch/x86/kernel/built-in.o(.text+0x7bc2): Section mismatch in reference from the function bad_addr() to the variable .init.data:early_res The function bad_addr() references the variable __initdata early_res. This is often because bad_addr lacks a __initdata annotation or the annotation of early_res is wrong. WARNING: arch/x86/kernel/built-in.o(.text+0x7c3b): Section mismatch in reference from the function bad_addr_size() to the variable .init.data:early_res The function bad_addr_size() references the variable __initdata early_res. This is often because bad_addr_size lacks a __initdata annotation or the annotation of early_res is wrong. 
Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 7f6c0c85c8f6..cbd42e51cb08 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; @@ -116,7 +116,7 @@ again: } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) { int i; -- cgit v1.2.3 From f5a1b191b37ac2609e2babeec1b21f411da93e4d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 12 Apr 2008 10:28:25 +0200 Subject: x86: fix exec mappings comments - noexec32 is on by default for years already - add noexec32 to kernel-parameters and fix noexec typo in there Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 9042fb0e36f5..aee0e8200777 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -74,8 +74,8 @@ int force_personality32 = 0; Control non executable heap for 32bit processes. 
To control the stack too use noexec=off -on PROT_READ does not imply PROT_EXEC for 32bit processes -off PROT_READ implies PROT_EXEC (default) +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC */ static int __init nonx32_setup(char *str) { -- cgit v1.2.3 From 4bd01600b214275a80a69b44393d7e81d43c2faa Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 19 Feb 2008 11:02:30 +0100 Subject: x86: clean up =0 initializations in arch/x86/kernel/tsc_32.c Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3d7e6e9fa6c2..06af8cf8251f 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); * if the CPU frequency is scaled, TSC-based delays will need a different * loops_per_jiffy value to function properly. */ -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long cpu_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -283,7 +283,7 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz = 0; +static unsigned long current_tsc_khz; static cycle_t read_tsc(void) { -- cgit v1.2.3 From 5deb45e39b946901ae028ccd3a1d0b35fa387475 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 19 Apr 2008 19:19:55 +0200 Subject: ftrace: add notrace annotations for NMI routines This annotates NMI functions with notrace. Some tracers may be able to live with this, but some cannot. The safest is to turn it off, it's not particularly interesting anyway. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi_32.c | 3 ++- arch/x86/kernel/nmi_64.c | 6 ++++-- arch/x86/kernel/traps_32.c | 12 ++++++------ arch/x86/kernel/traps_64.c | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 8421d0ac6f22..11b14bbaa61e 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 11f9130ac513..5a29ded994fa 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -313,7 +313,8 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { int sum; int touched = 0; @@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) static unsigned ignore_nmis; -asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 65791ca2824a..dc4273010f2a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -681,7 +681,7 @@ gp_in_kernel: } } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG @@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) clear_mem_error(reason); } -static __kprobes void 
+static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs *regs, long error_code) +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 79aa6fc0815c..6d883b13ef4f 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err) oops_end(flags, regs, SIGSEGV); } -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; @@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, die("general protection fault", regs, error_code); } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG 
"Uhhuh. NMI received for unknown reason %02x.\n", @@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); @@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; -- cgit v1.2.3 From 529e25f646e08901a6dad5768f681efffd77225e Mon Sep 17 00:00:00 2001 From: Erik Bosman Date: Mon, 14 Apr 2008 00:24:18 +0200 Subject: x86: implement prctl PR_GET_TSC and PR_SET_TSC This patch implements the PR_GET_TSC and PR_SET_TSC prctl() commands on the x86 platform (both 32 and 64 bit.) These commands control the ability to read the timestamp counter from userspace (the RDTSC instruction.) While the RDTSC instuction is a useful profiling tool, it is also the source of some non-determinism in ring-3. For deterministic replay applications it is useful to be able to trap and emulate (and record the outcome of) this instruction. This patch uses code earlier used to disable the timestamp counter for the SECCOMP framework. A side-effect of this patch is that the SECCOMP environment will now also disable the timestamp counter on x86_64 due to the addition of the TIF_NOTSC define on this platform. 
The code which enables/disables the RDTSC instruction during context switches is in the __switch_to_xtra function, which already handles other unusual conditions, so normal performance should not have to suffer from this change. Signed-off-by: Erik Bosman Acked-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 43 +++++++++++++++++++++++++--- arch/x86/kernel/process_64.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 91e147b486dd..a3790a3f8a83 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -523,11 +524,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { write_cr4(read_cr4() | X86_CR4_TSD); } + void disable_TSC(void) { preempt_disable(); @@ -539,11 +540,47 @@ void disable_TSC(void) hard_disable_TSC(); preempt_enable(); } + static void hard_enable_TSC(void) { write_cr4(read_cr4() & ~X86_CR4_TSD); } -#endif /* CONFIG_SECCOMP */ + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. 
+ */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, @@ -577,7 +614,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_debugreg(next->debugreg7, 7); } -#ifdef CONFIG_SECCOMP if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ @@ -586,7 +622,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else hard_enable_TSC(); } -#endif #ifdef X86_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e75ccc8a2b87..4c13b1406c70 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -535,6 +536,64 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. 
+ */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + /* * This special macro can be used to load a debugging register */ @@ -572,6 +631,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, loaddebug(next, 7); } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. -- cgit v1.2.3 From d8bb6f4c1670c8324e4135c61ef07486f7f17379 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Apr 2008 19:45:18 +0200 Subject: x86: tsc prevent time going backwards We already catch most of the TSC problems by sanity checks, but there is a subtle bug which has been in the code forever. This can cause time jumps in the range of hours. This was reported in: http://lkml.org/lkml/2007/8/23/96 and http://lkml.org/lkml/2008/3/31/23 I was able to reproduce the problem with a gettimeofday loop test on a dual core and a quad core machine which both have synchronized TSCs. The TSCs seem not to be perfectly in sync though, but the kernel is not able to detect the slight delta in the sync check. Still there exists an extremely small window where this delta can be observed with a real big time jump. So far I was only able to reproduce this with the vsyscall gettimeofday implementation, but in theory this might be observable with the syscall based version as well.
CPU 0 updates the clock source variables under xtime/vsyscall lock and CPU1, where the TSC is slightly behind CPU0, is reading the time right after the seqlock was unlocked. The clocksource reference data was updated with the TSC from CPU0 and the value which is read from TSC on CPU1 is less than the reference data. This results in a huge delta value due to the unsigned subtraction of the TSC value and the reference value. This algorithm can not be changed due to the support of wrapping clock sources like pm timer. The huge delta is converted to nanoseconds and added to xtime, which is then observable by the caller. The next gettimeofday call on CPU1 will show the correct time again as now the TSC has advanced above the reference value. To prevent this TSC specific wreckage we need to compare the TSC value against the reference value and return the latter when it is larger than the actual TSC value. I pondered to mark the TSC unstable when the readout is smaller than the reference value, but this would render an otherwise good and fast clocksource unusable without a real good reason. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_32.c | 15 ++++++++++++++- arch/x86/kernel/tsc_64.c | 23 ++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 06af8cf8251f..e4790728b224 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -284,14 +284,27 @@ core_initcall(cpufreq_tsc); /* clock source code */ static unsigned long current_tsc_khz; +static struct clocksource clocksource_tsc; +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp issue.
This can be observed in + * a very small window right after one CPU updated cycle_last under + * xtime lock and the other CPU reads a TSC value which is smaller + * than the cycle_last reference value due to a TSC which is slighty + * behind. This delta is nowhere else observable, but in that case it + * results in a forward time jump in the range of hours due to the + * unsigned delta calculation of the time keeping core code, which is + * necessary to support wrapping clocksources like pm timer. + */ static cycle_t read_tsc(void) { cycle_t ret; rdtscll(ret); - return ret; + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index ceeba01e7f47..fcc16e58609e 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -11,6 +11,7 @@ #include #include #include +#include static int notsc __initdata = 0; @@ -287,18 +288,34 @@ int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +static struct clocksource clocksource_tsc; -/* clock source code: */ +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ static cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles(); - return ret; + + return ret >= clocksource_tsc.cycle_last ? 
+ ret : clocksource_tsc.cycle_last; } static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); - return ret; + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; } static struct clocksource clocksource_tsc = { -- cgit v1.2.3 From fa5c4639419668cbb18ca3d20c1253559a3b43ae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 16 Apr 2008 02:29:42 +0200 Subject: x86: rename find_max_pfn() to propagate_e820_map() this function doesnt just 'find' the max_pfn - it also has other side-effects such as registering sparse memory maps. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_32.c | 4 ++-- arch/x86/kernel/setup_32.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 0240cd778365..ed733e7cf4e6 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map) /* * Find the highest page frame number we have available */ -void __init find_max_pfn(void) +void __init propagate_e820_map(void) { int i; @@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg) * size before original memory map is * reset. 
*/ - find_max_pfn(); + propagate_e820_map(); saved_max_pfn = max_pfn; #endif e820.nr_map = 0; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5b0bffb7fcc9..1c4799e68718 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* update e820 for memory not covered by WB MTRRs */ - find_max_pfn(); + propagate_e820_map(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - find_max_pfn(); + propagate_e820_map(); max_low_pfn = setup_memory(); -- cgit v1.2.3 From 61c4628b538608c1a85211ed8438136adfeb9a95 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Mar 2008 15:28:04 -0700 Subject: x86, fpu: split FPU state from task struct - v5 Split the FPU save area from the task struct. This allows easy migration of FPU context, and it's generally cleaner. It also allows the following two optimizations: 1) only allocate when the application actually uses FPU, so in the first lazy FPU trap. This could save memory for non-fpu using apps. Next patch does this lazy allocation. 2) allocate the right size for the actual cpu rather than 512 bytes always. Patches enabling xsave/xrstor support (coming shortly) will take advantage of this. 
Signed-off-by: Suresh Siddha Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/i387.c | 80 +++++++++++++++++++++++++------------------- arch/x86/kernel/process.c | 35 +++++++++++++++++++ arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/traps_32.c | 6 +--- arch/x86/kernel/traps_64.c | 6 +++- 7 files changed, 90 insertions(+), 42 deletions(-) create mode 100644 arch/x86/kernel/process.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c3920ea8ac56..7a2a2e93e84b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-y += process.o obj-y += i387.o obj-y += ptrace.o obj-y += ds.o diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8f8102d967b3..baf632b221d4 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,17 +36,18 @@ #endif static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +static struct i387_fxsave_struct fx_scratch __cpuinitdata; -void mxcsr_feature_mask_init(void) +void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; clts(); if (cpu_has_fxsr) { - memset(&current->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : : "m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; } @@ -53,6 +55,17 @@ void mxcsr_feature_mask_init(void) stts(); } +void __init init_thread_xstate(void) +{ + if (cpu_has_fxsr) + xstate_size =
sizeof(struct i387_fxsave_struct); +#ifdef CONFIG_X86_32 + else + xstate_size = sizeof(struct i387_fsave_struct); +#endif + init_task.thread.xstate = alloc_bootmem(xstate_size); +} + #ifdef CONFIG_X86_64 /* * Called at bootup to set up the initial FPU state that is later cloned @@ -61,10 +74,6 @@ void mxcsr_feature_mask_init(void) void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); - extern void __bad_fxsave_alignment(void); - - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) - __bad_fxsave_alignment(); set_in_cr4(X86_CR4_OSFXSR); set_in_cr4(X86_CR4_OSXMMEXCPT); @@ -93,18 +102,19 @@ void init_fpu(struct task_struct *tsk) } if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; + fx->mxcsr = MXCSR_DEFAULT; } else { - memset(&tsk->thread.i387.fsave, 0, - sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; + memset(fp, 0, xstate_size); + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; } /* * Only the device not available exception or ptrace can call init_fpu. 
@@ -132,7 +142,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, init_fpu(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -148,12 +158,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; return ret; } @@ -233,7 +243,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) static void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -273,7 +283,7 @@ static void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -310,7 +320,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, + -1); } if (kbuf && pos == 0 && count == sizeof(env)) { @@ -338,7 +349,7 @@ int fpregs_set(struct task_struct *target, const 
struct user_regset *regset, if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, -1); } if (pos > 0 || count < sizeof(env)) @@ -358,11 +369,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; unlazy_fpu(tsk); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if (__copy_to_user(buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct))) + fp->status = fp->swd; + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; return 1; } @@ -370,6 +381,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; struct user_i387_ia32_struct env; int err = 0; @@ -379,12 +391,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (__copy_to_user(buf, &env, sizeof(env))) return -1; - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); + err |= __put_user(fx->swd, &buf->status); err |= __put_user(X86_FXSR_MAGIC, &buf->magic); if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + if (__copy_to_user(&buf->_fxsr_env[0], fx, sizeof(struct i387_fxsave_struct))) return -1; return 1; @@ -417,7 +429,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; clear_fpu(tsk); - return __copy_from_user(&tsk->thread.i387.fsave, buf, + return __copy_from_user(&tsk->thread.xstate->fsave, buf, sizeof(struct i387_fsave_struct)); } @@ -428,10 +440,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int err; clear_fpu(tsk); - err = 
__copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) return 1; convert_to_fxsr(tsk, &env); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c new file mode 100644 index 000000000000..ead24efbcba0 --- /dev/null +++ b/arch/x86/kernel/process.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *task_xstate_cachep; + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + *dst = *src; + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + return 0; +} + +void free_thread_info(struct thread_info *ti) +{ + kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); + ti->task->thread.xstate = NULL; + + free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); +} + +void arch_task_cache_init(void) +{ + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC, NULL); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3790a3f8a83..3890a5dd25f9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -703,7 +703,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0. 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c13b1406c70..b795e831afd6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -682,7 +682,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter>5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0, LDT and the page table pointer: diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index dc4273010f2a..8d136a73ce8e 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1208,11 +1208,6 @@ void __init trap_init(void) #endif set_trap_gate(19, &simd_coprocessor_error); - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generate a build-time error if the alignment is wrong. - */ - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); @@ -1233,6 +1228,7 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 6d883b13ef4f..dc0cb497eec3 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1128,7 +1128,7 @@ asmlinkage void math_state_restore(void) if (!used_math()) init_fpu(me); - restore_fpu_checking(&me->thread.i387.fxsave); + restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } @@ -1163,6 +1163,10 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif + /* + * initialize the per thread extended state: + */ + init_thread_xstate(); /* * Should be a barrier for any external CPU state. 
*/ -- cgit v1.2.3 From aa283f49276e7d840a40fb01eee6de97eaa7e012 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Mar 2008 15:28:05 -0700 Subject: x86, fpu: lazy allocation of FPU area - v5 Only allocate the FPU area when the application actually uses FPU, i.e., in the first lazy FPU trap. This could save memory for non-fpu using apps. for example: on my system after boot, there are around 300 processes, with only 17 using FPU. Signed-off-by: Suresh Siddha Cc: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i387.c | 38 ++++++++++++++++++++++++++++++-------- arch/x86/kernel/process.c | 28 +++++++++++++++++++--------- arch/x86/kernel/process_32.c | 4 ++++ arch/x86/kernel/process_64.c | 4 ++++ arch/x86/kernel/traps_32.c | 17 +++++++++++++++-- arch/x86/kernel/traps_64.c | 19 ++++++++++++++++--- 6 files changed, 88 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index baf632b221d4..db6839b53195 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -63,7 +62,6 @@ void __init init_thread_xstate(void) else xstate_size = sizeof(struct i387_fsave_struct); #endif - init_task.thread.xstate = alloc_bootmem(xstate_size); } #ifdef CONFIG_X86_64 @@ -93,12 +91,22 @@ void __cpuinit fpu_init(void) * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ -void init_fpu(struct task_struct *tsk) +int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { if (tsk == current) unlazy_fpu(tsk); - return; + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. 
+ */ + if (!tsk->thread.xstate) { + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!tsk->thread.xstate) + return -ENOMEM; } if (cpu_has_fxsr) { @@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk) * Only the device not available exception or ptrace can call init_fpu. */ set_stopped_child_used_math(tsk); + return 0; } int fpregs_active(struct task_struct *target, const struct user_regset *regset) @@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + int ret; + if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fxsave, 0, -1); @@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct user_i387_ia32_struct env; + int ret; if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); if (!cpu_has_fxsr) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ead24efbcba0..0e613e7e7b5e 100644 --- a/arch/x86/kernel/process.c +++ 
b/arch/x86/kernel/process.c @@ -5,24 +5,34 @@ #include #include -static struct kmem_cache *task_xstate_cachep; +struct kmem_cache *task_xstate_cachep; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!dst->thread.xstate) - return -ENOMEM; - WARN_ON((unsigned long)dst->thread.xstate & 15); - memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + if (src->thread.xstate) { + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + } return 0; } -void free_thread_info(struct thread_info *ti) +void free_thread_xstate(struct task_struct *tsk) { - kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); - ti->task->thread.xstate = NULL; + if (tsk->thread.xstate) { + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); + tsk->thread.xstate = NULL; + } +} + +void free_thread_info(struct thread_info *ti) +{ + free_thread_xstate(ti->task); free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3890a5dd25f9..7adad088e373 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -521,6 +521,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b795e831afd6..891af1a1b48a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -533,6 +533,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ss = __USER_DS; regs->flags = 
0x200; set_fs(USER_DS); + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8d136a73ce8e..471e694d6713 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + clts(); /* Allow maths ops (or we recurse) */ - if (!tsk_used_math(tsk)) - init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index dc0cb497eec3..adff76ea97c4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1124,10 +1124,23 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) asmlinkage void math_state_restore(void) { struct task_struct *me = current; - clts(); /* Allow maths ops (or we recurse) */ - if (!used_math()) - init_fpu(me); + if (!used_math()) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(me)) { + /* + * ran out of memory! 
+ */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + clts(); /* Allow maths ops (or we recurse) */ restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; -- cgit v1.2.3 From 1679f2710ac58df580d3716fab1f42ae50a226eb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 16 Apr 2008 10:27:53 +0200 Subject: x86: fpu xstate split cleanup Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0e613e7e7b5e..3004d716539d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -29,11 +29,10 @@ void free_thread_xstate(struct task_struct *tsk) } } - void free_thread_info(struct thread_info *ti) { free_thread_xstate(ti->task); - free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); + free_pages((unsigned long)ti, get_order(THREAD_SIZE)); } void arch_task_cache_init(void) -- cgit v1.2.3 From 752bea4abbff5e3ffef36802b860e80d0b632990 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 7 Mar 2008 15:02:50 -0800 Subject: x86: reserve dma32 early for gart a system with 256 GB of RAM, when NUMA is disabled crashes the following way: Your BIOS doesn't leave a aperture memory hole Please enable the IOMMU option in the BIOS setup This costs you 64 MB of RAM Cannot allocate aperture memory hole (ffff8101c0000000,65536K) Kernel panic - not syncing: Not enough memory for aperture Pid: 0, comm: swapper Not tainted 2.6.25-rc4-x86-latest.git #33 Call Trace: [] panic+0xb2/0x190 [] ? release_console_sem+0x7c/0x250 [] ? __alloc_bootmem_nopanic+0x48/0x90 [] ? free_bootmem+0x29/0x50 [] gart_iommu_hole_init+0x5e7/0x680 [] ? alloc_large_system_hash+0x16b/0x310 [] ? 
_etext+0x0/0x1 [] pci_iommu_alloc+0x1c/0x40 [] mem_init+0x45/0x1a0 [] start_kernel+0x295/0x380 [] _sinittext+0x1c2/0x230 the root cause is : memmap PMD is too big, [ffffe200e0600000-ffffe200e07fffff] PMD ->ffff81383c000000 on node 0 almost near 4G..., and vmemmap_alloc_block will use up the ram under 4G. solution will be: 1. make memmap allocation get memory above 4G... 2. reserve some dma32 range early before we try to set up memmap for all. and release that before pci_iommu_alloc, so gart or swiotlb could get some range under 4g limit for sure. the patch is using method 2. because method1 may need more code to handle SPARSEMEM and SPASEMEM_VMEMMAP will get Your BIOS doesn't leave a aperture memory hole Please enable the IOMMU option in the BIOS setup This costs you 64 MB of RAM Mapping aperture over 65536 KB of RAM @ 4000000 Memory: 264245736k/268959744k available (8484k kernel code, 4187464k reserved, 4004k data, 724k init) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 2 ++ 2 files changed, 51 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index ada5a0604992..e4fffaabe53b 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -286,8 +288,55 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + 
align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + void __init pci_iommu_alloc(void) { + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); /* * The order of these functions is important for * fall-back/fail-over reasons diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 0aa291bff4e0..6b8e11f0c15d 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -398,6 +398,8 @@ void __init setup_arch(char **cmdline_p) early_res_to_bootmem(); + dma32_reserve_bootmem(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. -- cgit v1.2.3 From 22456b97148be300e25e9cb97244656775972475 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:21 -0300 Subject: x86: implement dma_map_single through dma_ops That's already the name of the game for x86_64. For i386, we add a pci-base_32.c, that will hold the default operations. 
The function call itself goes through dma-mapping.h , the common header Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/pci-base_32.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 arch/x86/kernel/pci-base_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7a2a2e93e84b..edd5c54ffde9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,6 +26,7 @@ obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o obj-y += quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o +obj-$(CONFIG_X86_32) += pci-base_32.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c new file mode 100644 index 000000000000..b613d735f76c --- /dev/null +++ b/arch/x86/kernel/pci-base_32.c @@ -0,0 +1,20 @@ +#include +#include +#include +#include +#include + +static dma_addr_t pci32_map_single(struct device *dev, void *ptr, + size_t size, int direction) +{ + WARN_ON(size == 0); + flush_write_buffers(); + return virt_to_phys(ptr); +} + +static const struct dma_mapping_ops pci32_dma_ops = { + .map_single = pci32_map_single, +}; + +const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; +EXPORT_SYMBOL(dma_ops); -- cgit v1.2.3 From 0cb0ae68323657663e4e8c0c1ce82a5af6621bbb Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:22 -0300 Subject: x86: move dma_unmap_single to common header i386 base does not need it, so it gets an empty function. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index b613d735f76c..a8a7c7f2d23f 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -14,6 +14,7 @@ static dma_addr_t pci32_map_single(struct device *dev, void *ptr, static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, + .unmap_single = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 16a3ce9bae667178f79a4951fc0ba8b515b5b733 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:23 -0300 Subject: x86: move dma_map_sg to common header the old i386 implementation is moved to pci-base_32.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index a8a7c7f2d23f..24741525901c 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -12,9 +12,28 @@ static dma_addr_t pci32_map_single(struct device *dev, void *ptr, return virt_to_phys(ptr); } +static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, + int nents, int direction) +{ + struct scatterlist *sg; + int i; + + WARN_ON(nents == 0 || sglist[0].length == 0); + + for_each_sg(sglist, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg->dma_address = sg_phys(sg); + } + + flush_write_buffers(); + return nents; +} + static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, + .map_sg = pci32_dma_map_sg, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 72c784f82c378df1903676acd2efc5eeb5cac579 Mon Sep 17 
00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:24 -0300 Subject: x86: move dma_unmap_sg to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 24741525901c..920530438d8c 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -34,6 +34,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, .map_sg = pci32_dma_map_sg, + .unmap_sg = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From c01dd8cf7d19b869af1668c80a34a955c871f607 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:25 -0300 Subject: x86: move dma_sync_single_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 920530438d8c..dce03c81bb25 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -35,6 +35,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .unmap_single = NULL, .map_sg = pci32_dma_map_sg, .unmap_sg = NULL, + .sync_single_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 9231b269e09ed60910c159cf668f887623b7ac58 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:26 -0300 Subject: x86: move dma_sync_single_for_device to common header i386 gets an empty function. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index dce03c81bb25..36488245e361 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -36,6 +36,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .map_sg = pci32_dma_map_sg, .unmap_sg = NULL, .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 627610fcb70164991ed0d11110a56c43b15b9312 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:27 -0300 Subject: x86: move dma_sync_single_range_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 36488245e361..c501599a4506 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -37,6 +37,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .unmap_sg = NULL, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 713623326c816b145105769f174ec237815e53f1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:28 -0300 Subject: x86: move dma_sync_single_range_for_device to common header i386 gets an empty function. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index c501599a4506..4512c307b60c 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -38,6 +38,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From ed435dee9cb470082e4550edbfcbc7e81132e976 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:29 -0300 Subject: x86: move dma_sync_sg_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 4512c307b60c..d876600aaeb6 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -39,6 +39,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_for_device = NULL, .sync_single_range_for_cpu = NULL, .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From e7f3a913f91b7bfef3a93dff27930f24bdfcd2c0 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:30 -0300 Subject: x86: move dma_sync_sg_for_device to common header i386 gets an empty function. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index d876600aaeb6..033d94ec5000 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -40,6 +40,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_range_for_cpu = NULL, .sync_single_range_for_device = NULL, .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 2be621498d461b63ca6124f86e3b9582e1a8e722 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Apr 2008 19:19:56 +0200 Subject: x86: dma-ops on highmem fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 4 ++-- arch/x86/kernel/pci-calgary_64.c | 3 ++- arch/x86/kernel/pci-dma_64.c | 2 +- arch/x86/kernel/pci-gart_64.c | 15 +++++++-------- arch/x86/kernel/pci-nommu_64.c | 4 ++-- arch/x86/kernel/pci-swiotlb_64.c | 9 ++++++++- 6 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 033d94ec5000..cf4bb28dfc6a 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -4,12 +4,12 @@ #include #include -static dma_addr_t pci32_map_single(struct device *dev, void *ptr, +static dma_addr_t pci32_map_single(struct device *dev, phys_addr_t ptr, size_t size, int direction) { WARN_ON(size == 0); flush_write_buffers(); - return virt_to_phys(ptr); + return ptr; } static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1b5464c2434f..adb91e4b62da 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -470,10 +470,11 @@ error: 
return 0; } -static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, +static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t dma_handle = bad_dma_address; + void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index e4fffaabe53b..f97a08d0a8f9 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -141,7 +141,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, memory, + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 700e4647dd30..c07455d1695f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, } static dma_addr_t -gart_map_simple(struct device *dev, char *buf, size_t size, int dir) +gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + dma_addr_t map = dma_map_area(dev, paddr, size, dir); flush_gart(); @@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir) /* Map a single area into the IOMMU */ static dma_addr_t -gart_map_single(struct device *dev, void *addr, size_t size, int dir) +gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - unsigned long phys_mem, bus; + unsigned long bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); - if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + if (!need_iommu(dev, paddr, size)) + return paddr; - bus = gart_map_simple(dev, addr, size, 
dir); + bus = gart_map_simple(dev, paddr, size, dir); return bus; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index ab08e1832228..6e330769d017 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -26,10 +26,10 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) } static dma_addr_t -nommu_map_single(struct device *hwdev, void *ptr, size_t size, +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t bus = virt_to_bus(ptr); + dma_addr_t bus = paddr; if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; return bus; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82a0a674a003..490da7f4b8d0 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -11,11 +11,18 @@ int swiotlb __read_mostly; +static dma_addr_t +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); +} + const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, + .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, -- cgit v1.2.3 From 802c1f6648aeb3eea670b4ef8b10014169b65699 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:34 -0300 Subject: x86: move dma_supported and dma_set_mask to pci-dma_32.c This is the way x86_64 does, so this make them equal. They have to be extern now in the header, and the extern definition is moved to the common dma-mapping.h header. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 51330321a5d3..453b4bda2714 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -156,6 +156,39 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); int forbid_dac; EXPORT_SYMBOL(forbid_dac); +int +dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + /* Work around chipset bugs */ + if (forbid_dac > 0 && mask > 0xffffffffULL) + return 0; + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + return 1; +} + +int +dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} + + static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { -- cgit v1.2.3 From 7c18341665917b493fa40eeb3c7ff6c1a5ac47db Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:36 -0300 Subject: x86: provide a bad_dma_address symbol for i386 It's initially 0, since we don't expect any DMA there. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 453b4bda2714..55ab3c874d8f 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,6 +14,10 @@ #include #include +/* For i386, we make it point to the NULL address */ +dma_addr_t bad_dma_address __read_mostly = 0x0; +EXPORT_SYMBOL(bad_dma_address); + struct dma_coherent_mem { void *virt_base; u32 device_base; -- cgit v1.2.3 From c786df08f6df2833e34e78cee5ef62558e3b5346 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:37 -0300 Subject: x86: unify dma_mapping_error We provide a map_error function in pci-base_32.c to make sure i386 keeps with the same behaviour it used to. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index cf4bb28dfc6a..7caf5c211f23 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -30,6 +30,12 @@ static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, return nents; } +/* Make sure we keep the same behaviour */ +static int pci32_map_error(dma_addr_t dma_addr) +{ + return 0; +} + static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, @@ -41,6 +47,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_range_for_device = NULL, .sync_sg_for_cpu = NULL, .sync_sg_for_device = NULL, + .mapping_error = pci32_map_error, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 19e395afb44746ce7422a9eabcf883d5eec2bb80 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 27 Mar 
2008 11:03:15 +0000 Subject: x86: move dma_supported and dma_set_mask to pci-dma_32.c, fix ERROR: "dma_supported" [drivers/ssb/ssb.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/qla2xxx/qla2xxx.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/aic7xxx/aic7xxx.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/aic7xxx/aic79xx.ko] undefined! ERROR: "dma_supported" [drivers/net/pcnet32.ko] undefined! ERROR: "dma_supported" [drivers/media/video/saa7134/saa7134.ko] undefined! ERROR: "dma_set_mask" [drivers/media/video/meye.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx8802.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx8800.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx88-alsa.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx23885/cx23885.ko] undefined! They just need to be exported like on x86_64. dma_supported() and dma_set_mask() were previously inlined, but are now moved to pci-dma_32.c. Since they're used by various drivers, they need to be exported. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 55ab3c874d8f..be6b1f6aa1a7 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -180,6 +180,7 @@ dma_supported(struct device *dev, u64 mask) return 1; } +EXPORT_SYMBOL(dma_supported); int dma_set_mask(struct device *dev, u64 mask) @@ -191,6 +192,7 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +EXPORT_SYMBOL(dma_set_mask); static __devinit void via_no_dac(struct pci_dev *dev) -- cgit v1.2.3 From 459121c9ec1e6c5d701f6520f4170719ac008951 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:43 -0300 Subject: x86: introduce pci-dma.c This patch introduces pci-dma.c, a common file for pci dma between i386 and x86_64. 
As a start, dma_set_mask() is the same between architectures, and is placed there. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pci-dma.c | 14 ++++++++++++++ arch/x86/kernel/pci-dma_32.c | 12 ------------ arch/x86/kernel/pci-dma_64.c | 9 --------- 4 files changed, 15 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/pci-dma.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index edd5c54ffde9..1799f76a6a95 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o -obj-y += quirks.o i8237.o topology.o kdebugfs.o +obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o obj-$(CONFIG_X86_32) += pci-base_32.o diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c new file mode 100644 index 000000000000..f1c24d8e7942 --- /dev/null +++ b/arch/x86/kernel/pci-dma.c @@ -0,0 +1,14 @@ +#include + +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + + diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index be6b1f6aa1a7..9e8297657c32 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -182,18 +182,6 @@ dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int -dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - *dev->dma_mask = mask; - - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - static __devinit void 
via_no_dac(struct pci_dev *dev) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index f97a08d0a8f9..e697b865c1a3 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -213,15 +213,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - /* * See for the iommu kernel parameter * documentation. -- cgit v1.2.3 From d5df63f48a67400a26eba15624aa883897a4f4d1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:44 -0300 Subject: x86: delete empty functions from pci-nommu_64.c These functions are now called conditionally on their existence in the struct. So just delete them, instead of keeping an empty implementation. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 6e330769d017..90a7c40aa989 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -35,10 +35,6 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, return bus; } -static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction) -{ -} /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the @@ -71,20 +67,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above.
- */ -static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ -} - const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, - .unmap_single = nommu_unmap_single, .map_sg = nommu_map_sg, - .unmap_sg = nommu_unmap_sg, .is_phys = 1, }; -- cgit v1.2.3 From 9f9ab46d557c32b9cad49c31d094d659ec3b59c0 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:45 -0300 Subject: x86: implement mapping_error in pci-nommu_64.c This patch implements mapping_error for pci-nommu_64.c. It takes care to keep the same compatible behaviour it already had. Although this file is not (yet) used for i386, we introduce the i386 version here. Again, care is taken, even at the expense of an ifdef, to keep the same behaviour unconditionally. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 90a7c40aa989..a4e8ccfae4cb 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -67,9 +67,21 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif +} + + const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, + .mapping_error = nommu_mapping_error, .is_phys = 1, }; -- cgit v1.2.3 From e4dcdd6b4fa33efee94e89cccd75e871c570c510 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:46 -0300 Subject: x86: Add flush_write_buffers in nommu functions This patch adds flush_write_buffers() in some functions of pci-nommu_64.c They are added anywhere i386 would also have it.
This is not a problem for x86_64, since flush_write_buffers() is a nop for it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index a4e8ccfae4cb..1da9cf9be3aa 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -32,6 +32,7 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, dma_addr_t bus = paddr; if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; + flush_write_buffers(); return bus; } @@ -64,6 +65,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } + flush_write_buffers(); return nents; } -- cgit v1.2.3 From 30db2cbf38d68f466fd34488f8312a151225c9ac Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:47 -0300 Subject: x86: use sg_phys in x86_64 To make the code usable in i386, where we have high memory mappings, we drop the virt_to_bus(sg_virt()) construction in favour of sg_phys.
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 1da9cf9be3aa..c6901e751770 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -60,7 +60,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); - s->dma_address = virt_to_bus(sg_virt(s)); + s->dma_address = sg_phys(s); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; -- cgit v1.2.3 From 5b3e5b7273435f8a7f83d3556a09adfd6f247e36 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:49 -0300 Subject: x86: use WARN_ON in mapping functions In the very same way i386 does, we use WARN_ON functions in map_single and map_sg. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index c6901e751770..8d036aee2a8d 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -30,6 +30,7 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t bus = paddr; + WARN_ON(size == 0); if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; flush_write_buffers(); @@ -58,6 +59,8 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, struct scatterlist *s; int i; + WARN_ON(nents == 0 || sg[0].length == 0); + for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); s->dma_address = sg_phys(s); -- cgit v1.2.3 From d741bde26dc3444eaeb269051d3f0b623b24de13 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:48
-0300 Subject: x86: use dma_length in i386 This is done to get the code closer to x86_64. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 7caf5c211f23..837bbe91043e 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -24,6 +24,7 @@ static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, BUG_ON(!sg_page(sg)); sg->dma_address = sg_phys(sg); + sg->dma_length = sg->length; } flush_write_buffers(); -- cgit v1.2.3 From 85c246ee16fe00bf7bf9e7ff09a5d17d9a83cf71 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:50 -0300 Subject: x86: move definition to pci-dma.c Move dma_ops structure definition to pci-dma.c, where it belongs. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 11 ++++++++--- arch/x86/kernel/pci-dma.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 837bbe91043e..b44ea517fcf0 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -37,7 +37,7 @@ static int pci32_map_error(dma_addr_t dma_addr) return 0; } -static const struct dma_mapping_ops pci32_dma_ops = { +const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, .map_sg = pci32_dma_map_sg, @@ -51,5 +51,10 @@ static const struct dma_mapping_ops pci32_dma_ops = { .mapping_error = pci32_map_error, }; -const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -EXPORT_SYMBOL(dma_ops); +/* this is temporary */ +int __init no_iommu_init(void) +{ + dma_ops = &pci32_dma_ops; + return 0; +} +fs_initcall(no_iommu_init); diff --git a/arch/x86/kernel/pci-dma.c 
b/arch/x86/kernel/pci-dma.c index f1c24d8e7942..1323cd80387b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,5 +1,8 @@ #include +const struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) -- cgit v1.2.3 From f9c258de3494a5249a61fe110ece2082e5927468 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:52 -0300 Subject: x86: unify pci-nommu merge pci-base_32.c and pci-nommu_64.c into pci-nommu.c Their code were made the same, so now they can be merged. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 5 +-- arch/x86/kernel/pci-base_32.c | 60 ------------------------- arch/x86/kernel/pci-dma.c | 8 ++++ arch/x86/kernel/pci-dma_64.c | 8 ---- arch/x86/kernel/pci-nommu.c | 100 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu_64.c | 100 ----------------------------------------- 6 files changed, 110 insertions(+), 171 deletions(-) delete mode 100644 arch/x86/kernel/pci-base_32.c create mode 100644 arch/x86/kernel/pci-nommu.c delete mode 100644 arch/x86/kernel/pci-nommu_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1799f76a6a95..307aee5e8c5b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,9 +24,8 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o -obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o -obj-$(CONFIG_X86_32) += pci-base_32.o +obj-y += alternative.o i8253.o pci-nommu.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/pci-base_32.c 
b/arch/x86/kernel/pci-base_32.c deleted file mode 100644 index b44ea517fcf0..000000000000 --- a/arch/x86/kernel/pci-base_32.c +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include - -static dma_addr_t pci32_map_single(struct device *dev, phys_addr_t ptr, - size_t size, int direction) -{ - WARN_ON(size == 0); - flush_write_buffers(); - return ptr; -} - -static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, int direction) -{ - struct scatterlist *sg; - int i; - - WARN_ON(nents == 0 || sglist[0].length == 0); - - for_each_sg(sglist, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg->dma_address = sg_phys(sg); - sg->dma_length = sg->length; - } - - flush_write_buffers(); - return nents; -} - -/* Make sure we keep the same behaviour */ -static int pci32_map_error(dma_addr_t dma_addr) -{ - return 0; -} - -const struct dma_mapping_ops pci32_dma_ops = { - .map_single = pci32_map_single, - .unmap_single = NULL, - .map_sg = pci32_dma_map_sg, - .unmap_sg = NULL, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .mapping_error = pci32_map_error, -}; - -/* this is temporary */ -int __init no_iommu_init(void) -{ - dma_ops = &pci32_dma_ops; - return 0; -} -fs_initcall(no_iommu_init); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1323cd80387b..37a558a96153 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,6 +3,14 @@ const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +#ifdef CONFIG_IOMMU_DEBUG +int panic_on_overflow __read_mostly = 1; +int force_iommu __read_mostly = 1; +#else +int panic_on_overflow __read_mostly = 0; +int force_iommu __read_mostly = 0; +#endif + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) diff --git a/arch/x86/kernel/pci-dma_64.c 
b/arch/x86/kernel/pci-dma_64.c index e697b865c1a3..9ef18bfad2ad 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -27,14 +27,6 @@ EXPORT_SYMBOL(iommu_bio_merge); static int iommu_sac_force __read_mostly = 0; int no_iommu __read_mostly; -#ifdef CONFIG_IOMMU_DEBUG -int panic_on_overflow __read_mostly = 1; -int force_iommu __read_mostly = 1; -#else -int panic_on_overflow __read_mostly = 0; -int force_iommu __read_mostly= 0; -#endif - /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c new file mode 100644 index 000000000000..aec43d56f49c --- /dev/null +++ b/arch/x86/kernel/pci-nommu.c @@ -0,0 +1,100 @@ +/* Fallback functions when the main IOMMU code is not compiled in. This + code is roughly equivalent to i386. */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) +{ + if (hwdev && bus + size > *hwdev->dma_mask) { + if (*hwdev->dma_mask >= DMA_32BIT_MASK) + printk(KERN_ERR + "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", + name, (long long)bus, size, + (long long)*hwdev->dma_mask); + return 0; + } + return 1; +} + +static dma_addr_t +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + dma_addr_t bus = paddr; + WARN_ON(size == 0); + if (!check_addr("map_single", hwdev, bus, size)) + return bad_dma_address; + flush_write_buffers(); + return bus; +} + + +/* Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). 
+ * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + s->dma_address = sg_phys(s); + if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) + return 0; + s->dma_length = s->length; + } + flush_write_buffers(); + return nents; +} + +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif +} + + +const struct dma_mapping_ops nommu_dma_ops = { + .map_single = nommu_map_single, + .map_sg = nommu_map_sg, + .mapping_error = nommu_mapping_error, + .is_phys = 1, +}; + +void __init no_iommu_init(void) +{ + if (dma_ops) + return; + + force_iommu = 0; /* no HW IOMMU */ + dma_ops = &nommu_dma_ops; +} diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c deleted file mode 100644 index 8d036aee2a8d..000000000000 --- a/arch/x86/kernel/pci-nommu_64.c +++ /dev/null @@ -1,100 +0,0 @@ -/* Fallback functions when the main IOMMU code is not compiled in. This - code is roughly equivalent to i386. 
*/ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) -{ - if (hwdev && bus + size > *hwdev->dma_mask) { - if (*hwdev->dma_mask >= DMA_32BIT_MASK) - printk(KERN_ERR - "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static dma_addr_t -nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, - int direction) -{ - dma_addr_t bus = paddr; - WARN_ON(size == 0); - if (!check_addr("map_single", hwdev, bus, size)) - return bad_dma_address; - flush_write_buffers(); - return bus; -} - - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. 
- */ -static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - s->dma_length = s->length; - } - flush_write_buffers(); - return nents; -} - -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { - .map_single = nommu_map_single, - .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, - .is_phys = 1, -}; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} -- cgit v1.2.3 From cb5867a5d8ca20e16ddc3397c36ee9c2e4cba219 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:51 -0300 Subject: x86: move initialization functions to pci-dma.c initcalls that triggers the various possibiities for dma subsys are moved to pci-dma.c. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 25 +++++++++++++++++++++++++ arch/x86/kernel/pci-dma_64.c | 23 ----------------------- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 37a558a96153..6b77fd872a7a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,4 +1,8 @@ #include +#include + +#include +#include const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -22,4 +26,25 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); +static int __init pci_iommu_init(void) +{ +#ifdef CONFIG_CALGARY_IOMMU + calgary_iommu_init(); +#endif + + intel_iommu_init(); + +#ifdef CONFIG_GART_IOMMU + gart_iommu_init(); +#endif + no_iommu_init(); + return 0; +} + +void pci_iommu_shutdown(void) +{ + gart_iommu_shutdown(); +} +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 9ef18bfad2ad..42021300964a 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -339,27 +339,6 @@ void __init pci_iommu_alloc(void) #endif } -static int __init pci_iommu_init(void) -{ -#ifdef CONFIG_CALGARY_IOMMU - calgary_iommu_init(); -#endif - - intel_iommu_init(); - -#ifdef CONFIG_GART_IOMMU - gart_iommu_init(); -#endif - - no_iommu_init(); - return 0; -} - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); -} - #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. 
Disable it here */ @@ -372,5 +351,3 @@ static __devinit void via_no_dac(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); #endif -/* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); -- cgit v1.2.3 From 116890d556af38d539597655c564a73e6eef3d9e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:54 -0300 Subject: x86: move x86_64-specific to common code. This patch moves the bootmem functions, that are largely x86_64-specific into pci-dma.c. The code goes inside an ifdef. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_64.c | 68 ----------------------------------------- 2 files changed, 73 insertions(+), 68 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6b77fd872a7a..91443361cb67 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include #include @@ -26,6 +29,76 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); +#ifdef CONFIG_X86_64 +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= 
MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + +void __init pci_iommu_alloc(void) +{ + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ +#ifdef CONFIG_GART_IOMMU + gart_iommu_hole_init(); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + detect_calgary(); +#endif + + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} +#endif + static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 42021300964a..6b204cc42890 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -271,74 +271,6 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -static __initdata void *dma32_bootmem_ptr; -static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); - -static int __init parse_dma32_size_opt(char *p) -{ - if (!p) - return -EINVAL; - dma32_bootmem_size = memparse(p, &p); - return 0; -} -early_param("dma32_size", parse_dma32_size_opt); - -void __init dma32_reserve_bootmem(void) -{ - unsigned long size, align; - if (end_pfn <= MAX_DMA32_PFN) - return; - - align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); - dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - __pa(MAX_DMA_ADDRESS)); - if (dma32_bootmem_ptr) - dma32_bootmem_size = size; - else - dma32_bootmem_size = 0; -} -static void __init dma32_free_bootmem(void) -{ - int node; - - if (end_pfn <= MAX_DMA32_PFN) - return; - - if (!dma32_bootmem_ptr) - return; - - for_each_online_node(node) - free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), - dma32_bootmem_size); - - dma32_bootmem_ptr = NULL; - dma32_bootmem_size = 0; -} 
- -void __init pci_iommu_alloc(void) -{ - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ -#ifdef CONFIG_GART_IOMMU - gart_iommu_hole_init(); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - detect_calgary(); -#endif - - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} - #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -- cgit v1.2.3 From bca5c09663030bdd18ab1b3ccb6671f663c3345a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:53 -0300 Subject: x86: move pci fixup to pci-dma.c via_no_dac provides a fixup that is the same for both architectures. Move it to pci-dma.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 18 ++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 13 ------------- arch/x86/kernel/pci-dma_64.c | 15 --------------- 3 files changed, 18 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 91443361cb67..48cccbe51aa5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,12 +1,16 @@ #include #include #include +#include #include #include #include #include +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); + const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -121,3 +125,17 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); + +#ifdef CONFIG_PCI +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + printk(KERN_INFO "PCI: VIA PCI bridge detected." 
+ "Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +#endif diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 9e8297657c32..6543bb30b65d 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -157,9 +157,6 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - int dma_supported(struct device *dev, u64 mask) { @@ -182,16 +179,6 @@ dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - static int check_iommu(char *s) { if (!strcmp(s, "usedac")) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 6b204cc42890..7820675a688a 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -161,8 +161,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -static int forbid_dac __read_mostly; - int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI @@ -270,16 +268,3 @@ static __init int iommu_setup(char *p) return 0; } early_param("iommu", iommu_setup); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. 
Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); -#endif -- cgit v1.2.3 From 8e0c379718ef32967deea55937895bfc9b493dd8 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:55 -0300 Subject: x86: merge dma_supported The code for both arches are very similar, so this patch merge them. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 24 ------------------------ arch/x86/kernel/pci-dma_64.c | 44 +------------------------------------------- 3 files changed, 45 insertions(+), 67 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 48cccbe51aa5..7d3bd652c36f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -14,6 +14,8 @@ EXPORT_SYMBOL(forbid_dac); const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +int iommu_sac_force __read_mostly = 0; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -103,6 +105,48 @@ void __init pci_iommu_alloc(void) } #endif +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", + dev->bus_id); + return 0; + } +#endif + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. 
+ + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + printk(KERN_INFO "%s: Force SAC with mask %Lx\n", + dev->bus_id, mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(dma_supported); + + static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 6543bb30b65d..1d4091af4417 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -155,30 +155,6 @@ void *dma_mark_declared_memory_occupied(struct device *dev, EXPORT_SYMBOL(dma_mark_declared_memory_occupied); #ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int -dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. 
- */ - if (mask < 0x00ffffff) - return 0; - - /* Work around chipset bugs */ - if (forbid_dac > 0 && mask > 0xffffffffULL) - return 0; - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - return 1; -} -EXPORT_SYMBOL(dma_supported); - static int check_iommu(char *s) { if (!strcmp(s, "usedac")) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 7820675a688a..c80da76e7e61 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -24,7 +24,7 @@ EXPORT_SYMBOL(bad_dma_address); int iommu_bio_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_bio_merge); -static int iommu_sac_force __read_mostly = 0; +extern int iommu_sac_force; int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ @@ -161,48 +161,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -int dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - - - - printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); - return 0; - } -#endif - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) - return 0; - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. 
*/ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { - printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(dma_supported); - /* * See for the iommu kernel parameter * documentation. -- cgit v1.2.3 From fae9a0d8ca68a14da8d2351ad3e0bf42f3b29899 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:56 -0300 Subject: x86: merge iommu initialization parameters we merge the iommu initialization parameters in pci-dma.c Nice thing, that both architectures at least recognize the same parameters. usedac i386 parameter is marked for deprecation Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 12 ------- arch/x86/kernel/pci-dma_64.c | 79 ------------------------------------------ 3 files changed, 81 insertions(+), 91 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7d3bd652c36f..48ab52d052b6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -24,6 +24,18 @@ int panic_on_overflow __read_mostly = 0; int force_iommu __read_mostly = 0; #endif +int iommu_merge __read_mostly = 0; + +int no_iommu __read_mostly; +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly = 0; + +/* This tells the BIO block layer to assume merging. Default to off + because we cannot guarantee merging later. */ +int iommu_bio_merge __read_mostly = 0; +EXPORT_SYMBOL(iommu_bio_merge); + + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -105,6 +117,75 @@ void __init pci_iommu_alloc(void) } #endif +/* + * See for the iommu kernel parameter + * documentation. 
+ */ +static __init int iommu_setup(char *p) +{ + iommu_merge = 1; + + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "off", 3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p, "force", 5)) + force_iommu = 1; + if (!strncmp(p, "noforce", 7)) { + iommu_merge = 0; + force_iommu = 0; + } + + if (!strncmp(p, "biomerge", 8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic", 5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic", 7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge", 5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge", 7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac", 8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; + if (!strncmp(p, "usedac", 6)) { + forbid_dac = -1; + return 1; + } +#ifdef CONFIG_SWIOTLB + if (!strncmp(p, "soft", 4)) + swiotlb = 1; +#endif + +#ifdef CONFIG_GART_IOMMU + gart_parse_options(p); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; +} +early_param("iommu", iommu_setup); + int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 1d4091af4417..eea52df68a3b 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -153,15 +153,3 @@ void *dma_mark_declared_memory_occupied(struct device *dev, return mem->virt_base + (pos << PAGE_SHIFT); } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index c80da76e7e61..e7d45cf82251 100644 --- 
a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -14,22 +14,9 @@ #include #include -int iommu_merge __read_mostly = 0; - dma_addr_t bad_dma_address __read_mostly; EXPORT_SYMBOL(bad_dma_address); -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_bio_merge); - -extern int iommu_sac_force; - -int no_iommu __read_mostly; -/* Set this to 1 if there is a HW IOMMU in the system */ -int iommu_detected __read_mostly = 0; - /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ @@ -160,69 +147,3 @@ void dma_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } EXPORT_SYMBOL(dma_free_coherent); - -/* - * See for the iommu kernel parameter - * documentation. - */ -static __init int iommu_setup(char *p) -{ - iommu_merge = 1; - - if (!p) - return -EINVAL; - - while (*p) { - if (!strncmp(p, "off", 3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p, "force", 5)) - force_iommu = 1; - if (!strncmp(p, "noforce", 7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge", 8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic", 5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic", 7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge", 5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge", 7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; - if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; - if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; - -#ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft", 4)) - swiotlb = 1; -#endif - -#ifdef CONFIG_GART_IOMMU - gart_parse_options(p); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary 
= 1; -#endif /* CONFIG_CALGARY_IOMMU */ - - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; -} -early_param("iommu", iommu_setup); -- cgit v1.2.3 From 8e8edc6401205da3000cc3dfa76f3fd28a21d73c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:57 -0300 Subject: x86: move dma_coherent functions to pci-dma.c They are placed in an ifdef, since they are i386 specific the structure definition goes to dma-mapping.h. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 81 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 85 -------------------------------------------- 2 files changed, 81 insertions(+), 85 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 48ab52d052b6..967dfcfa2ad2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -186,6 +186,87 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); +#ifdef CONFIG_X86_32 +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + 
return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); +#endif /* CONFIG_X86_32 */ + int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index eea52df68a3b..818d95efc3cb 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -18,14 +18,6 @@ dma_addr_t bad_dma_address __read_mostly = 0x0; EXPORT_SYMBOL(bad_dma_address); -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -76,80 +68,3 @@ void dma_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - 
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -- cgit v1.2.3 From d09d815c1b1d437a3ea89ecd92c91179266d1243 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:58 -0300 Subject: x86: isolate coherent mapping functions i386 implements the declare coherent memory API, and x86_64 does not it is reflected in pieces of dma_alloc_coherent and dma_free_coherent. 
Those pieces are isolated in separate functions, that are declared as empty macros in x86_64. This way we can make the code the same. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 51 ++++++++++++++++++++++++++++++-------------- arch/x86/kernel/pci-dma_64.c | 11 +++++++++- 2 files changed, 45 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 818d95efc3cb..78c7640252a4 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -18,27 +18,50 @@ dma_addr_t bad_dma_address __read_mostly = 0x0; EXPORT_SYMBOL(bad_dma_address); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) { - void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); if (mem) { int page = bitmap_find_free_region(mem->bitmap, mem->size, order); if (page >= 0) { *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); } if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; } + return 0; +} + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + void *ret = NULL; + int order = get_order(size); + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) + return ret; if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; @@ -56,15 +79,11 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); + if (dma_release_coherent(dev, order, vaddr)) + return; + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index e7d45cf82251..6eacd58e451b 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -39,6 +39,8 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) return page ? page_address(page) : NULL; } +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) /* * Allocate memory for a coherent mapping. 
*/ @@ -50,6 +52,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned long dma_mask = 0; u64 bus; + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; + if (!dev) dev = &fallback_dev; dma_mask = dev->coherent_dma_mask; @@ -141,9 +147,12 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, get_order(size)); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From cac67877d268f21da74d879a355247e4e25b5b5f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:00 -0300 Subject: x86: move bad_dma_address It goes to pci-dma.c, and is removed from the arch-specific files. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 2 ++ arch/x86/kernel/pci-dma_32.c | 4 ---- arch/x86/kernel/pci-dma_64.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 967dfcfa2ad2..00527e74e49c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,6 +35,8 @@ int iommu_detected __read_mostly = 0; int iommu_bio_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_bio_merge); +dma_addr_t bad_dma_address __read_mostly = 0; +EXPORT_SYMBOL(bad_dma_address); int dma_set_mask(struct device *dev, u64 mask) { diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 78c7640252a4..9199bccb9214 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,10 +14,6 @@ #include #include -/* For i386, we make it point to the NULL address */ -dma_addr_t bad_dma_address __read_mostly = 0x0; -EXPORT_SYMBOL(bad_dma_address); - static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 6eacd58e451b..5f03e4174210 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -14,8 +14,6 @@ #include #include -dma_addr_t bad_dma_address __read_mostly; -EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible -- cgit v1.2.3 From 2e33e361188617628e47b4bc47e87e84feaf556f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:59 -0300 Subject: x86: adjust dma_free_coherent for i386 We call unmap_single, if available. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 9199bccb9214..5ae3470113c6 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -80,6 +80,8 @@ void dma_free_coherent(struct device *dev, size_t size, WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; + if (dma_ops->unmap_single) + dma_ops->unmap_single(dev, dma_handle, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 71848d687e2a477cb7c68a854d8fdeaa5dff0ffc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:01 -0300 Subject: x86: remove virt_to_bus in pci-dma_64.c virt_to_bus() is deprecated according to the docs, and moreover, won't return the right thing in i386 if we're dealing with high memory mappings. So we make our allocation function return a page, and then use page_address() (for virtual addr) and page_to_phys() (for physical addr) instead. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 5f03e4174210..13a31a4a4c17 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -28,13 +28,11 @@ struct device fallback_dev = { noinline static void * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { - struct page *page; int node; node = dev_to_node(dev); - page = alloc_pages_node(node, gfp, order); - return page ? 
page_address(page) : NULL; + return alloc_pages_node(node, gfp, order); } #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) @@ -47,6 +45,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *memory; + struct page *page; unsigned long dma_mask = 0; u64 bus; @@ -79,13 +78,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp |= GFP_DMA32; again: - memory = dma_alloc_pages(dev, gfp, get_order(size)); - if (memory == NULL) + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) return NULL; { int high, mmu; - bus = virt_to_bus(memory); + bus = page_to_phys(page); + memory = page_address(page); high = (bus + size) >= dma_mask; mmu = high; if (force_iommu && !(gfp & GFP_DMA)) @@ -112,7 +112,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, memset(memory, 0, size); if (!mmu) { - *dma_handle = virt_to_bus(memory); + *dma_handle = bus; return memory; } } -- cgit v1.2.3 From d1a079029036881375110f78df47d352e7c28a77 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:02 -0300 Subject: x86: use numa allocation function in i386 We can do it here to, in the same way x86_64 does. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 5ae3470113c6..0d630ae3d910 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -48,10 +48,23 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) return 0; } +/* Allocate DMA memory on node near device */ +noinline struct page * +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) +{ + int node; + + node = dev_to_node(dev); + + return alloc_pages_node(node, gfp, order); +} + void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret = NULL; + struct page *page; + dma_addr_t bus; int order = get_order(size); /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -62,12 +75,16 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, order); + page = dma_alloc_pages(dev, gfp, order); + if (page == NULL) + return NULL; + + ret = page_address(page); + bus = page_to_phys(page); + + memset(ret, 0, size); + *dma_handle = bus; - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } return ret; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.2.3 From 45a07e774950ef479f8996c0e2c5550dd6440453 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:04 -0300 Subject: x86: use a fallback dev for i386 We can use a fallback dev for cases of a NULL device being passed (mostly ISA) This comes from x86_64 implementation. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0d630ae3d910..0600a37ba835 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,6 +14,16 @@ #include #include +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + + static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret) { @@ -75,6 +85,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; + if (!dev) + dev = &fallback_dev; + page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; -- cgit v1.2.3 From 8779f2fc3b84ebb6c5181fb13d702e9944c16069 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:05 -0300 Subject: x86: don't try to allocate from DMA zone at first If we fail, we'll loop into the allocation again, and then allocate in the DMA zone. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0600a37ba835..debe9119b724 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -82,9 +82,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) return ret; - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - if (!dev) dev = &fallback_dev; -- cgit v1.2.3 From 5fa78ca75d8e67063948a01b51594a0904af5710 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:05 -0300 Subject: x86: retry allocation if failed This patch puts in the code to retry allocation in case it fails. By its own, it does not make much sense but making the code look like x86_64. But later patches in this series will make we try to allocate from zones other than DMA first, which will possibly fail. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index debe9119b724..11f100a5f034 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -76,6 +76,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, struct page *page; dma_addr_t bus; int order = get_order(size); + unsigned long dma_mask = 0; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -85,15 +87,37 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (!dev) dev = &fallback_dev; + dma_mask = dev->coherent_dma_mask; + if (dma_mask == 0) + dma_mask = DMA_32BIT_MASK; + + again: page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; - ret = page_address(page); - bus = page_to_phys(page); - - memset(ret, 0, size); - *dma_handle = bus; + { + int high, mmu; + bus = page_to_phys(page); + ret = page_address(page); + high = (bus + size) >= dma_mask; + mmu = high; + if (force_iommu && !(gfp & GFP_DMA)) + mmu = 1; + else if (high) { + free_pages((unsigned long)ret, + get_order(size)); + + /* Don't use the 16MB ZONE_DMA unless absolutely + needed. It's better to use remapping first. */ + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + memset(ret, 0, size); + *dma_handle = bus; + } return ret; } -- cgit v1.2.3 From 8f19ca1341a6d89bd96e2e69e6e10f46d3258089 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:06 -0300 Subject: x86: unify gfp masks Use the same gfp masks for x86_64 and i386. It involves using HIGHMEM or DMA32 where necessary, for the sake of code compatibility, (no real effect), and using the NORETRY mask for i386. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 6 ++++-- arch/x86/kernel/pci-dma_64.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 11f100a5f034..5450bd142cb0 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -79,7 +79,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, unsigned long dma_mask = 0; /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) return ret; @@ -91,7 +91,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_mask == 0) dma_mask = DMA_32BIT_MASK; - again: + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; +again: page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 13a31a4a4c17..b956f5945d67 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -49,6 +49,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned long dma_mask = 0; u64 bus; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) return memory; -- cgit v1.2.3 From aa99b16faadcc9a5b6bd9550fda117a8e9e46d26 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:07 -0300 Subject: x86: remove kludge from x86_64 The claim is that i386 does it. Just it does not. So remove it. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index b956f5945d67..596c8c88f36d 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -68,10 +68,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - /* Kludge to make it bug-to-bug compatible with i386. i386 - uses the normal dma_mask for alloc_coherent. */ - dma_mask &= *dev->dma_mask; - /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If -- cgit v1.2.3 From da60cab4dd922cd933e82bace490f6155a32a90e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:08 -0300 Subject: x86: return conditional to mmu Just return our allocation if we don't have an mmu. For i386, where this patch is being applied, we never have. So our goal is just to have the code to look like x86_64's. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 5450bd142cb0..f134de3833a2 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -116,12 +116,42 @@ again: gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } + + /* Let low level make its own zone decisions */ + gfp &= ~(GFP_DMA32|GFP_DMA); + + if (dma_ops->alloc_coherent) + return dma_ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; + } memset(ret, 0, size); - *dma_handle = bus; + if (!mmu) { + *dma_handle = bus; + return ret; + } + } + + if (dma_ops->alloc_coherent) { + free_pages((unsigned long)ret, get_order(size)); + gfp &= ~(GFP_DMA|GFP_DMA32); + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + } + + if (dma_ops->map_simple) { + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(ret), + size, + PCI_DMA_BIDIRECTIONAL); + if (*dma_handle != bad_dma_address) + return ret; } - return ret; + if (panic_on_overflow) + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); + free_pages((unsigned long)ret, get_order(size)); + return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.2.3 From bb8ada95a7c11adf3dad4e8d5c55ef1650560592 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:09 -0300 Subject: x86: don't do dma if mask is NULL. if the device hasn't provided a mask, abort allocation. Note that we're using a fallback device now, so it does not cover the case of a NULL device: just drivers passing NULL masks around. 
Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index f134de3833a2..d2f70744a93a 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -91,6 +91,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_mask == 0) dma_mask = DMA_32BIT_MASK; + if (dev->dma_mask == NULL) + return NULL; + /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; again: -- cgit v1.2.3 From 098cb7f27ed69276e4db560a444b94b982e4bb8f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:10 -0300 Subject: x86: integrate pci-dma.c The code in pci-dma_{32,64}.c are now sufficiently close to each other. We merge them in pci-dma.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pci-dma.c | 175 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 173 ------------------------------------------ arch/x86/kernel/pci-dma_64.c | 154 ------------------------------------- 4 files changed, 176 insertions(+), 328 deletions(-) delete mode 100644 arch/x86/kernel/pci-dma_32.c delete mode 100644 arch/x86/kernel/pci-dma_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 307aee5e8c5b..90e092d0af0c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o +obj-y += bootflag.o e820_$(BITS).o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o 
pci-nommu.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 00527e74e49c..388b113a7d88 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -38,6 +38,15 @@ EXPORT_SYMBOL(iommu_bio_merge); dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to older i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -267,6 +276,43 @@ void *dma_mark_declared_memory_occupied(struct device *dev, return mem->virt_base + (pos << PAGE_SHIFT); } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} +#else +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) #endif /* CONFIG_X86_32 */ int dma_supported(struct device *dev, u64 mask) @@ -310,6 +356,135 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); +/* Allocate DMA memory on node near device */ +noinline struct page * +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) +{ + int node; + + node = dev_to_node(dev); + + return alloc_pages_node(node, gfp, order); +} + +/* + * Allocate memory for a coherent mapping. + */ +void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + void *memory = NULL; + struct page *page; + unsigned long dma_mask = 0; + dma_addr_t bus; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) + dev = &fallback_dev; + dma_mask = dev->coherent_dma_mask; + if (dma_mask == 0) + dma_mask = DMA_32BIT_MASK; + + /* Device not DMA able */ + if (dev->dma_mask == NULL) + return NULL; + + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + +#ifdef CONFIG_X86_64 + /* Why <=? Even when the mask is smaller than 4GB it is often + larger than 16MB and in this case we have a chance of + finding fitting memory in the next higher zone first. If + not retry with true GFP_DMA. 
-AK */ + if (dma_mask <= DMA_32BIT_MASK) + gfp |= GFP_DMA32; +#endif + + again: + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) + return NULL; + + { + int high, mmu; + bus = page_to_phys(page); + memory = page_address(page); + high = (bus + size) >= dma_mask; + mmu = high; + if (force_iommu && !(gfp & GFP_DMA)) + mmu = 1; + else if (high) { + free_pages((unsigned long)memory, + get_order(size)); + + /* Don't use the 16MB ZONE_DMA unless absolutely + needed. It's better to use remapping first. */ + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + /* Let low level make its own zone decisions */ + gfp &= ~(GFP_DMA32|GFP_DMA); + + if (dma_ops->alloc_coherent) + return dma_ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; + } + + memset(memory, 0, size); + if (!mmu) { + *dma_handle = bus; + return memory; + } + } + + if (dma_ops->alloc_coherent) { + free_pages((unsigned long)memory, get_order(size)); + gfp &= ~(GFP_DMA|GFP_DMA32); + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + } + + if (dma_ops->map_simple) { + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + size, + PCI_DMA_BIDIRECTIONAL); + if (*dma_handle != bad_dma_address) + return memory; + } + + if (panic_on_overflow) + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); + free_pages((unsigned long)memory, get_order(size)); + return NULL; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Unmap coherent memory. + * The caller must ensure that the device has finished accessing the mapping. 
+ */ +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + int order = get_order(size); + WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; + if (dma_ops->unmap_single) + dma_ops->unmap_single(dev, bus, size, 0); + free_pages((unsigned long)vaddr, order); +} +EXPORT_SYMBOL(dma_free_coherent); static int __init pci_iommu_init(void) { diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c deleted file mode 100644 index d2f70744a93a..000000000000 --- a/arch/x86/kernel/pci-dma_32.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include -#include - -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ -struct device fallback_dev = { - .bus_id = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} - -/* Allocate DMA memory on node near device */ -noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret = NULL; - struct page *page; - dma_addr_t bus; - int order = get_order(size); - unsigned long dma_mask = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) - return ret; - - if (!dev) - dev = &fallback_dev; - - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = DMA_32BIT_MASK; - - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; -again: - page = dma_alloc_pages(dev, gfp, order); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - ret = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)ret, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. 
*/ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - - } - memset(ret, 0, size); - if (!mmu) { - *dma_handle = bus; - return ret; - } - } - - if (dma_ops->alloc_coherent) { - free_pages((unsigned long)ret, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(ret), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return ret; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)ret, get_order(size)); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - int order = get_order(size); - - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) - return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, dma_handle, size, 0); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c deleted file mode 100644 index 596c8c88f36d..000000000000 --- a/arch/x86/kernel/pci-dma_64.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Dynamic DMA mapping support. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. 
*/ -struct device fallback_dev = { - .bus_id = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - -/* Allocate DMA memory on node near device */ -noinline static void * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -/* - * Allocate memory for a coherent mapping. - */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - void *memory; - struct page *page; - unsigned long dma_mask = 0; - u64 bus; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) - dev = &fallback_dev; - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; - - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK) - gfp |= GFP_DMA32; - - again: - page = dma_alloc_pages(dev, gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. 
*/ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (dma_ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); - free_pages((unsigned long)memory, get_order(size)); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) - return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 34d0559178393547505ec9492321255405f4e441 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 16 Apr 2008 11:45:15 -0500 Subject: x86: UV startup of slave cpus This patch changes smpboot.c so that it can start slave cpus running in UV non-unique apicid mode. The SIPI must be sent using a UV-specific mechanism. 
Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 17 +++++++++++------ arch/x86/kernel/smpboot.c | 29 ++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 5d77c9cd8e15..ebf13908a743 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); + APIC_DM_INIT; + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + mdelay(10); + + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + APIC_DM_STARTUP; uv_write_global_mmr64(nasid, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid; + unsigned long val, apicid, lapicid; int nasid; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? 
*/ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ nasid = uv_apicid_to_nasid(apicid); val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << + (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(nasid, UVH_IPI_INT, val); - printk(KERN_DEBUG - "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n", - cpu, apicid, vector, nasid, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e6abe8a49b1f..6a925394bc7e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { + send_status = uv_wakeup_secondary(phys_apicid, start_eip); + atomic_set(&init_deasserted, 1); + return send_status; + } + /* * Be paranoid about clearing APIC errors. */ @@ -918,16 +925,19 @@ do_rest: atomic_set(&init_deasserted, 0); - Dprintk("Setting warm reset code and vector.\n"); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - store_NMI_vector(&nmi_high, &nmi_low); + Dprintk("Setting warm reset code and vector.\n"); - smpboot_setup_warm_reset_vector(start_ip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } /* * Starting actual IPI sequence... 
@@ -966,7 +976,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - inquire_remote_apic(apicid); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) + inquire_remote_apic(apicid); } } -- cgit v1.2.3 From 6b6309b4c7f6da467c5d5b7d18fa8cb79730f381 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 25 Mar 2008 15:06:56 -0700 Subject: x86: reduce memory and stack usage in intel_cacheinfo * Change the following static arrays sized by NR_CPUS to per_cpu data variables: _cpuid4_info *cpuid4_info[NR_CPUS]; _index_kobject *index_kobject[NR_CPUS]; kobject * cache_kobject[NR_CPUS]; * Remove the local NR_CPUS array with a kmalloc'd region in show_shared_cpu_map(). Also some minor complaints from checkpatch.pl fixed. Cc: H. Peter Anvin Cc: Andi Kleen Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 70 ++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 30 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1b889860eb73..2e8b323b34e4 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -129,7 +129,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; - cpumask_t shared_cpu_map; + cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; unsigned short num_cache_leaves; @@ -451,8 +451,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } /* pointer to _cpuid4_info array (for each cache leaf) */ -static struct _cpuid4_info *cpuid4_info[NR_CPUS]; -#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) +static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -474,7 +474,7 @@ static void __cpuinit 
cache_shared_cpu_map_setup(unsigned int cpu, int index) if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); - if (i != cpu && cpuid4_info[i]) { + if (i != cpu && per_cpu(cpuid4_info, i)) { sibling_leaf = CPUID4_INFO_IDX(i, index); cpu_set(cpu, sibling_leaf->shared_cpu_map); } @@ -505,8 +505,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) for (i = 0; i < num_cache_leaves; i++) cache_remove_shared_cpu_map(cpu, i); - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; + kfree(per_cpu(cpuid4_info, cpu)); + per_cpu(cpuid4_info, cpu) = NULL; } static int __cpuinit detect_cache_attributes(unsigned int cpu) @@ -519,9 +519,9 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - cpuid4_info[cpu] = kzalloc( + per_cpu(cpuid4_info, cpu) = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (cpuid4_info[cpu] == NULL) + if (per_cpu(cpuid4_info, cpu) == NULL) return -ENOMEM; oldmask = current->cpus_allowed; @@ -546,8 +546,8 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) out: if (retval) { - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; + kfree(per_cpu(cpuid4_info, cpu)); + per_cpu(cpuid4_info, cpu) = NULL; } return retval; @@ -561,7 +561,7 @@ out: extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ /* pointer to kobject for cpuX/cache */ -static struct kobject * cache_kobject[NR_CPUS]; +static DEFINE_PER_CPU(struct kobject *, cache_kobject); struct _index_kobject { struct kobject kobj; @@ -570,8 +570,8 @@ struct _index_kobject { }; /* pointer to array of kobjects for cpuX/cache/indexY */ -static struct _index_kobject *index_kobject[NR_CPUS]; -#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) +static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static 
ssize_t show_##file_name \ @@ -593,9 +593,16 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) { - char mask_str[NR_CPUS]; - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); - return sprintf(buf, "%s\n", mask_str); + int n = 0; + int len = cpumask_scnprintf_len(nr_cpu_ids); + char *mask_str = kmalloc(len, GFP_KERNEL); + + if (mask_str) { + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map); + n = sprintf(buf, "%s\n", mask_str); + kfree(mask_str); + } + return n; } static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { @@ -684,10 +691,10 @@ static struct kobj_type ktype_percpu_entry = { static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { - kfree(cache_kobject[cpu]); - kfree(index_kobject[cpu]); - cache_kobject[cpu] = NULL; - index_kobject[cpu] = NULL; + kfree(per_cpu(cache_kobject, cpu)); + kfree(per_cpu(index_kobject, cpu)); + per_cpu(cache_kobject, cpu) = NULL; + per_cpu(index_kobject, cpu) = NULL; free_cache_attributes(cpu); } @@ -703,13 +710,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return err; /* Allocate all required memory */ - cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(cache_kobject[cpu] == NULL)) + per_cpu(cache_kobject, cpu) = + kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) goto err_out; - index_kobject[cpu] = kzalloc( + per_cpu(index_kobject, cpu) = kzalloc( sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); - if (unlikely(index_kobject[cpu] == NULL)) + if (unlikely(per_cpu(index_kobject, cpu) == NULL)) goto err_out; return 0; @@ -733,7 +741,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry, + retval = kobject_init_and_add(per_cpu(cache_kobject, 
cpu), + &ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); if (retval < 0) { cpuid4_cache_sysfs_exit(cpu); @@ -745,13 +754,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object->cpu = cpu; this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), - &ktype_cache, cache_kobject[cpu], + &ktype_cache, + per_cpu(cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); } - kobject_put(cache_kobject[cpu]); + kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); break; } @@ -760,7 +770,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (!retval) cpu_set(cpu, cache_dev_map); - kobject_uevent(cache_kobject[cpu], KOBJ_ADD); + kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return retval; } @@ -769,7 +779,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - if (cpuid4_info[cpu] == NULL) + if (per_cpu(cpuid4_info, cpu) == NULL) return; if (!cpu_isset(cpu, cache_dev_map)) return; @@ -777,7 +787,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - kobject_put(cache_kobject[cpu]); + kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } -- cgit v1.2.3 From d366f8cbc16882e93538d9a52423c2f50dad7c06 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:12 -0700 Subject: cpumask: Cleanup more uses of CPU_MASK and NODE_MASK * Replace usages of CPU_MASK_NONE, CPU_MASK_ALL, NODE_MASK_NONE, NODE_MASK_ALL to reduce stack requirements for large NR_CPUS and MAXNODES counts. * In some cases, the cpumask variable was initialized but then overwritten with another value. 
This is the case for changes like this: - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b54464b26658..9ba11d07920f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -785,7 +785,7 @@ static void __clear_irq_vector(int irq) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cfg->domain = CPU_MASK_NONE; + cpus_clear(cfg->domain); } void __setup_vector_irq(int cpu) -- cgit v1.2.3 From fc0e474840d1fd96f28fbd76d4f36b80e7ad1cc3 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:05 -0700 Subject: x86: use new set_cpus_allowed_ptr function * Use new set_cpus_allowed_ptr() function added by previous patch, which instead of passing the "newly allowed cpus" cpumask_t arg by value, pass it by pointer: -int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) * Cleanup uses of CPU_MASK_ALL. * Collapse other NR_CPUS changes to arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c Use pointers to cpumask_t arguments whenever possible. 
Depends on: [sched-devel]: sched: add new set_cpus_allowed_ptr function Cc: Len Brown Cc: Dave Jones Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/cstate.c | 4 +-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 28 ++++++++++----------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 32 +++++++++++++----------- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 13 +++++----- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 20 +++++++-------- arch/x86/kernel/cpu/intel_cacheinfo.c | 4 +-- arch/x86/kernel/microcode.c | 16 ++++++------ arch/x86/kernel/reboot.c | 2 +- 8 files changed, 61 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d59..c6dc05af8827 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -93,7 +93,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; @@ -130,7 +130,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, cx->address); out: - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index a962dcb9c408..e2d870de837c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -192,9 +192,9 @@ static void drv_read(struct drv_cmd *cmd) cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed(current, cmd->mask); + set_cpus_allowed_ptr(current, &cmd->mask); do_drv_read(cmd); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); } static void drv_write(struct drv_cmd 
*cmd) @@ -203,30 +203,30 @@ static void drv_write(struct drv_cmd *cmd) unsigned int i; for_each_cpu_mask(i, cmd->mask) { - set_cpus_allowed(current, cpumask_of_cpu(i)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return; } -static u32 get_cur_val(cpumask_t mask) +static u32 get_cur_val(const cpumask_t *mask) { struct acpi_processor_performance *perf; struct drv_cmd cmd; - if (unlikely(cpus_empty(mask))) + if (unlikely(cpus_empty(*mask))) return 0; - switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) { + switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(drv_data, first_cpu(mask))->acpi_data; + perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -234,7 +234,7 @@ static u32 get_cur_val(cpumask_t mask) return 0; } - cmd.mask = mask; + cmd.mask = *mask; drv_read(&cmd); @@ -271,7 +271,7 @@ static unsigned int get_measured_perf(unsigned int cpu) unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -329,7 +329,7 @@ static unsigned int get_measured_perf(unsigned int cpu) retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; put_cpu(); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); return retval; @@ -347,13 +347,13 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return 0; } - freq = 
extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); dprintk("cur freq = %u\n", freq); return freq; } -static unsigned int check_freqs(cpumask_t mask, unsigned int freq, +static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; @@ -449,7 +449,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, drv_write(&cmd); if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { + if (!check_freqs(&cmd.mask, freqs.new, data)) { dprintk("acpi_cpufreq_target failed (%d)\n", policy->cpu); return -EAGAIN; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c99d59d8ef2e..46d4034d9f37 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -478,12 +478,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -528,7 +528,7 @@ static int check_supported_cpu(unsigned int cpu) rc = 1; out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return rc; } @@ -1015,7 +1015,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i /* Driver entry point to switch to the target frequency */ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1030,7 +1030,7 @@ static 
int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1085,7 +1085,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi ret = 0; err_out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return ret; } @@ -1104,7 +1104,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; int rc; if (!cpu_online(pol->cpu)) @@ -1145,7 +1145,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1164,7 +1164,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) fidvid_msr_init(); /* run on any CPU again */ - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); @@ -1205,7 +1205,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; err_out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); powernow_k8_cpu_exit_acpi(data); kfree(data); @@ -1242,10 +1242,11 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != 
cpu) { - printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed(current, oldmask); + printk(KERN_ERR PFX + "limiting to CPU %d failed in powernowk8_get\n", cpu); + set_cpus_allowed_ptr(current, &oldmask); return 0; } @@ -1253,13 +1254,14 @@ static unsigned int powernowk8_get (unsigned int cpu) goto out; if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + khz = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); else khz = find_khz_freq_from_fid(data->currfid); out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return khz; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 3031f1196192..908dd347c67e 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -315,7 +315,7 @@ static unsigned int get_cur_freq(unsigned int cpu) cpumask_t saved_mask; saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -333,7 +333,7 @@ static unsigned int get_cur_freq(unsigned int cpu) clock_freq = extract_clock(l, cpu, 1); } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return clock_freq; } @@ -487,7 +487,7 @@ static int centrino_target (struct cpufreq_policy *policy, else cpu_set(j, set_mask); - set_cpus_allowed(current, set_mask); + set_cpus_allowed_ptr(current, &set_mask); preempt_disable(); if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { dprintk("couldn't limit to CPUs in this domain\n"); @@ -555,7 +555,8 @@ static int centrino_target (struct cpufreq_policy *policy, if (!cpus_empty(covered_cpus)) { for_each_cpu_mask(j, covered_cpus) { - set_cpus_allowed(current, cpumask_of_cpu(j)); + set_cpus_allowed_ptr(current, + 
&cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } } @@ -569,12 +570,12 @@ static int centrino_target (struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return 0; migrate_end: preempt_enable(); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 14d68aa301ee..1b50244b1fdf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -229,22 +229,22 @@ static unsigned int speedstep_detect_chipset (void) return 0; } -static unsigned int _speedstep_get(cpumask_t cpus) +static unsigned int _speedstep_get(const cpumask_t *cpus) { unsigned int speed; cpumask_t cpus_allowed; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpus); + set_cpus_allowed_ptr(current, cpus); speed = speedstep_get_processor_frequency(speedstep_processor); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); dprintk("detected %u kHz as current frequency\n", speed); return speed; } static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(cpumask_of_cpu(cpu)); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** @@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy, if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(policy->cpus); + freqs.old = _speedstep_get(&policy->cpus); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -285,12 +285,12 @@ static int speedstep_target (struct cpufreq_policy *policy, } /* switch to physical CPU where state is to be changed */ - set_cpus_allowed(current, policy->cpus); + set_cpus_allowed_ptr(current, &policy->cpus); 
speedstep_set_state(newstate); /* allow to be run on all CPUs */ - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); for_each_cpu_mask(i, policy->cpus) { freqs.cpu = i; @@ -326,7 +326,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) #endif cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, policy->cpus); + set_cpus_allowed_ptr(current, &policy->cpus); /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, @@ -334,12 +334,12 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) &speedstep_freqs[SPEEDSTEP_HIGH].frequency, &policy->cpuinfo.transition_latency, &speedstep_set_state); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); if (result) return result; /* get current speed setting */ - speed = _speedstep_get(policy->cpus); + speed = _speedstep_get(&policy->cpus); if (!speed) return -EIO; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2e8b323b34e4..e073a93ceb42 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -525,7 +525,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; @@ -542,7 +542,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) } cache_shared_cpu_map_setup(cpu, j); } - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); out: if (retval) { diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 25cf6dee4e56..69729e38b78a 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -402,7 +402,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - set_cpus_allowed(current, 
cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -416,7 +416,7 @@ out: vfree(new_mc); if (cursor < 0) error = cursor; - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); return error; } @@ -579,7 +579,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -610,7 +610,7 @@ static int apply_microcode_check_cpu(int cpu) " sig=0x%x, pf=0x%x, rev=0x%x\n", cpu, uci->sig, uci->pf, uci->rev); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); return err; } @@ -621,13 +621,13 @@ static void microcode_init_cpu(int cpu, int resume) old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(&microcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) cpu_request_microcode(cpu); mutex_unlock(&microcode_mutex); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); } static void microcode_fini_cpu(int cpu) @@ -657,14 +657,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(&microcode_mutex); if (uci->valid) err = cpu_request_microcode(cpu); mutex_unlock(&microcode_mutex); put_online_cpus(); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); } if (err) return err; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9692202d3bfb..19c9386ac118 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -420,7 +420,7 
@@ static void native_machine_shutdown(void) reboot_cpu_id = smp_processor_id(); /* Make certain I only run on the appropriate processor */ - set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. -- cgit v1.2.3 From b53e921ba1cff8453dc9a87a84052fa12d5b30bd Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:08 -0700 Subject: generic: reduce stack pressure in sched_affinity * Modify sched_affinity functions to pass cpumask_t variables by reference instead of by value. * Use new set_cpus_allowed_ptr function. Depends on: [sched-devel]: sched: add new set_cpus_allowed_ptr function Cc: Paul Jackson Cc: Cliff Wickman Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 46 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 32671da8184e..7c9a813e1193 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -251,18 +251,18 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; -static cpumask_t affinity_set(unsigned int cpu) +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) { - cpumask_t oldmask = current->cpus_allowed; - cpumask_t newmask = CPU_MASK_NONE; - cpu_set(cpu, newmask); - set_cpus_allowed(current, newmask); - return oldmask; + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); } -static void affinity_restore(cpumask_t oldmask) +static void affinity_restore(const cpumask_t *oldmask) { - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, oldmask); } #define SHOW_FIELDS(name) \ @@ 
-277,15 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - oldmask = affinity_set(b->cpu); + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 0, 0); - affinity_restore(oldmask); + affinity_restore(&oldmask); return end - buf; } @@ -294,7 +294,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask; + cpumask_t oldmask, newmask; u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) @@ -306,9 +306,9 @@ static ssize_t store_threshold_limit(struct threshold_block *b, old = b->threshold_limit; b->threshold_limit = new; - oldmask = affinity_set(b->cpu); + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 0, old); - affinity_restore(oldmask); + affinity_restore(&oldmask); return end - buf; } @@ -316,10 +316,10 @@ static ssize_t store_threshold_limit(struct threshold_block *b, static ssize_t show_error_count(struct threshold_block *b, char *buf) { u32 high, low; - cpumask_t oldmask; - oldmask = affinity_set(b->cpu); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); rdmsr(b->address, low, high); - affinity_restore(oldmask); + affinity_restore(&oldmask); return sprintf(buf, "%x\n", (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } @@ -327,10 +327,10 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf) static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - cpumask_t oldmask; - oldmask = affinity_set(b->cpu); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 1, 0); - affinity_restore(oldmask); + affinity_restore(&oldmask); return 1; } @@ -468,7 +468,7 @@ static __cpuinit int 
threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - cpumask_t oldmask = CPU_MASK_NONE; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -519,10 +519,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - oldmask = affinity_set(cpu); + affinity_set(cpu, &oldmask, &newmask); err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MC0_MISC + bank * 4); - affinity_restore(oldmask); + affinity_restore(&oldmask); if (err) goto out_free; -- cgit v1.2.3 From 9f0e8d0400d925c3acd5f4e01dbeb736e4011882 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:01 -0700 Subject: x86: convert cpumask_of_cpu macro to allocated array * Here is a simple patch to use an allocated array of cpumasks to represent cpumask_of_cpu() instead of constructing one on the stack. It's based on the Kconfig option "HAVE_CPUMASK_OF_CPU_MAP" which is currently only set for x86_64 SMP. Otherwise the existing cpumask_of_cpu() is used but has been changed to produce an lvalue so a pointer to it can be used. Cc: H. 
Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ed157c90412e..0d1f44ae6eea 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -54,6 +54,24 @@ static void __init setup_per_cpu_maps(void) #endif } +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +cpumask_t *cpumask_of_cpu_map __read_mostly; +EXPORT_SYMBOL(cpumask_of_cpu_map); + +/* requires nr_cpu_ids to be initialized */ +static void __init setup_cpumask_of_cpu(void) +{ + int i; + + /* alloc_bootmem zeroes memory */ + cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); + for (i = 0; i < nr_cpu_ids; i++) + cpu_set(i, cpumask_of_cpu_map[i]); +} +#else +static inline void setup_cpumask_of_cpu(void) { } +#endif + #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -70,7 +88,7 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - int i; + int i, highest_cpu = 0; unsigned long size; #ifdef CONFIG_HOTPLUG_CPU @@ -104,10 +122,18 @@ void __init setup_per_cpu_areas(void) __per_cpu_offset[i] = ptr - __per_cpu_start; #endif memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + highest_cpu = i; } + nr_cpu_ids = highest_cpu + 1; + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); + + /* Setup cpumask_of_cpu map */ + setup_cpumask_of_cpu(); } #endif -- cgit v1.2.3 From fb0f330e62d71f7c535251438068199af320cf73 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 8 Apr 2008 11:43:02 -0700 Subject: x86: modify show_shared_cpu_map in intel_cacheinfo * Removed kmalloc (or local array) in show_shared_cpu_map(). * Added show_shared_cpu_list() function. 
Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index e073a93ceb42..26d615dcb149 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -591,20 +591,34 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) return sprintf (buf, "%luK\n", this_leaf->size / 1024); } -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) +static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, + int type, char *buf) { + ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; int n = 0; - int len = cpumask_scnprintf_len(nr_cpu_ids); - char *mask_str = kmalloc(len, GFP_KERNEL); - if (mask_str) { - cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map); - n = sprintf(buf, "%s\n", mask_str); - kfree(mask_str); + if (len > 1) { + cpumask_t *mask = &this_leaf->shared_cpu_map; + + n = type? 
+ cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); + buf[n++] = '\n'; + buf[n] = '\0'; } return n; } +static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) +{ + return show_shared_cpu_map_func(leaf, 0, buf); +} + +static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) +{ + return show_shared_cpu_map_func(leaf, 1, buf); +} + static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { switch(this_leaf->eax.split.type) { case CACHE_TYPE_DATA: @@ -640,6 +654,7 @@ define_one_ro(ways_of_associativity); define_one_ro(number_of_sets); define_one_ro(size); define_one_ro(shared_cpu_map); +define_one_ro(shared_cpu_list); static struct attribute * default_attrs[] = { &type.attr, @@ -650,6 +665,7 @@ static struct attribute * default_attrs[] = { &number_of_sets.attr, &size.attr, &shared_cpu_map.attr, + &shared_cpu_list.attr, NULL }; -- cgit v1.2.3 From 138fe4e069798d9aa948a5402ff15e58f483ee4e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Date: Wed, 9 Apr 2008 19:50:41 -0700 Subject: Firmware: add iSCSI iBFT Support Add /sysfs/firmware/ibft/[initiator|targetX|ethernetX] directories along with text properties which export the iSCSI Boot Firmware Table (iBFT) structure. What is iSCSI Boot Firmware Table? It is a mechanism for the iSCSI tools to extract from the machine NICs the iSCSI connection information so that they can automagically mount the iSCSI share/target. Currently the iSCSI information is hard-coded in the initrd. The /sysfs entries are read-only one-name-and-value fields. 
The usual set of data exposed is: # for a in `find /sys/firmware/ibft/ -type f -print`; do echo -n "$a: "; cat $a; done /sys/firmware/ibft/target0/target-name: iqn.2007.com.intel-sbx44:storage-10gb /sys/firmware/ibft/target0/nic-assoc: 0 /sys/firmware/ibft/target0/chap-type: 0 /sys/firmware/ibft/target0/lun: 00000000 /sys/firmware/ibft/target0/port: 3260 /sys/firmware/ibft/target0/ip-addr: 192.168.79.116 /sys/firmware/ibft/target0/flags: 3 /sys/firmware/ibft/target0/index: 0 /sys/firmware/ibft/ethernet0/mac: 00:11:25:9d:8b:01 /sys/firmware/ibft/ethernet0/vlan: 0 /sys/firmware/ibft/ethernet0/gateway: 192.168.79.254 /sys/firmware/ibft/ethernet0/origin: 0 /sys/firmware/ibft/ethernet0/subnet-mask: 255.255.252.0 /sys/firmware/ibft/ethernet0/ip-addr: 192.168.77.41 /sys/firmware/ibft/ethernet0/flags: 7 /sys/firmware/ibft/ethernet0/index: 0 /sys/firmware/ibft/initiator/initiator-name: iqn.2007-07.com:konrad.initiator /sys/firmware/ibft/initiator/flags: 3 /sys/firmware/ibft/initiator/index: 0 For full details of the IBFT structure please take a look at: ftp://ftp.software.ibm.com/systems/support/system_x_pdf/ibm_iscsi_boot_firmware_table_v1.02.pdf [akpm@linux-foundation.org: fix build] Signed-off-by: Konrad Rzeszutek Cc: Mike Christie Cc: Peter Jones Cc: James Bottomley Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup_32.c | 3 +++ arch/x86/kernel/setup_64.c | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5b0bffb7fcc9..4ef91749959e 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -689,6 +690,8 @@ void __init setup_bootmem_allocator(void) #endif numa_kva_reserve(); reserve_crashkernel(); + + reserve_ibft_region(); } /* diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 
674ef3510cdf..216c93bd9993 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -434,6 +435,9 @@ void __init setup_arch(char **cmdline_p) } #endif reserve_crashkernel(); + + reserve_ibft_region(); + paging_init(); map_vsyscall(); -- cgit v1.2.3 From b844eba292b477cda14582bfc6f535deed57a82d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 23 Mar 2008 20:28:24 +0100 Subject: PM: Remove destroy_suspended_device() After 2.6.24 there was a plan to make the PM core acquire all device semaphores during a suspend/hibernation to protect itself from concurrent operations involving device objects. That proved to be too heavy-handed and we found a better way to achieve the goal, but before it happened, we had introduced the functions device_pm_schedule_removal() and destroy_suspended_device() to allow drivers to "safely" destroy a suspended device and we had adapted some drivers to use them. Now that these functions are no longer necessary, it seems reasonable to remove them and modify their users to use the normal device unregistration instead. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpuid.c | 4 +--- arch/x86/kernel/msr.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 288e7a6598ac..daff52a62248 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -154,12 +154,10 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, err = cpuid_device_create(cpu); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: cpuid_device_destroy(cpu); break; - case CPU_UP_CANCELED_FROZEN: - destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); - break; } return err ? 
NOTIFY_BAD : NOTIFY_OK; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4dfb40530057..1f3abe048e93 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -162,12 +162,10 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, err = msr_device_create(cpu); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: msr_device_destroy(cpu); break; - case CPU_UP_CANCELED_FROZEN: - destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu)); - break; } return err ? NOTIFY_BAD : NOTIFY_OK; } -- cgit v1.2.3 From a2b5d877840f29b5fbb5f53b63dfcbf8bc0aea47 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Feb 2008 09:32:03 -0800 Subject: PCI: remove pci_get_device_reverse from calgary driver This isn't needed, we can just walk the devices in bus order with no problems at all, as we really want to remove pci_get_device_reverse from the kernel tree. Acked-by: Muli Ben-Yehuda Cc: Jon Mason Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/pci-calgary_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1b5464c2434f..67a3feb90edb 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1232,8 +1232,7 @@ static int __init calgary_init(void) error: do { - dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM, - PCI_ANY_ID, dev); + dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); if (!dev) break; if (!is_cal_pci_dev(dev->device)) -- cgit v1.2.3 From 4d33bdb7688de7a61859dafc783eb9b6bca279fc Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 21 Apr 2008 13:31:55 +0400 Subject: x86: Drop duplicate from setup.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 
0d1f44ae6eea..c0c68c18a788 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -18,8 +18,6 @@ unsigned disabled_cpus __cpuinitdata; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); -physid_mask_t phys_cpu_present_map; - DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); -- cgit v1.2.3 From a4928cffe6435caf427ae673131a633c1329dbf3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Apr 2008 13:20:56 +0200 Subject: "make namespacecheck" fixes Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 4 ++-- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/setup_32.c | 4 ++-- arch/x86/kernel/smpboot.c | 12 ++++++------ arch/x86/kernel/tlb_64.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 687208190b06..8317401170b8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -902,7 +902,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __cpuinit lapic_setup_esr(void) +static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9e8e5c050c55..bf83157337e4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -429,7 +429,7 @@ void __init setup_boot_APIC_clock(void) * set the DUMMY flag again and force the broadcast mode in the * clockevents layer. 
*/ -void __cpuinit check_boot_apic_timer_broadcast(void) +static void __cpuinit check_boot_apic_timer_broadcast(void) { if (!disable_apic_timer || (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) @@ -834,7 +834,7 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -void __cpuinit lapic_setup_esr(void) +static void __cpuinit lapic_setup_esr(void) { unsigned maxlvt = lapic_get_maxlvt(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7adad088e373..77de848bd1fb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -550,7 +550,7 @@ static void hard_enable_TSC(void) write_cr4(read_cr4() & ~X86_CR4_TSD); } -void enable_TSC(void) +static void enable_TSC(void) { preempt_disable(); if (test_and_clear_thread_flag(TIF_NOTSC)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 891af1a1b48a..131c2ee7ac56 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -562,7 +562,7 @@ static void hard_enable_TSC(void) write_cr4(read_cr4() & ~X86_CR4_TSD); } -void enable_TSC(void) +static void enable_TSC(void) { preempt_disable(); if (test_and_clear_thread_flag(TIF_NOTSC)) diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 78828b0f604f..455d3c80960b 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -442,7 +442,7 @@ static void __init reserve_ebda_region(void) } #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init setup_bootmem_allocator(void); +static void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { /* @@ -477,7 +477,7 @@ static unsigned long __init setup_memory(void) return max_low_pfn; } -void __init zone_sizes_init(void) +static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6a925394bc7e..ade371f9663a 100644 --- 
a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -184,7 +184,7 @@ static void unmap_cpu_to_node(int cpu) u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; -void map_cpu_to_logical_apicid(void) +static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); @@ -197,7 +197,7 @@ void map_cpu_to_logical_apicid(void) map_cpu_to_node(cpu, node); } -void unmap_cpu_to_logical_apicid(int cpu) +static void unmap_cpu_to_logical_apicid(int cpu) { cpu_2_logical_apicid[cpu] = BAD_APICID; unmap_cpu_to_node(cpu); @@ -211,7 +211,7 @@ void unmap_cpu_to_logical_apicid(int cpu) * Report back to the Boot Processor. * Running on AP. */ -void __cpuinit smp_callin(void) +static void __cpuinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; @@ -436,7 +436,7 @@ valid_k7: #endif } -void __cpuinit smp_checks(void) +static void __cpuinit smp_checks(void) { if (smp_b_stepping) printk(KERN_WARNING "WARNING: SMP operation may be unreliable" @@ -565,7 +565,7 @@ void __init smp_alloc_memory(void) } #endif -void impress_friends(void) +static void impress_friends(void) { int cpu; unsigned long bogosum = 0; @@ -1287,7 +1287,7 @@ void cpu_exit_clear(void) } # endif /* CONFIG_X86_32 */ -void remove_siblinginfo(int cpu) +static void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 1558e513757e..df224a8774cb 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } -int __cpuinit init_smp_flush(void) +static int __cpuinit init_smp_flush(void) { int i; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index edff4c985485..61efa2f7d564 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 
+216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) return 0; } -long __vsyscall(3) venosys_1(void) +static long __vsyscall(3) venosys_1(void) { return -ENOSYS; } -- cgit v1.2.3 From 6944a9c8945212a0cc1de3589736d59ec542c539 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:01 -0700 Subject: x86: rename paravirt_alloc_pt etc after the pagetable structure Rename (alloc|release)_(pt|pd) to pte/pmd to explicitly match the name of the appropriate pagetable level structure. [ x86.git merge work by Mark McLoughlin ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Mark McLoughlin Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 10 +++++----- arch/x86/kernel/vmi_32.c | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 3733412d1357..362653da003f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -366,11 +366,11 @@ struct pv_mmu_ops pv_mmu_ops = { .flush_tlb_single = native_flush_tlb_single, .flush_tlb_others = native_flush_tlb_others, - .alloc_pt = paravirt_nop, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pt = paravirt_nop, - .release_pd = paravirt_nop, + .alloc_pte = paravirt_nop, + .alloc_pmd = paravirt_nop, + .alloc_pmd_clone = paravirt_nop, + .release_pte = paravirt_nop, + .release_pmd = paravirt_nop, .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 12affe1f9bce..44f7ca153b71 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); 
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) { /* * This call comes in very early, before mem_map is setup. @@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pt(u32 pfn) +static void vmi_release_pte(u32 pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pd(u32 pfn) +static void vmi_release_pmd(u32 pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); @@ -871,15 +871,15 @@ static inline int __init activate_vmi(void) vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); if (vmi_ops.allocate_page) { - pv_mmu_ops.alloc_pt = vmi_allocate_pt; - pv_mmu_ops.alloc_pd = vmi_allocate_pd; - pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; + pv_mmu_ops.alloc_pte = vmi_allocate_pte; + pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; + pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; } vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); if (vmi_ops.release_page) { - pv_mmu_ops.release_pt = vmi_release_pt; - pv_mmu_ops.release_pd = vmi_release_pd; + pv_mmu_ops.release_pte = vmi_release_pte; + pv_mmu_ops.release_pmd = vmi_release_pmd; } /* Set linear is needed in all cases */ -- cgit v1.2.3 From 2761fa0920756dc471d297843646a4a9bca6656f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:02 -0700 Subject: x86: add pud_alloc for 4-level pagetables Signed-off-by: Jeremy 
Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 362653da003f..74f0c5ea2a03 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -369,8 +369,10 @@ struct pv_mmu_ops pv_mmu_ops = { .alloc_pte = paravirt_nop, .alloc_pmd = paravirt_nop, .alloc_pmd_clone = paravirt_nop, + .alloc_pud = paravirt_nop, .release_pte = paravirt_nop, .release_pmd = paravirt_nop, + .release_pud = paravirt_nop, .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, -- cgit v1.2.3 From 90e9f53662826db3cdd6d99bd394d727b05160c1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:12 -0700 Subject: xen: make sure iret faults are trapped Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0f8934fc303..568c6ccd7ae2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -409,7 +409,7 @@ restore_nocheck_notrace: irq_return: INTERRUPT_RETURN .section .fixup,"ax" -iret_exc: +ENTRY(iret_exc) pushl $0 # no error code pushl $do_iret_error jmp error_code -- cgit v1.2.3 From 68db065c845bd9d0eb96946ab104b4c82d0ae9da Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:13 -0700 Subject: x86: unify KERNEL_PGD_PTRS Make KERNEL_PGD_PTRS common, as previously it was only being defined for 32-bit. There are a couple of follow-on changes from this: - KERNEL_PGD_PTRS was being defined in terms of USER_PGD_PTRS. The definition of USER_PGD_PTRS doesn't really make much sense on x86-64, since it can have two different user address-space configurations. 
I renamed USER_PGD_PTRS to KERNEL_PGD_BOUNDARY, which is meaningful for all of 32/32, 32/64 and 64/64 process configurations. - USER_PTRS_PER_PGD was also defined and was being used for similar purposes. Converting its users to KERNEL_PGD_BOUNDARY left it completely unused, and so I removed it. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Zach Amsden Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/vmi_32.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 19c9386ac118..1791a751a772 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,6 @@ # include # include # include -# include #else # include #endif @@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length) /* Remap the kernel at virtual address zero, as well as offset zero from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. 
*/ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6a925394bc7e..2de2f7a2ed5d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) #ifdef CONFIG_X86_32 /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); #endif diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 44f7ca153b71..956f38927aa7 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page) * pdes need to be zeroed. */ if (type & VMI_PAGE_CLONE) - limit = USER_PTRS_PER_PGD; + limit = KERNEL_PGD_BOUNDARY; for (i = 0; i < limit; i++) BUG_ON(ptr[i]); } -- cgit v1.2.3 From e2a81baf6604a2e08e10c7405b0349106f77c8af Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:17 -0700 Subject: xen: support sysenter/sysexit if hypervisor does 64-bit Xen supports sysenter for 32-bit guests, so support its use. (sysenter is faster than int $0x80 in 32-on-64.) sysexit is still not supported, so we fake it up using iret. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 568c6ccd7ae2..5d80d53eaff8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper) ENDPROC(kernel_thread_helper) #ifdef CONFIG_XEN +/* Xen doesn't set %esp to be precisely what the normal sysenter + entrypoint expects, so fix it up before using the normal path. */ +ENTRY(xen_sysenter_target) + RING0_INT_FRAME + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp + ENTRY(xen_hypervisor_callback) CFI_STARTPROC pushl $0 @@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback) jae 1f call xen_iret_crit_fixup + jmp 2f + +1: cmpl $xen_sysexit_start_crit,%eax + jb 2f + cmpl $xen_sysexit_end_crit,%eax + jae 2f + + jmp xen_sysexit_crit_fixup -1: mov %esp, %eax +ENTRY(xen_do_upcall) +2: mov %esp, %eax call xen_evtchn_do_upcall jmp ret_from_intr CFI_ENDPROC -- cgit v1.2.3 From 0f2c87695219b1129ccf93e0f58acdcdd49724b9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:22 -0700 Subject: xen: jump to iret fixup Use jmp rather than call for the iret fixup, so its consistent with the sysexit fixup, and it simplifies the stack (which is already complex). 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5d80d53eaff8..209c334bb920 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1042,8 +1042,7 @@ ENTRY(xen_hypervisor_callback) cmpl $xen_iret_end_crit,%eax jae 1f - call xen_iret_crit_fixup - jmp 2f + jmp xen_iret_crit_fixup 1: cmpl $xen_sysexit_start_crit,%eax jb 2f -- cgit v1.2.3 From b77797fb2bf31bf076e6b69736119bc6a077525b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 Apr 2008 10:54:11 -0700 Subject: xen: fold xen_sysexit into xen_iret xen_sysexit and xen_iret were doing essentially the same thing. Rather than having a separate implementation for xen_sysexit, we can just strip the stack back to an iret frame and jump into xen_iret. This removes a lot of code and complexity - specifically, another critical region. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 209c334bb920..2a609dc3271c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1044,15 +1044,8 @@ ENTRY(xen_hypervisor_callback) jmp xen_iret_crit_fixup -1: cmpl $xen_sysexit_start_crit,%eax - jb 2f - cmpl $xen_sysexit_end_crit,%eax - jae 2f - - jmp xen_sysexit_crit_fixup - ENTRY(xen_do_upcall) -2: mov %esp, %eax +1: mov %esp, %eax call xen_evtchn_do_upcall jmp ret_from_intr CFI_ENDPROC -- cgit v1.2.3 From 8b132ecbcfea8b1b556a832df7290379df79ad79 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 28 Apr 2008 02:51:23 +0200 Subject: x86: fix text_poke() kernel_text_address returns true even for modules which is not wanted in text_poke. Use core_kernel_text instead. This is a regression introduced in e587cadd8f47e202a30712e2906a65a0606d5865 which caused occasional crashes after suspend/resume. Signed-off-by: Jiri Slaby CC: Mathieu Desnoyers CC: Andi Kleen CC: pageexec@freemail.hu CC: H. 
Peter Anvin CC: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df4099dc1c68..7ab3a9774763 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -515,7 +515,7 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) BUG_ON(len > sizeof(long)); BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) - ((long)addr & ~(sizeof(long) - 1))); - if (kernel_text_address((unsigned long)addr)) { + if (core_kernel_text((unsigned long)addr)) { struct page *pages[2] = { virt_to_page(addr), virt_to_page(addr + PAGE_SIZE) }; if (!pages[1]) -- cgit v1.2.3 From b7b66baa8bc3f8e0cda6576e31e9bde09382565d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 24 Apr 2008 11:03:33 -0400 Subject: x86: clean up text_poke() Clean up the codepath, remove alignment restrictions and do sanity checking of the end result, to make sure we patched the right site. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7ab3a9774763..60299f61843f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -511,31 +511,29 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) unsigned long flags; char *vaddr; int nr_pages = 2; + struct page *pages[2]; + int i; - BUG_ON(len > sizeof(long)); - BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) - - ((long)addr & ~(sizeof(long) - 1))); - if (core_kernel_text((unsigned long)addr)) { - struct page *pages[2] = { virt_to_page(addr), - virt_to_page(addr + PAGE_SIZE) }; - if (!pages[1]) - nr_pages = 1; - vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); - BUG_ON(!vaddr); - local_irq_save(flags); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); - vunmap(vaddr); + if (!core_kernel_text((unsigned long)addr)) { + pages[0] = vmalloc_to_page(addr); + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); } else { - /* - * modules are in vmalloc'ed memory, always writable. - */ - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); + pages[0] = virt_to_page(addr); + pages[1] = virt_to_page(addr + PAGE_SIZE); } + BUG_ON(!pages[0]); + if (!pages[1]) + nr_pages = 1; + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + BUG_ON(!vaddr); + local_irq_save(flags); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + local_irq_restore(flags); + vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. 
*/ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); return addr; } -- cgit v1.2.3 From 00c6b2d5d7b2414bd46c620d6a8c37fa7a716f29 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Apr 2008 17:07:03 +0200 Subject: x86: harden kernel code patching Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 60299f61843f..65c7857a90dd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -519,6 +519,7 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) pages[1] = vmalloc_to_page(addr + PAGE_SIZE); } else { pages[0] = virt_to_page(addr); + WARN_ON(!PageReserved(pages[0])); pages[1] = virt_to_page(addr + PAGE_SIZE); } BUG_ON(!pages[0]); -- cgit v1.2.3 From 2cfed60cc24676d65e01278dbf10d0069de02592 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 25 Apr 2008 04:21:11 -0600 Subject: Update .gitignore files Add some autogenerated files to various .gitignore files Signed-off-by: Matthew Wilcox Signed-off-by: Linus Torvalds --- arch/x86/kernel/acpi/realmode/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 arch/x86/kernel/acpi/realmode/.gitignore (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore new file mode 100644 index 000000000000..58f1f48a58f8 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/.gitignore @@ -0,0 +1,3 @@ +wakeup.bin +wakeup.elf +wakeup.lds -- cgit v1.2.3 From 5065dbafc299507f16731434e95b91dadff03006 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:16:50 +0100 Subject: i386: fix asm constraint in do_IRQ() Two prior changes resulted in the "ecx" clobber being lost. 
Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 6ea67b76a214..00bda7bcda63 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs) : "=a" (arg1), "=d" (arg2), "=b" (bx) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) - : "memory", "cc" + : "memory", "cc", "ecx" ); } else #endif -- cgit v1.2.3 From 86d78f640257344cc90a50da8cd52297ba1c6bdf Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:28:41 +0100 Subject: x86: fix watchdog ops for CoreDuo There apparently was an unnoticed conflict between an earlier patch to this file and mine (d1e084746b0e5806e6345ab31c5b370f8dee2b23), which I noticed only now. I suppose a change like the one below (untested) is needed; I didn't get any response on a confirmation request for this from the submitter of the first patch. The issue is the writing of the 'checkbit' member at the end of setup_intel_arch_watchdog(), which my patch made go to intel_arch_wd_ops rather than wd_ops. 
Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index b943e10ad814..f9ae93adffe5 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = { .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; -static struct wd_ops coreduo_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, -}; - static void probe_nmi_watchdog(void) { switch (boot_cpu_data.x86_vendor) { @@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void) /* Work around Core Duo (Yonah) errata AE49 where perfctr1 doesn't have a working enable bit. */ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { - wd_ops = &coreduo_wd_ops; - break; + intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; + intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; } if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { wd_ops = &intel_arch_wd_ops; -- cgit v1.2.3 From 79bf0e0353e0a34dbe0b2ef659a9bdd8056ca524 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:19:25 +0100 Subject: i386: fix signal type for iret exception .. since it uses ILL_BADSTK (which is meaningless in the context of SIGSEGV). 
Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 471e694d6713..bde6f63e15d5 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) +DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { -- cgit v1.2.3 From 911f6a7ba21795865ab30fc5f88aa198b0daee5f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:22:21 +0100 Subject: x86-64: extend MCE CPU quirk handling At least on my Barcelona, I see MCE log entries after cold boot caused by BIOS not properly clearing the respective registers. Therefore, this patch extends the workaround to families 0x10 and 0x11 (the latter just for completeness, I have nothing to verify this against). At the same time, provide a way to make these entries visible via the 'mce=bootlog' command line option even on these machines. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_64.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 9a699ed03598..e07e8c068ae0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -49,7 +49,7 @@ static int banks; static unsigned long bank[NR_BANKS] = { [0 ... 
NR_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; -static int mce_bootlog = 1; +static int mce_bootlog = -1; static atomic_t mce_events; static char trigger[128]; @@ -471,13 +471,15 @@ static void mce_init(void *dummy) static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { - /* disable GART TBL walk error reporting, which trips off - incorrectly with the IOMMU & 3ware & Cerberus. */ - clear_bit(10, &bank[4]); - /* Lots of broken BIOS around that don't clear them - by default and leave crap in there. Don't log. */ - mce_bootlog = 0; + if (c->x86_vendor == X86_VENDOR_AMD) { + if(c->x86 == 15) + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ + clear_bit(10, &bank[4]); + if(c->x86 <= 17 && mce_bootlog < 0) + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; } } -- cgit v1.2.3 From f3b14a32db9a74f2bbda980bc95cd4b1f136d80a Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Sun, 20 Apr 2008 06:54:31 +0400 Subject: x86: remove unused function amd_init_cpu() There are no users for the function amd_init_cpu() defined in arch/x86/kernel/cpu/amd.c. This patch removes this routine. This patch was build-tested using defconfigs for i386 and x86_64, and a few randconfig instances. Runtime tests were performed by booting 32- and 64-bit x86 boxen up to the shell prompt. 
Signed-off-by: Dmitri Vorobiev Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0173065dc3b7..245866828294 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_size_cache = amd_size_cache, }; -int __init amd_init_cpu(void) -{ - cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; - return 0; -} - cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); -- cgit v1.2.3 From a2b4bd9c95a799ce1002e699187f17ddaa754eb1 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Sun, 20 Apr 2008 06:54:33 +0400 Subject: x86: array can become static In arch/x86/kernel/setup_64.c, the standard_io_resources array is needlessly defined as global. This patch makes this variable static. This patch was successfully build-tested using the defconfig for x86_64. Runtime test was performed by booting a 64-bit x86 box up to the shell prompt. 
Signed-off-by: Dmitri Vorobiev Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index c2ec3dcb6b99..17bdf2343095 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -116,7 +116,7 @@ extern int root_mountflags; char __initdata command_line[COMMAND_LINE_SIZE]; -struct resource standard_io_resources[] = { +static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, { .name = "pic1", .start = 0x20, .end = 0x21, -- cgit v1.2.3 From f7f17a67c589f031c567d9fdc809dee7c5868c8a Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Mon, 21 Apr 2008 00:47:55 +0400 Subject: x86: remove NexGen support It is claimed that NexGen CPUs were never shipped: http://lkml.org/lkml/2008/4/20/179 Also, the kernel support for these chips has been broken for a long time, the code intended to support NexGen thereby being essentially dead. As an outcome of the discussion that can be found using the URL above, this patch removes the NexGen support altogether. The changes in this patch survived a defconfig build for i386, a couple of successful randconfig builds, as well as a runtime test, which consisted in booting a 32-bit x86 box up to the shell prompt. 
Signed-off-by: Dmitri Vorobiev Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 1 - arch/x86/kernel/cpu/nexgen.c | 59 -------------------------------------------- 2 files changed, 60 deletions(-) delete mode 100644 arch/x86/kernel/cpu/nexgen.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee7c45235e54..a0c6f8190887 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_32) += nexgen.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c deleted file mode 100644 index 5d5e1c134123..000000000000 --- a/arch/x86/kernel/cpu/nexgen.c +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Detect a NexGen CPU running without BIOS hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __cpuinit deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx"); - return ret; -} - -static void __cpuinit init_nexgen(struct cpuinfo_x86 *c) -{ - c->x86_cache_size = 256; /* A few had 1 MB... 
*/ -} - -static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c) -{ - /* Detect NexGen with old hypercode */ - if (deep_magic_nexgen_probe()) - strcpy(c->x86_vendor_id, "NexGenDriven"); -} - -static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { - .c_vendor = "Nexgen", - .c_ident = { "NexGenDriven" }, - .c_models = { - { .vendor = X86_VENDOR_NEXGEN, - .family = 5, - .model_names = { [1] = "Nx586" } - }, - }, - .c_init = init_nexgen, - .c_identify = nexgen_identify, -}; - -int __init nexgen_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; - return 0; -} -- cgit v1.2.3 From 8b9c5ff380aa4f10658171ed2b9abc1e0861b770 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 14:26:54 -0700 Subject: x86 signals: lift flags diddling code This lifts the code diddling the TF and DF bits for signal handler setup out of the several places copying the same code into the one place that calls them all. There is no change in what it does. I also separated the recently-added DF bit clearing from the TF diddling. The compiler turns them back into one instruction anyway. The tossing in of DF to the same line of code with no new comments was a bit more arcane than seems wise. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 35 +++++++++++++++-------------------- arch/x86/kernel/signal_64.c | 19 +++++++++++++++---- 2 files changed, 30 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index f1b117930837..8e05e7f7bd40 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->ss = __USER_DS; regs->cs = __USER_CS; - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. 
- */ - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->ss = __USER_DS; regs->cs = __USER_CS; - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; + + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 827179c5b32a..3a76702dc3f1 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -289,10 +289,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, see include/asm-x86_64/uaccess.h for details. */ set_fs(USER_DS); - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: @@ -380,6 +376,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret == 0) { + /* + * Clear the direction flag as per the ABI for function entry. 
+ */ + regs->flags &= ~X86_EFLAGS_DF; + + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) -- cgit v1.2.3 From 55928e37b29ba5557a5edc8ab679fe5191bc051d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 14:27:56 -0700 Subject: x86 signals: lift set_fs This lifts the set_fs(USER_DS) call for signal handler setup out of the three places copying the same code into the one place that calls them all. There is no change in what it does. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3a76702dc3f1..ccb2a4560c2d 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -285,10 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - /* This, by contrast, has nothing to do with segment registers - - see include/asm-x86_64/uaccess.h for details. */ - set_fs(USER_DS); - return 0; give_sigsegv: @@ -376,6 +372,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret == 0) { + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); + /* * Clear the direction flag as per the ABI for function entry. 
*/ -- cgit v1.2.3 From cdb69904794d2173176b1a58e849b7b39fced390 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 22 Apr 2008 12:20:20 -0700 Subject: x86_64 ia32 ptrace: use compat_ptrace_request for siginfo This removes the special-case handling for PTRACE_GETSIGINFO and PTRACE_SETSIGINFO from x86_64's sys32_ptrace. The generic compat_ptrace_request code handles these. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 559c1b027417..870dc1023d2d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1207,32 +1207,6 @@ static int genregs32_set(struct task_struct *target, return ret; } -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) -{ - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - int ret; - - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; @@ -1280,11 +1254,9 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) case PTRACE_SETFPXREGS: case PTRACE_GETFPXREGS: case PTRACE_GETEVENTMSG: - break; - case PTRACE_SETSIGINFO: case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); + break; } child = ptrace_get_task_struct(pid); -- cgit v1.2.3 From 
562b80bafffaf42a6d916b0a2ee3d684220a1c10 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 22 Apr 2008 12:21:25 -0700 Subject: x86_64 ia32 ptrace: convert to compat_arch_ptrace Now that there are no more special cases in sys32_ptrace, we can convert to using the generic compat_sys_ptrace entry point. The sys32_ptrace function gets simpler and becomes compat_arch_ptrace. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 67 ++++++------------------------------------------ 1 file changed, 8 insertions(+), 59 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 870dc1023d2d..fb03ef380f0e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1207,68 +1207,15 @@ static int genregs32_set(struct task_struct *target, return ret; } -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - struct task_struct *child; - struct pt_regs *childregs; + unsigned long addr = caddr; + unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; __u32 val; - switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_SINGLEBLOCK: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: -#ifdef X86_BTS - case PTRACE_BTS_CONFIG: - case PTRACE_BTS_STATUS: - case PTRACE_BTS_SIZE: - case PTRACE_BTS_GET: - case PTRACE_BTS_CLEAR: - case PTRACE_BTS_DRAIN: -#endif - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case 
PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - break; - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); @@ -1315,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) sizeof(struct user32_fxsr_struct), datap); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + default: return compat_ptrace_request(child, request, addr, data); } - out: - put_task_struct(child); return ret; } -- cgit v1.2.3 From 5d47a271f38cf2ba7299047ad0bf3ac7e4c4a214 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:11 +0900 Subject: x86: use BUILD_BUG_ON() for the size of struct intel_mp_floating Use BUILD_BUG_ON() instead of compile-time error technique with extern non-exsistent function. 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 70744e344fa1..101b13cab689 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -686,13 +686,11 @@ void __init get_smp_config(void) static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { - extern void __bad_mpf_size(void); unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); - if (sizeof(*mpf) != 16) - __bad_mpf_size(); + BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { mpf = (struct intel_mp_floating *)bp; -- cgit v1.2.3 From 4abc1a0068945ac078fb0a00a359cd3be2e7dd8d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:12 +0900 Subject: x86: use MP_intsrc_info() Remove duplicate code by using MP_intsrc_info() in mpparse.c Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 101b13cab689..534790b6d0f1 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -907,14 +907,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ intsrc.mpc_dstirq = pin; /* INTIN# */ - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } int 
es7000_plat; @@ -983,15 +976,7 @@ void __init mp_config_acpi_legacy_irqs(void) intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } } -- cgit v1.2.3 From a1a33fa315b8a5a390f1132681485209500ff5b5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:13 +0900 Subject: x86: use bitmap library for pin_programmed Use bitmap library for pin_programmed rather than reinvent bitmaps. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 534790b6d0f1..23e8432a9826 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -799,7 +799,6 @@ void __init find_smp_config(void) #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; @@ -982,9 +981,8 @@ void __init mp_config_acpi_legacy_irqs(void) int mp_register_gsi(u32 gsi, int triggering, int polarity) { - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; + int ioapic; + int ioapic_pin; #ifdef CONFIG_X86_32 #define MAX_GSI_NUM 4096 #define IRQ_COMPRESSION_START 64 @@ -1024,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * with redundant pin->gsi mappings (but unique PCI devices); * we only program the IOAPIC on the first. */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); - if (idx > 3) { + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { printk(KERN_ERR "Invalid reference to IOAPIC pin " "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); return gsi; } - if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); #ifdef CONFIG_X86_32 @@ -1042,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) #endif } - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); #ifdef CONFIG_X86_32 /* * For GSI >= 64, use IRQ compression -- cgit v1.2.3 From b1fceac2b9e04d278316b2faddf276015fc06e3b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:14 +0900 Subject: x86: remove unnecessary memset and NULL check after alloc_bootmem() memset and NULL check after alloc_bootmem() are unnecessary. Because it returns zeroed memory and it never return NULL. 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 057ccf1d5ad4..977ed5cdeaa3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - if (!hpet_res) - return 0; - - memset(hpet_res, 0, sizeof(*hpet_res)); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", -- cgit v1.2.3 From d454157b113718a92ba5accc03cee64c7e081483 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:15 +0900 Subject: x86: cleanup clocksource_hz2mult usage Remove the magic number in the second argument of clocksource_hz2mult() Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 8540abe86ade..e325cfe06401 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -224,7 +224,8 @@ static int __init init_pit_clocksource(void) pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) return 0; - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, + clocksource_pit.shift); return clocksource_register(&clocksource_pit); } arch_initcall(init_pit_clocksource); -- cgit v1.2.3 From 877084fb1cf6167c5441b0a30c3d9ef9b7be0a3a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:16 +0900 Subject: x86: cleanup div_sc() usage Remove the magic number in the third argument of div_sc(). 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 ++- arch/x86/kernel/apic_64.c | 3 ++- arch/x86/kernel/hpet.c | 2 +- arch/x86/kernel/i8253.c | 3 ++- arch/x86/kernel/mfgpt_32.c | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8317401170b8..4b99b1bdeb6c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void) } /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index bf83157337e4..5910020c3f24 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void) result / 1000 / 1000, result / 1000 % 1000); /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 36652ea1a265..9007f9ea64ee 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void) hpet_freq = 1000000000000000ULL; do_div(hpet_freq, hpet_period); hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); + NSEC_PER_SEC, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); 
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index e325cfe06401..c1b5e3ece1f2 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -115,7 +115,8 @@ void __init setup_pit_timer(void) * IO_APIC has been initialized. */ pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); + pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, + pit_clockevent.shift); pit_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_clockevent); pit_clockevent.min_delta_ns = diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index b402c0f3f192..cfc2648d25ff 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void) geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); /* Set up the clock event */ - mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); + mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, + mfgpt_clockevent.shift); mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &mfgpt_clockevent); mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, -- cgit v1.2.3 From 7c04e64a1b43b4c8fea281ce1f82df30ed9bab4e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:17 +0900 Subject: x86: use cpumask function for present, possible, and online cpus cpu_online(), cpu_present(), for_each_possible_cpu(), num_possible_cpus() Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/tlb_64.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 9546ef408b92..021624c83583 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -51,7 +51,7 @@ void __init setup_apic_routing(void) else #endif - if (cpus_weight(cpu_possible_map) <= 
8) + if (num_possible_cpus() <= 8) genapic = &apic_flat; else genapic = &apic_physflat; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eef79e84145f..04c662ba18f1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) check_tsc_sync_source(cpu); local_irq_restore(flags); - while (!cpu_isset(cpu, cpu_online_map)) { + while (!cpu_online(cpu)) { cpu_relax(); touch_nmi_watchdog(); } @@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void) int i; struct cpuinfo_x86 *c; - for_each_cpu_mask(i, cpu_possible_map) { + for_each_possible_cpu(i) { c = &cpu_data(i); /* mark all to hotplug */ c->cpu_index = NR_CPUS; diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index df224a8774cb..a1f07d793202 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_cpu_mask(i, cpu_possible_map) { + for_each_possible_cpu(i) spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); - } + return 0; } core_initcall(init_smp_flush); -- cgit v1.2.3 From ae5830a6f8278e1bb700a0956cacc9ceaf311f83 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:19 +0900 Subject: x86: remove duplicate get_bios_ebda() from rio.h get_bios_ebda() exists in asm/rio.h and asm/bios_ebda.h. This patch removes the one in asm/rio.h. 
Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 2edee22e9c30..e28ec497e142 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; -- cgit v1.2.3 From 356fa0c6e1ad3d3b01884f08a203bc84d555b880 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 19 Apr 2008 23:55:20 +0900 Subject: x86: use get_bios_ebda() Use get_bios_ebda(). Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 5 ++--- arch/x86/kernel/setup_32.c | 4 +--- arch/x86/kernel/summit_32.c | 5 +++-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 993c76773256..d31d6b72d60d 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -22,6 +22,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -49,7 +50,6 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define BIOS_EBDA_SEGMENT 0x40E #define BIOS_LOWMEM_KILOBYTES 0x413 /* @@ -80,8 +80,7 @@ static void __init reserve_ebda_region(void) lowmem <<= 10; /* start of EBDA area */ - ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); - ebda_addr <<= 4; + ebda_addr = get_bios_ebda(); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. 
*/ diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 455d3c80960b..44cc9b933932 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -389,7 +389,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_EBDA_SEGMENT 0x40E #define BIOS_LOWMEM_KILOBYTES 0x413 /* @@ -420,8 +419,7 @@ static void __init reserve_ebda_region(void) lowmem <<= 10; /* start of EBDA area */ - ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); - ebda_addr <<= 4; + ebda_addr = get_bios_ebda(); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. */ diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 6878a9c2df5d..ae751094eba9 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -29,6 +29,7 @@ #include #include #include +#include #include static struct rio_table_hdr *rio_table_hdr __initdata; @@ -140,8 +141,8 @@ void __init setup_summit(void) int i, next_wpeg, next_bus = 0; /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ - ptr = *(unsigned short *)phys_to_virt(0x40Eul); - ptr = (unsigned long)phys_to_virt(ptr << 4); + ptr = get_bios_ebda(); + ptr = (unsigned long)phys_to_virt(ptr); rio_table_hdr = NULL; offset = 0x180; -- cgit v1.2.3 From 4c01f23bdbd34e7edeadbaa920c3018307a541d5 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Sat, 12 Apr 2008 17:38:52 +0200 Subject: x86: trampoline_32.S - switch to .cpuinit.data This patch fixes section mismatch warnings of __cpuinit setup_trampoline() on 32-bit host. 
Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/trampoline_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 64580679861e..d8ccc3c6552f 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -33,7 +33,7 @@ /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU -.section ".init.data","aw",@progbits +.section ".cpuinit.data","aw",@progbits #else .section .rodata,"a",@progbits #endif -- cgit v1.2.3 From 991074fd35e9e584d3cc28b4cba2e12743aeaa46 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Sat, 12 Apr 2008 17:39:57 +0200 Subject: x86: uniq_ioapic_id - fix section mismatch warning Fix following warning: WARNING: arch/x86/kernel/built-in.o(.text+0x10799): Section mismatch in reference from the function uniq_ioapic_id() uniq_ioapic_id() is only used by __init mp_register_ioapic(). Annotate uniq_ioapic_id() with __init. 
Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 23e8432a9826..3e2c54dc8b29 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -817,7 +817,7 @@ static int mp_find_ioapic(int gsi) return -1; } -static u8 uniq_ioapic_id(u8 id) +static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && -- cgit v1.2.3 From 28acf285deb193a1898bd531d778b0d1b1b75f2c Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Sat, 12 Apr 2008 17:41:12 +0200 Subject: x86: unlock_ExtINT_logic() - fix section mismatch warnings Fix following warning: WARNING: arch/x86/kernel/built-in.o(.text+0x12cc9): Section mismatch in reference from the function unlock_ExtINT_logic() unlock_ExtINT_logic() is only used by __init check_timer(). Annotate unlock_ExtINT_logic() with __init. Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 2e2f42074e18..696b8e4e66bb 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2068,7 +2068,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. 
--macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; -- cgit v1.2.3 From 5afca33a43786408ce74540b54855973dde32bab Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Fri, 11 Apr 2008 13:29:04 +0200 Subject: x86: section mismatch fixes, #3 This patch fixes section mismatch warnings in unlock_ExtINT_logic(). WARNING: arch/x86/kernel/built-in.o(.text+0x14a92): Section mismatch in reference from the function unlock_ExtINT_logic() to the function .init.text:find_isa_irq_pin() The function unlock_ExtINT_logic() references the function __init find_isa_irq_pin(). This is often because unlock_ExtINT_logic lacks a __init annotation or the annotation of find_isa_irq_pin is wrong. Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9ba11d07920f..ef1a8dfcc529 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1599,7 +1599,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; -- cgit v1.2.3 From 50eae2a7c9862afe263a2003c12f457ecfc9e6a2 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 28 Mar 2008 10:49:42 +0800 Subject: x86, boot: add free_early to early reservation mechanism Add free_early to early reservation mechanism - this way early bootup failure paths can stop wasting memory. 
Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_64.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index cbd42e51cb08..79f0d52fa99a 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -84,6 +84,28 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name) strncpy(r->name, name, sizeof(r->name) - 1); } +void __init free_early(unsigned long start, unsigned long end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %lx-%lx!", start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memcpy(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + void __init early_res_to_bootmem(void) { int i; -- cgit v1.2.3 From 8b664aa66e824a0ddf4ec56d41fa0cf7bb374de6 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 28 Mar 2008 10:49:44 +0800 Subject: x86, boot: add linked list of struct setup_data This patch adds a field of 64-bit physical pointer to NULL terminated single linked list of struct setup_data to real-mode kernel header. This is used as a more extensible boot parameters passing mechanism. 
Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 20 ++++++++++++++++++++ arch/x86/kernel/setup_64.c | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d31d6b72d60d..e25c57b8aa84 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,24 @@ static void __init reserve_ebda_region(void) reserve_early(lowmem, 0x100000, "BIOS reserved"); } +static void __init reserve_setup_data(void) +{ + struct setup_data *data; + unsigned long pa_data; + char buf[32]; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } +} + void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -156,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data) #endif reserve_ebda_region(); + reserve_setup_data(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 17bdf2343095..e1a21d6b3671 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -264,6 +264,26 @@ void __attribute__((weak)) __init memory_setup(void) machine_specific_memory_setup(); } +static void __init parse_setup_data(void) +{ + struct setup_data *data; + unsigned long pa_data; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, PAGE_SIZE); + switch (data->type) { + default: + break; + } + free_early(pa_data, pa_data+sizeof(*data)+data->len); + pa_data = data->next; + 
early_iounmap(data, PAGE_SIZE); + } +} + /* * setup_arch - architecture-specific boot-time initializations * @@ -316,6 +336,8 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; + parse_setup_data(); + parse_early_param(); #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT -- cgit v1.2.3 From c14b2adf19b5d35aff91280b1a73c41a4dcabfe3 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 28 Mar 2008 10:49:48 +0800 Subject: x86, boot: export linked list of struct setup_data via debugfs Export linked list of struct setup_data via debugfs. Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/kdebugfs.c | 163 +++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/setup_64.c | 2 + 2 files changed, 160 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 73354302fda7..c03205991718 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -6,23 +6,171 @@ * * This file is released under the GPLv2. 
*/ - #include +#include #include #include +#include +#include #include #ifdef CONFIG_DEBUG_BOOT_PARAMS +struct setup_data_node { + u64 paddr; + u32 type; + u32 len; +}; + +static ssize_t +setup_data_read(struct file *file, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct setup_data_node *node = file->private_data; + unsigned long remain; + loff_t pos = *ppos; + struct page *pg; + void *p; + u64 pa; + + if (pos < 0) + return -EINVAL; + if (pos >= node->len) + return 0; + + if (count > node->len - pos) + count = node->len - pos; + pa = node->paddr + sizeof(struct setup_data) + pos; + pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + p = ioremap_cache(pa, count); + if (!p) + return -ENXIO; + } else { + p = __va(pa); + } + + remain = copy_to_user(user_buf, p, count); + + if (PageHighMem(pg)) + iounmap(p); + + if (remain) + return -EFAULT; + + *ppos = pos + count; + + return count; +} + +static int setup_data_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations fops_setup_data = { + .read = setup_data_read, + .open = setup_data_open, +}; + +static int __init +create_setup_data_node(struct dentry *parent, int no, + struct setup_data_node *node) +{ + struct dentry *d, *type, *data; + char buf[16]; + int error; + + sprintf(buf, "%d", no); + d = debugfs_create_dir(buf, parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + type = debugfs_create_x32("type", S_IRUGO, d, &node->type); + if (!type) { + error = -ENOMEM; + goto err_dir; + } + data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); + if (!data) { + error = -ENOMEM; + goto err_type; + } + return 0; + +err_type: + debugfs_remove(type); +err_dir: + debugfs_remove(d); +err_return: + return error; +} + +static int __init create_setup_data_nodes(struct dentry *parent) +{ + struct setup_data_node *node; + struct setup_data *data; + int error, no = 0; + struct 
dentry *d; + struct page *pg; + u64 pa_data; + + d = debugfs_create_dir("setup_data", parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + + pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + error = -ENOMEM; + goto err_dir; + } + pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) { + error = -ENXIO; + goto err_dir; + } + } else { + data = __va(pa_data); + } + + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + error = create_setup_data_node(d, no, node); + pa_data = data->next; + + if (PageHighMem(pg)) + iounmap(data); + if (error) + goto err_dir; + no++; + } + return 0; + +err_dir: + debugfs_remove(d); +err_return: + return error; +} + static struct debugfs_blob_wrapper boot_params_blob = { - .data = &boot_params, - .size = sizeof(boot_params), + .data = &boot_params, + .size = sizeof(boot_params), }; static int __init boot_params_kdebugfs_init(void) { - int error; struct dentry *dbp, *version, *data; + int error; dbp = debugfs_create_dir("boot_params", NULL); if (!dbp) { @@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void) error = -ENOMEM; goto err_version; } + error = create_setup_data_nodes(dbp); + if (error) + goto err_data; return 0; + +err_data: + debugfs_remove(data); err_version: debugfs_remove(version); err_dir: @@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void) return error; } - arch_initcall(arch_kdebugfs_init); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index e1a21d6b3671..b04e2c011e1a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -278,7 +278,9 @@ static void __init parse_setup_data(void) default: break; } +#ifndef CONFIG_DEBUG_BOOT_PARAMS free_early(pa_data, pa_data+sizeof(*data)+data->len); +#endif pa_data = data->next; early_iounmap(data, PAGE_SIZE); } -- cgit 
v1.2.3 From 1a27fc0a42162964d758e9d36d2d1b49c082a67c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:52:37 -0700 Subject: x86_64: fix setup_node_bootmem to support big mem excluding with memmap typical case: four sockets system, every node has 4g ram, and we are using: memmap=10g$4g to mask out memory on node1 and node2 when numa is enabled, early_node_mem is used to get node_data and node_bootmap. if it can not get memory from the same node with find_e820_area(), it will use alloc_bootmem to get buff from previous nodes. so check it and print out some info about it. need to move early_res_to_bootmem into every setup_node_bootmem. and it takes range that node has. otherwise alloc_bootmem could return addr that reserved early. depends on "mm: make reserve_bootmem can crossed the nodes". Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 13 +++++++++---- arch/x86/kernel/setup_64.c | 3 +-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 79f0d52fa99a..645ee5e32a27 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) early_res[j - 1].end = 0; } -void __init early_res_to_bootmem(void) +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b04e2c011e1a..60e64c8eee92 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn< Date: Fri, 25 Apr 2008 17:39:01 +0200 Subject: fix idle (arch, acpi and apm) and lockdep OK, so 25-mm1 gave a lockdep error which made me look into this. The first thing that I noticed was the horrible mess; the second thing I saw was hacks like: 71e93d15612c61c2e26a169567becf088e71b8ff The problem is that arch idle routines are somewhat inconsistent with their IRQ state handling and instead of fixing _that_, we go paper over the problem. So the thing I've tried to do is set a standard for idle routines and fix them all up to adhere to that. So the rules are: idle routines are entered with IRQs disabled idle routines will exit with IRQs enabled Nearly all already did this in one form or another. Merge the 32 and 64 bit bits so they no longer have different bugs. As for the actual lockdep warning; __sti_mwait() did a plainly un-annotated irq-enable. 
Signed-off-by: Peter Zijlstra Tested-by: Bob Copeland Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 3 ++ arch/x86/kernel/process.c | 117 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 118 ++--------------------------------------- arch/x86/kernel/process_64.c | 123 ++----------------------------------------- 4 files changed, 127 insertions(+), 234 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f0030a0999c7..e4ea362e8480 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -904,6 +904,7 @@ recalc: original_pm_idle(); else default_idle(); + local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) goto recalc; @@ -911,6 +912,8 @@ recalc: if (apm_idle_done) apm_do_busy(); + + local_irq_enable(); } /** diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3004d716539d..67e9b4a1e89d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include struct kmem_cache *task_xstate_cachep; @@ -42,3 +44,118 @@ void arch_task_cache_init(void) __alignof__(union thread_xstate), SLAB_PANIC, NULL); } + +static void do_nothing(void *unused) +{ +} + +/* + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of + * pm_idle and update to new pm_idle value. Required while changing pm_idle + * handler on SMP systems. + * + * Caller must have changed pm_idle to the new value before the call. Old + * pm_idle value will not be used by any CPU after the return of this function. + */ +void cpu_idle_wait(void) +{ + smp_mb(); + /* kick all the CPUs so that they exit out of pm_idle */ + smp_call_function(do_nothing, NULL, 0, 1); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. 
+ * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. + */ +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) +{ + if (!need_resched()) { + __monitor((void *)&current_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(ax, cx); + } +} + +/* Default MONITOR/MWAIT with no hints, used for default C1 state */ +static void mwait_idle(void) +{ + if (!need_resched()) { + __monitor((void *)&current_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else + local_irq_enable(); +} + + +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) +{ + if (force_mwait) + return 1; + /* Any C1 states supported? */ + return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + cpu_relax(); +} + +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ + static int selected; + + if (selected) + return; +#ifdef CONFIG_X86_SMP + if (pm_idle == poll_idle && smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," + " performance may degrade.\n"); + } +#endif + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { + /* + * Skip, if setup has overridden idle. 
+ * One CPU supports mwait => All CPUs supports mwait + */ + if (!pm_idle) { + printk(KERN_INFO "using mwait in idle threads.\n"); + pm_idle = mwait_idle; + } + } + selected = 1; +} + +static int __init idle_setup(char *str) +{ + if (!strcmp(str, "poll")) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } else if (!strcmp(str, "mwait")) + force_mwait = 1; + else + return -1; + + boot_option_idle_override = 1; + return 0; +} +early_param("idle", idle_setup); + diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77de848bd1fb..f8476dfbb60d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -111,12 +111,10 @@ void default_idle(void) */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { local_irq_enable(); @@ -128,17 +126,6 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU #include /* We don't actually take CPU down, just spin without interrupts. */ @@ -196,6 +183,7 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); + local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } @@ -206,104 +194,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. 
Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)&current_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(ax, cx); - else - local_irq_enable(); - } else - local_irq_enable(); -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - mwait_idle_with_hints(0, 0); -} - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. 
- * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 131c2ee7ac56..e2319f39988b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -106,26 +106,13 @@ void default_idle(void) * test NEED_RESCHED: */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -192,110 +179,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. 
- */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)&current_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - __monitor((void *)&current_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - } else { - local_irq_enable(); - } -} - - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. 
- * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { -- cgit v1.2.3 From 790c73f6289a204f858ffdcbe4a2b38e91657ec6 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 15 Feb 2008 17:52:48 -0200 Subject: x86: KVM guest: paravirtualized clocksource This is the guest part of kvm clock implementation It does not do tsc-only timing, as tsc can have deltas between cpus, and it did not seem worthy to me to keep adjusting them. We do use it, however, for fine-grained adjustment. Other than that, time comes from the host. 
[randy dunlap: add missing include] [randy dunlap: disallow on Voyager or Visual WS] Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Randy Dunlap Signed-off-by: Avi Kivity --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kvmclock.c | 160 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_32.c | 5 ++ arch/x86/kernel/setup_64.c | 5 ++ 4 files changed, 171 insertions(+) create mode 100644 arch/x86/kernel/kvmclock.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90e092d0af0c..483047a33024 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o +obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o ifdef CONFIG_INPUT_PCSPKR diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c new file mode 100644 index 000000000000..b999f5e5b3bf --- /dev/null +++ b/arch/x86/kernel/kvmclock.c @@ -0,0 +1,160 @@ +/* KVM paravirtual clock driver. A clocksource implementation + Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include +#include +#include +#include +#include +#include + +#define KVM_SCALE 22 + +static int kvmclock = 1; + +static int parse_no_kvmclock(char *arg) +{ + kvmclock = 0; + return 0; +} +early_param("no-kvmclock", parse_no_kvmclock); + +/* The hypervisor will put information about time periodically here */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); +#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field + +static inline u64 kvm_get_delta(u64 last_tsc) +{ + int cpu = smp_processor_id(); + u64 delta = native_read_tsc() - last_tsc; + return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; +} + +static struct kvm_wall_clock wall_clock; +static cycle_t kvm_clock_read(void); +/* + * The wallclock is the time of day when we booted. Since then, some time may + * have elapsed since the hypervisor wrote the data. So we try to account for + * that with system time + */ +unsigned long kvm_get_wallclock(void) +{ + u32 wc_sec, wc_nsec; + u64 delta; + struct timespec ts; + int version, nsec; + int low, high; + + low = (int)__pa(&wall_clock); + high = ((u64)__pa(&wall_clock) >> 32); + + delta = kvm_clock_read(); + + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); + do { + version = wall_clock.wc_version; + rmb(); + wc_sec = wall_clock.wc_sec; + wc_nsec = wall_clock.wc_nsec; + rmb(); + } while ((wall_clock.wc_version != version) || (version & 1)); + + delta = kvm_clock_read() - delta; + delta += wc_nsec; + nsec = do_div(delta, NSEC_PER_SEC); + set_normalized_timespec(&ts, wc_sec + delta, nsec); + /* + * Of all mechanisms of time adjustment I've tested, this one + * was the champion! + */ + return ts.tv_sec + 1; +} + +int kvm_set_wallclock(unsigned long now) +{ + return 0; +} + +/* + * This is our read_clock function. 
The host puts an tsc timestamp each time + * it updates a new time. Without the tsc adjustment, we can have a situation + * in which a vcpu starts to run earlier (smaller system_time), but probes + * time later (compared to another vcpu), leading to backwards time + */ +static cycle_t kvm_clock_read(void) +{ + u64 last_tsc, now; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + + last_tsc = get_clock(cpu, tsc_timestamp); + now = get_clock(cpu, system_time); + + now += kvm_get_delta(last_tsc); + preempt_enable(); + + return now; +} +static struct clocksource kvm_clock = { + .name = "kvm-clock", + .read = kvm_clock_read, + .rating = 400, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << KVM_SCALE, + .shift = KVM_SCALE, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static int kvm_register_clock(void) +{ + int cpu = smp_processor_id(); + int low, high; + low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; + high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); + + return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); +} + +static void kvm_setup_secondary_clock(void) +{ + /* + * Now that the first cpu already had this clocksource initialized, + * we shouldn't fail. + */ + WARN_ON(kvm_register_clock()); + /* ok, done with our trickery, call native */ + setup_secondary_APIC_clock(); +} + +void __init kvmclock_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { + if (kvm_register_clock()) + return; + pv_time_ops.get_wallclock = kvm_get_wallclock; + pv_time_ops.set_wallclock = kvm_set_wallclock; + pv_time_ops.sched_clock = kvm_clock_read; + pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; + clocksource_register(&kvm_clock); + } +} diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 44cc9b933932..5a849ddd09ee 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -47,6 +47,7 @@ #include #include #include +#include #include