From 9ce53d23da31815c0d0ae0380f898da4b5aa7af6 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Thu, 8 Dec 2011 22:23:00 +0200
Subject: ARM: OMAP: rx51: fix USB

commit e5fe29c7198a1f6616286dfc8602a69da165cb3f upstream.

Commit 10299e2e4e3ed3b16503d4e04edd48b33083f4e2 (ARM: RX-51:
Enable isp1704 power on/off) added power management for isp1704.

However, the transceiver should be powered on by default,
otherwise USB doesn't work at all for networking during
boot.

All kernels after v3.0 are affected.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Reviewed-by: Sebastian Reichel <sre@debian.org>
[tony@atomide.com: updated comments]
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/mach-omap2/board-rx51-peripherals.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 88bd6f7705f0..c56597172bfc 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -133,7 +133,7 @@ static struct platform_device rx51_charger_device = {
 static void __init rx51_charger_init(void)
 {
 	WARN_ON(gpio_request_one(RX51_USB_TRANSCEIVER_RST_GPIO,
-		GPIOF_OUT_INIT_LOW, "isp1704_reset"));
+		GPIOF_OUT_INIT_HIGH, "isp1704_reset"));
 
 	platform_device_register(&rx51_charger_device);
 }
-- 
cgit v1.2.3


From 43579d76bdd733c9baf131bbc29719c2a2821569 Mon Sep 17 00:00:00 2001
From: Jason Chen <jason.chen@linaro.org>
Date: Wed, 30 Nov 2011 11:34:27 +0800
Subject: MXC PWM: should active during DOZE/WAIT/DBG mode

commit c0d96aed8c6dd925afe9ea35491a0cd458642a86 upstream.

Signed-off-by: Jason Chen <jason.chen@linaro.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/plat-mxc/pwm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c
index 7a61ef8f471a..7bf2a00034f8 100644
--- a/arch/arm/plat-mxc/pwm.c
+++ b/arch/arm/plat-mxc/pwm.c
@@ -32,6 +32,9 @@
 #define MX3_PWMSAR                0x0C    /* PWM Sample Register */
 #define MX3_PWMPR                 0x10    /* PWM Period Register */
 #define MX3_PWMCR_PRESCALER(x)    (((x - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_DOZEEN                (1 << 24)
+#define MX3_PWMCR_WAITEN                (1 << 23)
+#define MX3_PWMCR_DBGEN			(1 << 22)
 #define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16)
 #define MX3_PWMCR_CLKSRC_IPG      (1 << 16)
 #define MX3_PWMCR_EN              (1 << 0)
@@ -77,7 +80,9 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 		writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR);
 		writel(period_cycles, pwm->mmio_base + MX3_PWMPR);
 
-		cr = MX3_PWMCR_PRESCALER(prescale) | MX3_PWMCR_EN;
+		cr = MX3_PWMCR_PRESCALER(prescale) |
+			MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
+			MX3_PWMCR_DBGEN | MX3_PWMCR_EN;
 
 		if (cpu_is_mx25())
 			cr |= MX3_PWMCR_CLKSRC_IPG;
-- 
cgit v1.2.3


From 3c681ec96dcc0acd6d5386773b44e6c129919394 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 19 Dec 2011 16:38:30 +0100
Subject: oprofile: Fix uninitialized memory access when writing to writing to
 oprofilefs

commit 913050b91eb94f194392dd797b1ff3779f606ac0 upstream.

If oprofilefs_ulong_from_user() is called with count equals
zero, *val remains unchanged. Depending on the implementation it
might be uninitialized.

Change oprofilefs_ulong_from_user()'s interface to return count
on success. Thus, we are able to return early if count equals
zero which avoids using *val uninitialized. Fixing all users of
oprofilefs_ulong_ from_user().

This follows write syscall implementation when count is zero:
"If count is zero ... [and if] no errors are detected, 0 will be
returned without causing any other effect." (man 2 write)

Reported-By: Mike Waychison <mikew@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: oprofile-list <oprofile-list@lists.sourceforge.net>
Link: http://lkml.kernel.org/r/20111219153830.GH16765@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/s390/oprofile/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 0e358c2cffeb..422110a4385b 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -90,7 +90,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
 		return -EINVAL;
 
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 
 	if (oprofile_started)
-- 
cgit v1.2.3


From 8048ac75378918eab0f68ba175915e72ef73e0da Mon Sep 17 00:00:00 2001
From: Jason Chen <jason.chen@linaro.org>
Date: Mon, 19 Dec 2011 11:23:28 +0800
Subject: ARM:imx:fix pwm period value

commit 5776ac2eb33164c77cdb4d2b48feee15616eaba3 upstream.

According to imx pwm RM, the real period value should be
PERIOD value in PWMPR plus 2.

PWMO (Hz) = PCLK(Hz) / (period +2)

Signed-off-by: Jason Chen <jason.chen@linaro.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/plat-mxc/pwm.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c
index 7bf2a00034f8..f4b68beddbb3 100644
--- a/arch/arm/plat-mxc/pwm.c
+++ b/arch/arm/plat-mxc/pwm.c
@@ -77,6 +77,15 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 		do_div(c, period_ns);
 		duty_cycles = c;
 
+		/*
+		 * according to imx pwm RM, the real period value should be
+		 * PERIOD value in PWMPR plus 2.
+		 */
+		if (period_cycles > 2)
+			period_cycles -= 2;
+		else
+			period_cycles = 0;
+
 		writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR);
 		writel(period_cycles, pwm->mmio_base + MX3_PWMPR);
 
-- 
cgit v1.2.3


From 4347b837ab999865063649cf801ba4f8d7e8748e Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vladimir.zapolskiy@nokia.com>
Date: Thu, 22 Dec 2011 16:15:40 +0100
Subject: oprofile, arm/sh: Fix oprofile_arch_exit() linkage issue

commit 55205c916e179e09773d98d290334d319f45ac6b upstream.

This change fixes a linking problem, which happens if oprofile
is selected to be compiled as built-in:

  `oprofile_arch_exit' referenced in section `.init.text' of
  arch/arm/oprofile/built-in.o: defined in discarded section
  `.exit.text' of arch/arm/oprofile/built-in.o

The problem is appeared after commit 87121ca504, which
introduced oprofile_arch_exit() calls from __init function. Note
that the aforementioned commit has been backported to stable
branches, and the problem is known to be reproduced at least
with 3.0.13 and 3.1.5 kernels.

Signed-off-by: Vladimir Zapolskiy <vladimir.zapolskiy@nokia.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: oprofile-list <oprofile-list@lists.sourceforge.net>
Link: http://lkml.kernel.org/r/20111222151540.GB16765@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/oprofile/common.c | 2 +-
 arch/sh/oprofile/common.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index c074e66ad224..4e0a371630b3 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -116,7 +116,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	return oprofile_perf_init(ops);
 }
 
-void __exit oprofile_arch_exit(void)
+void oprofile_arch_exit(void)
 {
 	oprofile_perf_exit();
 }
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index b4c2d2b946dd..e4dd5d5a1115 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -49,7 +49,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	return oprofile_perf_init(ops);
 }
 
-void __exit oprofile_arch_exit(void)
+void oprofile_arch_exit(void)
 {
 	oprofile_perf_exit();
 	kfree(sh_pmu_op_name);
@@ -60,5 +60,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	ops->backtrace = sh_backtrace;
 	return -ENODEV;
 }
-void __exit oprofile_arch_exit(void) {}
+void oprofile_arch_exit(void) {}
 #endif /* CONFIG_HW_PERF_EVENTS */
-- 
cgit v1.2.3


From 747b409502fe765784cda1135d806042beddaa89 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 22 Dec 2011 13:23:59 -0800
Subject: sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq().

[ Upstream commit 7cc8583372a21d98a23b703ad96cab03180b5030 ]

This silently was working for many years and stopped working on
Niagara-T3 machines.

We need to set the MSIQ to VALID before we can set it's state to IDLE.

On Niagara-T3, setting the state to IDLE first was causing HV_EINVAL
errors.  The hypervisor documentation says, rather ambiguously, that
the MSIQ must be "initialized" before one can set the state.

I previously understood this to mean merely that a successful setconf()
operation has been performed on the MSIQ, which we have done at this
point.  But it seems to also mean that it has been set VALID too.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/kernel/pci_sun4v.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index b01a06e9ae4e..9e73c4a37ae9 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -848,10 +848,10 @@ static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
 	if (!irq)
 		return -ENOMEM;
 
-	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
-		return -EINVAL;
 	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
 		return -EINVAL;
+	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
+		return -EINVAL;
 
 	return irq;
 }
-- 
cgit v1.2.3


From fde939495571ffd22458e94745b0c2e6af33478d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 14 Dec 2011 10:05:22 -0800
Subject: sparc32: Be less strict in matching %lo part of relocation.

[ Upstream commit b1f44e13a525d2ffb7d5afe2273b7169d6f2222e ]

The "(insn & 0x01800000) != 0x01800000" test matches 'restore'
but that is a legitimate place to see the %lo() part of a 32-bit
symbol relocation, particularly in tail calls.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Sergei Trofimovich <slyfox@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/mm/btfixup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/mm/btfixup.c b/arch/sparc/mm/btfixup.c
index 5175ac2f4820..8a7f81743c12 100644
--- a/arch/sparc/mm/btfixup.c
+++ b/arch/sparc/mm/btfixup.c
@@ -302,8 +302,7 @@ void __init btfixup(void)
 				case 'i':	/* INT */
 					if ((insn & 0xc1c00000) == 0x01000000) /* %HI */
 						set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10));
-					else if ((insn & 0x80002000) == 0x80002000 &&
-					         (insn & 0x01800000) != 0x01800000) /* %LO */
+					else if ((insn & 0x80002000) == 0x80002000) /* %LO */
 						set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x3ff));
 					else {
 						prom_printf(insn_i, p, addr, insn);
-- 
cgit v1.2.3


From cff6d2096e9a57c2497dd5ee4aed3c97149bfc9e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Nov 2011 22:44:58 -0800
Subject: sparc64: Patch sun4v code sequences properly on module load.

[ Upstream commit 0b64120cceb86e93cb1bda0dc055f13016646907 ]

Some of the sun4v code patching occurs in inline functions visible
to, and usable by, modules.

Therefore we have to patch them up during module load.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/kernel/entry.h    |  7 +++++++
 arch/sparc/kernel/module.c   | 27 +++++++++++++++++++++++++
 arch/sparc/kernel/setup_64.c | 48 ++++++++++++++++++++++++++------------------
 3 files changed, 63 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index e27f8ea8656e..0c218e4c0881 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -42,6 +42,9 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
 #else /* CONFIG_SPARC32 */
+
+#include <asm/trap_block.h>
+
 struct popc_3insn_patch_entry {
 	unsigned int	addr;
 	unsigned int	insns[3];
@@ -57,6 +60,10 @@ extern struct popc_6insn_patch_entry __popc_6insn_patch,
 	__popc_6insn_patch_end;
 
 extern void __init per_cpu_patch(void);
+extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
+				    struct sun4v_1insn_patch_entry *);
+extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+				    struct sun4v_2insn_patch_entry *);
 extern void __init sun4v_patch(void);
 extern void __init boot_cpu_id_too_large(int cpu);
 extern unsigned int dcache_parity_tl1_occurred;
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 99ba5baa9497..8172c18d844f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -17,6 +17,8 @@
 #include <asm/processor.h>
 #include <asm/spitfire.h>
 
+#include "entry.h"
+
 #ifdef CONFIG_SPARC64
 
 #include <linux/jump_label.h>
@@ -220,6 +222,29 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 }
 
 #ifdef CONFIG_SPARC64
+static void do_patch_sections(const Elf_Ehdr *hdr,
+			      const Elf_Shdr *sechdrs)
+{
+	const Elf_Shdr *s, *sun4v_1insn = NULL, *sun4v_2insn = NULL;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".sun4v_1insn_patch", secstrings + s->sh_name))
+			sun4v_1insn = s;
+		if (!strcmp(".sun4v_2insn_patch", secstrings + s->sh_name))
+			sun4v_2insn = s;
+	}
+
+	if (sun4v_1insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_1insn->sh_addr;
+		sun4v_patch_1insn_range(p, p + sun4v_1insn->sh_size);
+	}
+	if (sun4v_2insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_2insn->sh_addr;
+		sun4v_patch_2insn_range(p, p + sun4v_2insn->sh_size);
+	}
+}
+
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
@@ -227,6 +252,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 	/* make jump label nops */
 	jump_label_apply_nops(me);
 
+	do_patch_sections(hdr, sechdrs);
+
 	/* Cheetah's I-cache is fully coherent.  */
 	if (tlb_type == spitfire) {
 		unsigned long va;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 3c5bb784214f..4e7d3ff0ccb4 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -234,40 +234,50 @@ void __init per_cpu_patch(void)
 	}
 }
 
-void __init sun4v_patch(void)
+void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *start,
+			     struct sun4v_1insn_patch_entry *end)
 {
-	extern void sun4v_hvapi_init(void);
-	struct sun4v_1insn_patch_entry *p1;
-	struct sun4v_2insn_patch_entry *p2;
-
-	if (tlb_type != hypervisor)
-		return;
+	while (start < end) {
+		unsigned long addr = start->addr;
 
-	p1 = &__sun4v_1insn_patch;
-	while (p1 < &__sun4v_1insn_patch_end) {
-		unsigned long addr = p1->addr;
-
-		*(unsigned int *) (addr +  0) = p1->insn;
+		*(unsigned int *) (addr +  0) = start->insn;
 		wmb();
 		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
 
-		p1++;
+		start++;
 	}
+}
 
-	p2 = &__sun4v_2insn_patch;
-	while (p2 < &__sun4v_2insn_patch_end) {
-		unsigned long addr = p2->addr;
+void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+			     struct sun4v_2insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
 
-		*(unsigned int *) (addr +  0) = p2->insns[0];
+		*(unsigned int *) (addr +  0) = start->insns[0];
 		wmb();
 		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
 
-		*(unsigned int *) (addr +  4) = p2->insns[1];
+		*(unsigned int *) (addr +  4) = start->insns[1];
 		wmb();
 		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
 
-		p2++;
+		start++;
 	}
+}
+
+void __init sun4v_patch(void)
+{
+	extern void sun4v_hvapi_init(void);
+
+	if (tlb_type != hypervisor)
+		return;
+
+	sun4v_patch_1insn_range(&__sun4v_1insn_patch,
+				&__sun4v_1insn_patch_end);
+
+	sun4v_patch_2insn_range(&__sun4v_2insn_patch,
+				&__sun4v_2insn_patch_end);
 
 	sun4v_hvapi_init();
 }
-- 
cgit v1.2.3


From 2d2eb1d284257cbb7ebb29bd75a3cbbc9275e4f7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Nov 2011 18:17:59 -0800
Subject: sparc: Kill custom io_remap_pfn_range().

[ Upstream commit 3e37fd3153ac95088a74f5e7c569f7567e9f993a ]

To handle the large physical addresses, just make a simple wrapper
around remap_pfn_range() like MIPS does.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/include/asm/pgtable_32.h |  20 ++++-
 arch/sparc/include/asm/pgtable_64.h |  20 ++++-
 arch/sparc/mm/Makefile              |   1 -
 arch/sparc/mm/generic_32.c          |  98 ---------------------
 arch/sparc/mm/generic_64.c          | 164 ------------------------------------
 5 files changed, 32 insertions(+), 271 deletions(-)
 delete mode 100644 arch/sparc/mm/generic_32.c
 delete mode 100644 arch/sparc/mm/generic_64.c

(limited to 'arch')

diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 5b31a8e89823..a790cc657476 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -431,10 +431,6 @@ extern unsigned long *sparc_valid_addr_bitmap;
 #define kern_addr_valid(addr) \
 	(test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
 
-extern int io_remap_pfn_range(struct vm_area_struct *vma,
-			      unsigned long from, unsigned long pfn,
-			      unsigned long size, pgprot_t prot);
-
 /*
  * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
  * its high 4 bits.  These macros/functions put it there or get it from there.
@@ -443,6 +439,22 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
 #define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
 #define GET_PFN(pfn)			(pfn & 0x0fffffffUL)
 
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+			   unsigned long, pgprot_t);
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long long offset, space, phys_base;
+
+	offset = ((unsigned long long) GET_PFN(pfn)) << PAGE_SHIFT;
+	space = GET_IOSPACE(pfn);
+	phys_base = offset | (space << 32ULL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 ({									  \
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1e03c5a6b4f7..982262804237 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -750,10 +750,6 @@ static inline bool kern_addr_valid(unsigned long addr)
 
 extern int page_in_phys_avail(unsigned long paddr);
 
-extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-			       unsigned long pfn,
-			       unsigned long size, pgprot_t prot);
-
 /*
  * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
  * its high 4 bits.  These macros/functions put it there or get it from there.
@@ -762,6 +758,22 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 #define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
 #define GET_PFN(pfn)			(pfn & 0x0fffffffffffffffUL)
 
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+			   unsigned long, pgprot_t);
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
+	int space = GET_IOSPACE(pfn);
+	unsigned long phys_base;
+
+	phys_base = offset | (((unsigned long) space) << 32UL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+
 #include <asm-generic/pgtable.h>
 
 /* We provide our own get_unmapped_area to cope with VA holes and
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 79836a7dd00c..3b6e248650d4 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
 obj-$(CONFIG_SPARC32)   += loadmmu.o
-obj-y                   += generic_$(BITS).o
 obj-$(CONFIG_SPARC32)   += extable.o btfixup.o srmmu.o iommu.o io-unit.o
 obj-$(CONFIG_SPARC32)   += hypersparc.o viking.o tsunami.o swift.o
 obj-$(CONFIG_SPARC_LEON)+= leon_mm.o
diff --git a/arch/sparc/mm/generic_32.c b/arch/sparc/mm/generic_32.c
deleted file mode 100644
index e6067b75f11c..000000000000
--- a/arch/sparc/mm/generic_32.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * generic.c: Generic Sparc mm routines that are not dependent upon
- *            MMU type but are Sparc specific.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-/* Remap IO memory, the same way as remap_pfn_range(), but use
- * the obio memory space.
- *
- * They use a pgprot that sets PAGE_IO and does not check the
- * mem_map table as this is independent of normal memory.
- */
-static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		set_pte_at(mm, address, pte, mk_pte_io(offset, prot, space));
-		address += PAGE_SIZE;
-		offset += PAGE_SIZE;
-		pte++;
-	} while (address < end);
-}
-
-static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	offset -= address;
-	do {
-		pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
-		if (!pte)
-			return -ENOMEM;
-		io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-		       unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	int error = 0;
-	pgd_t * dir;
-	unsigned long beg = from;
-	unsigned long end = from + size;
-	struct mm_struct *mm = vma->vm_mm;
-	int space = GET_IOSPACE(pfn);
-	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
-
-	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-	vma->vm_pgoff = (offset >> PAGE_SHIFT) |
-		((unsigned long)space << 28UL);
-
-	offset -= from;
-	dir = pgd_offset(mm, from);
-	flush_cache_range(vma, beg, end);
-
-	while (from < end) {
-		pmd_t *pmd = pmd_alloc(mm, dir, from);
-		error = -ENOMEM;
-		if (!pmd)
-			break;
-		error = io_remap_pmd_range(mm, pmd, from, end - from, offset + from, prot, space);
-		if (error)
-			break;
-		from = (from + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	}
-
-	flush_tlb_range(vma, beg, end);
-	return error;
-}
-EXPORT_SYMBOL(io_remap_pfn_range);
diff --git a/arch/sparc/mm/generic_64.c b/arch/sparc/mm/generic_64.c
deleted file mode 100644
index 3cb00dfd4bd6..000000000000
--- a/arch/sparc/mm/generic_64.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * generic.c: Generic Sparc mm routines that are not dependent upon
- *            MMU type but are Sparc specific.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-
-/* Remap IO memory, the same way as remap_pfn_range(), but use
- * the obio memory space.
- *
- * They use a pgprot that sets PAGE_IO and does not check the
- * mem_map table as this is independent of normal memory.
- */
-static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
-				      unsigned long address,
-				      unsigned long size,
-				      unsigned long offset, pgprot_t prot,
-				      int space)
-{
-	unsigned long end;
-
-	/* clear hack bit that was used as a write_combine side-effect flag */
-	offset &= ~0x1UL;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		pte_t entry;
-		unsigned long curend = address + PAGE_SIZE;
-		
-		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
-		if (!(address & 0xffff)) {
-			if (PAGE_SIZE < (4 * 1024 * 1024) &&
-			    !(address & 0x3fffff) &&
-			    !(offset & 0x3ffffe) &&
-			    end >= address + 0x400000) {
-				entry = mk_pte_io(offset, prot, space,
-						  4 * 1024 * 1024);
-				curend = address + 0x400000;
-				offset += 0x400000;
-			} else if (PAGE_SIZE < (512 * 1024) &&
-				   !(address & 0x7ffff) &&
-				   !(offset & 0x7fffe) &&
-				   end >= address + 0x80000) {
-				entry = mk_pte_io(offset, prot, space,
-						  512 * 1024 * 1024);
-				curend = address + 0x80000;
-				offset += 0x80000;
-			} else if (PAGE_SIZE < (64 * 1024) &&
-				   !(offset & 0xfffe) &&
-				   end >= address + 0x10000) {
-				entry = mk_pte_io(offset, prot, space,
-						  64 * 1024);
-				curend = address + 0x10000;
-				offset += 0x10000;
-			} else
-				offset += PAGE_SIZE;
-		} else
-			offset += PAGE_SIZE;
-
-		if (pte_write(entry))
-			entry = pte_mkdirty(entry);
-		do {
-			BUG_ON(!pte_none(*pte));
-			set_pte_at(mm, address, pte, entry);
-			address += PAGE_SIZE;
-			pte_val(entry) += PAGE_SIZE;
-			pte++;
-		} while (address < curend);
-	} while (address < end);
-}
-
-static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	offset -= address;
-	do {
-		pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
-		if (!pte)
-			return -ENOMEM;
-		io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
-		pte_unmap(pte);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-static inline int io_remap_pud_range(struct mm_struct *mm, pud_t * pud, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PUD_MASK;
-	end = address + size;
-	if (end > PUD_SIZE)
-		end = PUD_SIZE;
-	offset -= address;
-	do {
-		pmd_t *pmd = pmd_alloc(mm, pud, address);
-		if (!pud)
-			return -ENOMEM;
-		io_remap_pmd_range(mm, pmd, address, end - address, address + offset, prot, space);
-		address = (address + PUD_SIZE) & PUD_MASK;
-		pud++;
-	} while (address < end);
-	return 0;
-}
-
-int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	int error = 0;
-	pgd_t * dir;
-	unsigned long beg = from;
-	unsigned long end = from + size;
-	struct mm_struct *mm = vma->vm_mm;
-	int space = GET_IOSPACE(pfn);
-	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
-	unsigned long phys_base;
-
-	phys_base = offset | (((unsigned long) space) << 32UL);
-
-	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-	vma->vm_pgoff = phys_base >> PAGE_SHIFT;
-
-	offset -= from;
-	dir = pgd_offset(mm, from);
-	flush_cache_range(vma, beg, end);
-
-	while (from < end) {
-		pud_t *pud = pud_alloc(mm, dir, from);
-		error = -ENOMEM;
-		if (!pud)
-			break;
-		error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space);
-		if (error)
-			break;
-		from = (from + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	}
-
-	flush_tlb_range(vma, beg, end);
-	return error;
-}
-EXPORT_SYMBOL(io_remap_pfn_range);
-- 
cgit v1.2.3


From 9dd04b12d0588de337ff615f6991862956dd40de Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 19 Oct 2011 15:15:58 -0700
Subject: sparc32: Remove non-kernel code from memcpy implementation.

[ Upstream commit 045b7de9ca0cf09f1adc3efa467f668b89238390 ]

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Kjetil Oftedal <oftedal@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/lib/memcpy.S | 607 +-----------------------------------------------
 1 file changed, 2 insertions(+), 605 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
index 34fe65751737..6a8ef5d8daf0 100644
--- a/arch/sparc/lib/memcpy.S
+++ b/arch/sparc/lib/memcpy.S
@@ -7,17 +7,12 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-#ifdef __KERNEL__
-
-#define FUNC(x) 											\
+#define FUNC(x) 		\
 	.globl	x;		\
 	.type	x,@function;	\
-	.align	4;											\
+	.align	4;		\
 x:
 
-#undef FASTER_REVERSE
-#undef FASTER_NONALIGNED
-#define FASTER_ALIGNED
 
 /* In kernel these functions don't return a value.
  * One should use macros in asm/string.h for that purpose.
@@ -26,21 +21,6 @@ x:
 #define SETUP_RETL
 #define RETL_INSN	clr	%o0
 
-#else
-
-/* libc */
-
-#include "DEFS.h"
-
-#define FASTER_REVERSE
-#define FASTER_NONALIGNED
-#define FASTER_ALIGNED
-
-#define SETUP_RETL	mov	%o0, %g6
-#define RETL_INSN	mov	%g6, %o0
-
-#endif
-
 /* Both these macros have to start with exactly the same insn */
 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 	ldd	[%src + (offset) + 0x00], %t0; \
@@ -164,30 +144,6 @@ x:
 	.text
 	.align	4
 
-#ifdef FASTER_REVERSE
-
-70:	/* rdword_align */
-
-	andcc		%o1, 1, %g0
-	be		4f
-	 andcc		%o1, 2, %g0
-
-	ldub		[%o1 - 1], %g2
-	sub		%o1, 1, %o1
-	stb		%g2, [%o0 - 1]
-	sub		%o2, 1, %o2
-	be		3f
-	 sub		%o0, 1, %o0
-4:
-	lduh		[%o1 - 2], %g2
-	sub		%o1, 2, %o1
-	sth		%g2, [%o0 - 2]
-	sub		%o2, 2, %o2
-	b		3f
-	 sub		%o0, 2, %o0
-
-#endif /* FASTER_REVERSE */
-
 0:
 	retl
 	 nop		! Only bcopy returns here and it retuns void...
@@ -207,8 +163,6 @@ FUNC(memmove)
 	bleu		0f
 	 andcc		%o4, 3, %o5
 
-#ifndef FASTER_REVERSE
-
 	add		%o1, %o2, %o1
 	add		%o0, %o2, %o0
 	sub		%o1, 1, %o1
@@ -226,294 +180,6 @@ FUNC(memmove)
 	retl
 	 RETL_INSN
 
-#else /* FASTER_REVERSE */
-
-	add		%o1, %o2, %o1
-	add		%o0, %o2, %o0
-	bne		77f
-	 cmp		%o2, 15
-	bleu		91f
-	 andcc		%o1, 3, %g0
-	bne		70b
-3:
-	 andcc		%o1, 4, %g0
-
-	be		2f
-	 mov		%o2, %g1
-
-	ld		[%o1 - 4], %o4
-	sub		%g1, 4, %g1
-	st		%o4, [%o0 - 4]
-	sub		%o1, 4, %o1
-	sub		%o0, 4, %o0
-2:
-	andcc		%g1, 0xffffff80, %g7
-	be		3f
-	 andcc		%o0, 4, %g0
-
-	be		74f + 4
-5:
-	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
-	sub		%o1, 128, %o1
-	bne		5b
-	 sub		%o0, 128, %o0
-3:
-	andcc		%g1, 0x70, %g7
-	be		72f
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(72f), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	sub		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(72f), %g0
-	 sub		%o0, %g7, %o0
-
-71:	/* rmemcpy_table */
-	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
-
-72:	/* rmemcpy_table_end */
-
-	be		73f
-	 andcc		%g1, 4, %g0
-
-	ldd		[%o1 - 0x08], %g2
-	sub		%o0, 8, %o0
-	sub		%o1, 8, %o1
-	st		%g2, [%o0]
-	st		%g3, [%o0 + 0x04]
-
-73:	/* rmemcpy_last7 */
-
-	be		1f
-	 andcc		%g1, 2, %g0
-
-	ld		[%o1 - 4], %g2
-	sub		%o1, 4, %o1
-	st		%g2, [%o0 - 4]
-	sub		%o0, 4, %o0
-1:
-	be		1f
-	 andcc		%g1, 1, %g0
-
-	lduh		[%o1 - 2], %g2
-	sub		%o1, 2, %o1
-	sth		%g2, [%o0 - 2]
-	sub		%o0, 2, %o0
-1:
-	be		1f
-	 nop
-
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 - 1]
-1:
-	retl
- 	 RETL_INSN
-
-74:	/* rldd_std */
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
-	sub		%o1, 128, %o1
-	bne		74b
-	 sub		%o0, 128, %o0
-
-	andcc		%g1, 0x70, %g7
-	be		72b
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(72b), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	sub		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(72b), %g0
-	 sub		%o0, %g7, %o0
-
-75:	/* rshort_end */
-
-	and		%o2, 0xe, %o3
-2:
-	sethi		%hi(76f), %o5
-	sll		%o3, 3, %o4
-	sub		%o0, %o3, %o0
-	sub		%o5, %o4, %o5
-	sub		%o1, %o3, %o1
-	jmpl		%o5 + %lo(76f), %g0
-	 andcc		%o2, 1, %g0
-
-	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
-
-76:	/* rshort_table_end */
-
-	be		1f
-	 nop
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 - 1]
-1:
-	retl
- 	 RETL_INSN
-
-91:	/* rshort_aligned_end */
-
-	bne		75b
-	 andcc		%o2, 8, %g0
-
-	be		1f
-	 andcc		%o2, 4, %g0
-
-	ld		[%o1 - 0x08], %g2
-	ld		[%o1 - 0x04], %g3
-	sub		%o1, 8, %o1
-	st		%g2, [%o0 - 0x08]
-	st		%g3, [%o0 - 0x04]
-	sub		%o0, 8, %o0
-1:
-	b		73b
-	 mov		%o2, %g1
-
-77:	/* rnon_aligned */
-	cmp		%o2, 15
-	bleu		75b
-	 andcc		%o0, 3, %g0
-	be		64f
-	 andcc		%o0, 1, %g0
-	be		63f
-	 andcc		%o0, 2, %g0
-	ldub		[%o1 - 1], %g5
-	sub		%o1, 1, %o1
-	stb		%g5, [%o0 - 1]
-	sub		%o0, 1, %o0
-	be		64f
-	 sub		%o2, 1, %o2
-63:
-	ldub		[%o1 - 1], %g5
-	sub		%o1, 2, %o1
-	stb		%g5, [%o0 - 1]
-	sub		%o0, 2, %o0
-	ldub		[%o1], %g5
-	sub		%o2, 2, %o2
-	stb		%g5, [%o0]
-64:	
-	and		%o1, 3, %g2
-	and		%o1, -4, %o1
-	and		%o2, 0xc, %g3
-	add		%o1, 4, %o1
-	cmp		%g3, 4
-	sll		%g2, 3, %g4
-	mov		32, %g2
-	be		4f
-	 sub		%g2, %g4, %g7
-
-	blu		3f
-	 cmp		%g3, 8
-
-	be		2f
-	 srl		%o2, 2, %g3
-
-	ld		[%o1 - 4], %o3
-	add		%o0, -8, %o0
-	ld		[%o1 - 8], %o4
-	add		%o1, -16, %o1
-	b		7f
-	 add		%g3, 1, %g3
-2:
-	ld		[%o1 - 4], %o4
-	add		%o0, -4, %o0
-	ld		[%o1 - 8], %g1
-	add		%o1, -12, %o1
-	b		8f
-	 add		%g3, 2, %g3
-3:
-	ld		[%o1 - 4], %o5
-	add		%o0, -12, %o0
-	ld		[%o1 - 8], %o3
-	add		%o1, -20, %o1
-	b		6f
-	 srl		%o2, 2, %g3
-4:
-	ld		[%o1 - 4], %g1
-	srl		%o2, 2, %g3
-	ld		[%o1 - 8], %o5
-	add		%o1, -24, %o1
-	add		%o0, -16, %o0
-	add		%g3, -1, %g3
-
-	ld		[%o1 + 12], %o3
-5:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 12]
-6:
-	ld		[%o1 + 8], %o4
-	sll		%o3, %g4, %g2
-	srl		%o5, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 8]
-7:
-	ld		[%o1 + 4], %g1
-	sll		%o4, %g4, %g2
-	srl		%o3, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 4]
-8:
-	ld		[%o1], %o5
-	sll		%g1, %g4, %g2
-	srl		%o4, %g7, %g5
-	addcc		%g3, -4, %g3
-	or		%g2, %g5, %g2
-	add		%o1, -16, %o1
-	st		%g2, [%o0]
-	add		%o0, -16, %o0
-	bne,a		5b	
-	 ld		[%o1 + 12], %o3
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	srl		%g4, 3, %g3
-	or		%g2, %g5, %g2
-	add		%o1, %g3, %o1
-	andcc		%o2, 2, %g0
-	st		%g2, [%o0 + 12]
-	be		1f
-	 andcc		%o2, 1, %g0
-	
-	ldub		[%o1 + 15], %g5
-	add		%o1, -2, %o1
-	stb		%g5, [%o0 + 11]
-	add		%o0, -2, %o0
-	ldub		[%o1 + 16], %g5
-	stb		%g5, [%o0 + 12]
-1:
-	be		1f
-	 nop
-	ldub		[%o1 + 15], %g5
-	stb		%g5, [%o0 + 11]
-1:
-	retl
-	 RETL_INSN
-
-#endif /* FASTER_REVERSE */
-
 /* NOTE: This code is executed just for the cases,
          where %src (=%o1) & 3 is != 0.
 	 We need to align it to 4. So, for (%src & 3)
@@ -653,22 +319,6 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	bne		82b
 	 add		%o0, 128, %o0
 
-#ifndef FASTER_ALIGNED
-
-	andcc		%g1, 0x70, %g7
-	be		80b
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(80b), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	add		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(80b), %g0
-	 add		%o0, %g7, %o0
-
-#else /* FASTER_ALIGNED */
-
 	andcc		%g1, 0x70, %g7
 	be		84f
 	 andcc		%g1, 8, %g0
@@ -723,19 +373,9 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	retl
  	 RETL_INSN
 
-#endif /* FASTER_ALIGNED */
-
 86:	/* non_aligned */
 	cmp		%o2, 6
 	bleu		88f
-
-#ifdef FASTER_NONALIGNED
-
-	 cmp		%o2, 256
-	bcc		87f
-
-#endif /* FASTER_NONALIGNED */
-
 	 andcc		%o0, 3, %g0
 	be		61f
 	 andcc		%o0, 1, %g0
@@ -855,249 +495,6 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	retl
 	 RETL_INSN
 
-#ifdef FASTER_NONALIGNED
-
-87:	/* faster_nonaligned */
-
-	andcc		%o1, 3, %g0
-	be		3f
-	 andcc		%o1, 1, %g0
-
-	be		4f
-	 andcc		%o1, 2, %g0
-
-	ldub		[%o1], %g2
-	add		%o1, 1, %o1
-	stb		%g2, [%o0]
-	sub		%o2, 1, %o2
-	bne		3f
-	 add		%o0, 1, %o0
-4:
-	lduh		[%o1], %g2
-	add		%o1, 2, %o1
-	srl		%g2, 8, %g3
-	sub		%o2, 2, %o2
-	stb		%g3, [%o0]
-	add		%o0, 2, %o0
-	stb		%g2, [%o0 - 1]
-3:
-	 andcc		%o1, 4, %g0
-
-	bne		2f
-	 cmp		%o5, 1
-
-	ld		[%o1], %o4
-	srl		%o4, 24, %g2
-	stb		%g2, [%o0]
-	srl		%o4, 16, %g3
-	stb		%g3, [%o0 + 1]
-	srl		%o4, 8, %g2
-	stb		%g2, [%o0 + 2]
-	sub		%o2, 4, %o2
-	stb		%o4, [%o0 + 3]
-	add		%o1, 4, %o1
-	add		%o0, 4, %o0
-2:
-	be		33f
-	 cmp		%o5, 2
-	be		32f
-	 sub		%o2, 4, %o2
-31:
-	ld		[%o1], %g2
-	add		%o1, 4, %o1
-	srl		%g2, 24, %g3
-	and		%o0, 7, %g5
-	stb		%g3, [%o0]
-	cmp		%g5, 7
-	sll		%g2, 8, %g1
-	add		%o0, 4, %o0
-	be		41f
-	 and		%o2, 0xffffffc0, %o3
-	ld		[%o0 - 7], %o4
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	st		%o4, [%o0 - 7]
-	sth		%g2, [%o0 - 3]
-	srl		%g1, 8, %g4
-	b		88f
-	 stb		%g4, [%o0 - 1]
-32:
-	ld		[%o1], %g2
-	add		%o1, 4, %o1
-	srl		%g2, 16, %g3
-	and		%o0, 7, %g5
-	sth		%g3, [%o0]
-	cmp		%g5, 6
-	sll		%g2, 16, %g1
-	add		%o0, 4, %o0
-	be		42f
-	 and		%o2, 0xffffffc0, %o3
-	ld		[%o0 - 6], %o4
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	st		%o4, [%o0 - 6]
-	b		88f
-	 sth		%g2, [%o0 - 2]
-33:
-	ld		[%o1], %g2
-	sub		%o2, 4, %o2
-	srl		%g2, 24, %g3
-	and		%o0, 7, %g5
-	stb		%g3, [%o0]
-	cmp		%g5, 5
-	srl		%g2, 8, %g4
-	sll		%g2, 24, %g1
-	sth		%g4, [%o0 + 1]
-	add		%o1, 4, %o1
-	be		43f
-	 and		%o2, 0xffffffc0, %o3
-
-	ld		[%o0 - 1], %o4
-	add		%o0, 4, %o0
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 24, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 24, %g2
-1:
-	st		%o4, [%o0 - 5]
-	b		88f
-	 stb		%g2, [%o0 - 1]
-41:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		41b
-	 add		%o0, 64, %o0
-	 
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	sth		%g2, [%o0 - 3]
-	srl		%g1, 8, %g4
-	b		88f
-	 stb		%g4, [%o0 - 1]
-43:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		43b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 24, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 24, %g2
-1:
-	stb		%g2, [%o0 + 3]
-	b		88f
-	 add		%o0, 4, %o0
-42:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		42b
-	 add		%o0, 64, %o0
-	 
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	sth		%g2, [%o0 - 2]
-
-	/* Fall through */
-	 
-#endif /* FASTER_NONALIGNED */
-
 88:	/* short_end */
 
 	and		%o2, 0xe, %o3
-- 
cgit v1.2.3


From 2588f7f219ab1e93be648ecf8bcb1599b1ecba09 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 19 Oct 2011 15:30:14 -0700
Subject: sparc32: Remove uses of %g7 in memcpy implementation.

[ Upstream commit 21f74d361dfd6a7d0e47574e315f780d8172084a ]

This is setting things up so that we can correct the return
value, so that it properly returns the original destination
buffer pointer.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Kjetil Oftedal <oftedal@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/lib/memcpy.S | 179 +++++++++++++++++++++++++-----------------------
 1 file changed, 92 insertions(+), 87 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
index 6a8ef5d8daf0..90a598846bab 100644
--- a/arch/sparc/lib/memcpy.S
+++ b/arch/sparc/lib/memcpy.S
@@ -235,7 +235,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	add		%o1, 4, %o1
 	add		%o0, 4, %o0
 2:
-	andcc		%g1, 0xffffff80, %g7
+	andcc		%g1, 0xffffff80, %g0
 	be		3f
 	 andcc		%o0, 4, %g0
 
@@ -245,22 +245,23 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
+	sub		%g1, 128, %g1
 	add		%o1, 128, %o1
-	bne		5b
+	cmp		%g1, 128
+	bge		5b
 	 add		%o0, 128, %o0
 3:
-	andcc		%g1, 0x70, %g7
+	andcc		%g1, 0x70, %g4
 	be		80f
 	 andcc		%g1, 8, %g0
 
 	sethi		%hi(80f), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	add		%o1, %g7, %o1
+	srl		%g4, 1, %o4
+	add		%g4, %o4, %o4
+	add		%o1, %g4, %o1
 	sub		%o5, %o4, %o5
 	jmpl		%o5 + %lo(80f), %g0
-	 add		%o0, %g7, %o0
+	 add		%o0, %g4, %o0
 
 79:	/* memcpy_table */
 
@@ -314,20 +315,21 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
+	subcc		%g1, 128, %g1
 	add		%o1, 128, %o1
-	bne		82b
+	cmp		%g1, 128
+	bge		82b
 	 add		%o0, 128, %o0
 
-	andcc		%g1, 0x70, %g7
+	andcc		%g1, 0x70, %g4
 	be		84f
 	 andcc		%g1, 8, %g0
 
 	sethi		%hi(84f), %o5
-	add		%o1, %g7, %o1
-	sub		%o5, %g7, %o5
+	add		%o1, %g4, %o1
+	sub		%o5, %g4, %o5
 	jmpl		%o5 + %lo(84f), %g0
-	 add		%o0, %g7, %o0
+	 add		%o0, %g4, %o0
 
 83:	/* amemcpy_table */
 
@@ -376,124 +378,127 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 86:	/* non_aligned */
 	cmp		%o2, 6
 	bleu		88f
-	 andcc		%o0, 3, %g0
+	 nop
+
+	save		%sp, -96, %sp
+	andcc		%i0, 3, %g0
 	be		61f
-	 andcc		%o0, 1, %g0
+	 andcc		%i0, 1, %g0
 	be		60f
-	 andcc		%o0, 2, %g0
+	 andcc		%i0, 2, %g0
 
-	ldub		[%o1], %g5
-	add		%o1, 1, %o1
-	stb		%g5, [%o0]
-	sub		%o2, 1, %o2
+	ldub		[%i1], %g5
+	add		%i1, 1, %i1
+	stb		%g5, [%i0]
+	sub		%i2, 1, %i2
 	bne		61f
-	 add		%o0, 1, %o0
+	 add		%i0, 1, %i0
 60:
-	ldub		[%o1], %g3
-	add		%o1, 2, %o1
-	stb		%g3, [%o0]
-	sub		%o2, 2, %o2
-	ldub		[%o1 - 1], %g3
-	add		%o0, 2, %o0
-	stb		%g3, [%o0 - 1]
+	ldub		[%i1], %g3
+	add		%i1, 2, %i1
+	stb		%g3, [%i0]
+	sub		%i2, 2, %i2
+	ldub		[%i1 - 1], %g3
+	add		%i0, 2, %i0
+	stb		%g3, [%i0 - 1]
 61:
-	and		%o1, 3, %g2
-	and		%o2, 0xc, %g3
-	and		%o1, -4, %o1
+	and		%i1, 3, %g2
+	and		%i2, 0xc, %g3
+	and		%i1, -4, %i1
 	cmp		%g3, 4
 	sll		%g2, 3, %g4
 	mov		32, %g2
 	be		4f
-	 sub		%g2, %g4, %g7
+	 sub		%g2, %g4, %l0
 	
 	blu		3f
 	 cmp		%g3, 0x8
 
 	be		2f
-	 srl		%o2, 2, %g3
+	 srl		%i2, 2, %g3
 
-	ld		[%o1], %o3
-	add		%o0, -8, %o0
-	ld		[%o1 + 4], %o4
+	ld		[%i1], %i3
+	add		%i0, -8, %i0
+	ld		[%i1 + 4], %i4
 	b		8f
 	 add		%g3, 1, %g3
 2:
-	ld		[%o1], %o4
-	add		%o0, -12, %o0
-	ld		[%o1 + 4], %o5
+	ld		[%i1], %i4
+	add		%i0, -12, %i0
+	ld		[%i1 + 4], %i5
 	add		%g3, 2, %g3
 	b		9f
-	 add		%o1, -4, %o1
+	 add		%i1, -4, %i1
 3:
-	ld		[%o1], %g1
-	add		%o0, -4, %o0
-	ld		[%o1 + 4], %o3
-	srl		%o2, 2, %g3
+	ld		[%i1], %g1
+	add		%i0, -4, %i0
+	ld		[%i1 + 4], %i3
+	srl		%i2, 2, %g3
 	b		7f
-	 add		%o1, 4, %o1
+	 add		%i1, 4, %i1
 4:
-	ld		[%o1], %o5
-	cmp		%o2, 7
-	ld		[%o1 + 4], %g1
-	srl		%o2, 2, %g3
+	ld		[%i1], %i5
+	cmp		%i2, 7
+	ld		[%i1 + 4], %g1
+	srl		%i2, 2, %g3
 	bleu		10f
-	 add		%o1, 8, %o1
+	 add		%i1, 8, %i1
 
-	ld		[%o1], %o3
+	ld		[%i1], %i3
 	add		%g3, -1, %g3
 5:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0]
+	st		%g2, [%i0]
 7:
-	ld		[%o1 + 4], %o4
+	ld		[%i1 + 4], %i4
 	sll		%g1, %g4, %g2
-	srl		%o3, %g7, %g5
+	srl		%i3, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 4]
+	st		%g2, [%i0 + 4]
 8:
-	ld		[%o1 + 8], %o5
-	sll		%o3, %g4, %g2
-	srl		%o4, %g7, %g5
+	ld		[%i1 + 8], %i5
+	sll		%i3, %g4, %g2
+	srl		%i4, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 8]
+	st		%g2, [%i0 + 8]
 9:
-	ld		[%o1 + 12], %g1
-	sll		%o4, %g4, %g2
-	srl		%o5, %g7, %g5
+	ld		[%i1 + 12], %g1
+	sll		%i4, %g4, %g2
+	srl		%i5, %l0, %g5
 	addcc		%g3, -4, %g3
 	or		%g2, %g5, %g2
-	add		%o1, 16, %o1
-	st		%g2, [%o0 + 12]
-	add		%o0, 16, %o0
+	add		%i1, 16, %i1
+	st		%g2, [%i0 + 12]
+	add		%i0, 16, %i0
 	bne,a		5b
-	 ld		[%o1], %o3
+	 ld		[%i1], %i3
 10:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	srl		%g7, 3, %g3
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
+	srl		%l0, 3, %g3
 	or		%g2, %g5, %g2
-	sub		%o1, %g3, %o1
-	andcc		%o2, 2, %g0
-	st		%g2, [%o0]
+	sub		%i1, %g3, %i1
+	andcc		%i2, 2, %g0
+	st		%g2, [%i0]
 	be		1f
-	 andcc		%o2, 1, %g0
-
-	ldub		[%o1], %g2
-	add		%o1, 2, %o1
-	stb		%g2, [%o0 + 4]
-	add		%o0, 2, %o0
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 + 3]
+	 andcc		%i2, 1, %g0
+
+	ldub		[%i1], %g2
+	add		%i1, 2, %i1
+	stb		%g2, [%i0 + 4]
+	add		%i0, 2, %i0
+	ldub		[%i1 - 1], %g2
+	stb		%g2, [%i0 + 3]
 1:
 	be		1f
 	 nop
-	ldub		[%o1], %g2
-	stb		%g2, [%o0 + 4]
+	ldub		[%i1], %g2
+	stb		%g2, [%i0 + 4]
 1:
-	retl
-	 RETL_INSN
+	ret
+	 restore	%g0, %g0, %o0
 
 88:	/* short_end */
 
-- 
cgit v1.2.3


From 23a652b45554faf08ac5a7a86c176a95cce8ca83 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 19 Oct 2011 15:31:55 -0700
Subject: sparc32: Correct the return value of memcpy.

[ Upstream commit a52312b88c8103e965979a79a07f6b34af82ca4b ]

Properly return the original destination buffer pointer.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Kjetil Oftedal <oftedal@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/lib/memcpy.S | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
index 90a598846bab..4d8c497517bd 100644
--- a/arch/sparc/lib/memcpy.S
+++ b/arch/sparc/lib/memcpy.S
@@ -13,14 +13,6 @@
 	.align	4;		\
 x:
 
-
-/* In kernel these functions don't return a value.
- * One should use macros in asm/string.h for that purpose.
- * We return 0, so that bugs are more apparent.
- */
-#define SETUP_RETL
-#define RETL_INSN	clr	%o0
-
 /* Both these macros have to start with exactly the same insn */
 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 	ldd	[%src + (offset) + 0x00], %t0; \
@@ -154,7 +146,7 @@ FUNC(__memmove)
 #endif
 FUNC(memmove)
 	cmp		%o0, %o1
-	SETUP_RETL
+	mov		%o0, %g7
 	bleu		9f
 	 sub		%o0, %o1, %o4
 
@@ -178,7 +170,7 @@ FUNC(memmove)
 	 sub		%o0, 1, %o0
 
 	retl
-	 RETL_INSN
+	 mov		%g7, %o0
 
 /* NOTE: This code is executed just for the cases,
          where %src (=%o1) & 3 is != 0.
@@ -212,7 +204,7 @@ FUNC(memmove)
 FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 
 	sub		%o0, %o1, %o4
-	SETUP_RETL
+	mov		%o0, %g7
 9:
 	andcc		%o4, 3, %o5
 0:
@@ -308,7 +300,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
+	 mov		%g7, %o0
 
 82:	/* ldd_std */
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
@@ -373,7 +365,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
+	 mov		%g7, %o0
 
 86:	/* non_aligned */
 	cmp		%o2, 6
@@ -498,7 +490,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%i0 + 4]
 1:
 	ret
-	 restore	%g0, %g0, %o0
+	 restore	%g7, %g0, %o0
 
 88:	/* short_end */
 
@@ -529,7 +521,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
+	 mov		%g7, %o0
 
 90:	/* short_aligned_end */
 	bne		88b
-- 
cgit v1.2.3


From d4afed4d20e91a12de7cd1c64bb3451ee2236d19 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 31 Oct 2011 01:05:49 -0700
Subject: sparc64: Fix masking and shifting in VIS fpcmp emulation.

[ Upstream commit 2e8ecdc008a16b9a6c4b9628bb64d0d1c05f9f92 ]

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/kernel/visemul.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 36357717d691..9384a0cbeba4 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -713,17 +713,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a > b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPGT32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a > b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -733,17 +733,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a <= b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPLE32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a <= b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -753,17 +753,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a != b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPNE32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a != b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -773,17 +773,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a == b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPEQ32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a == b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 	}
-- 
cgit v1.2.3


From 2a89fc8b91abf1ba56daa23b05e6572b30331837 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 26 Dec 2011 12:30:13 -0500
Subject: sparc: Fix handling of orig_i0 wrt. debugging when restarting
 syscalls.

[ A combination of upstream commits 1d299bc7732c34d85bd43ac1a8745f5a2fed2078 and
  e88d2468718b0789b4c33da2f7e1cef2a1eee279 ]

Although we provide a proper way for a debugger to control whether
syscall restart occurs, we run into problems because orig_i0 is not
saved and restored properly.

Luckily we can solve this problem without having to make debuggers
aware of the issue.  Across system calls, several registers are
considered volatile and can be safely clobbered.

Therefore we use the pt_regs save area of one of those registers, %g6,
as a place to save and restore orig_i0.

Debuggers transparently will do the right thing because they save and
restore this register already.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sparc/kernel/signal32.c  | 18 ++++++++++--------
 arch/sparc/kernel/signal_32.c | 30 +++++++++++++++++++++++++-----
 arch/sparc/kernel/signal_64.c | 42 ++++++++++++++++++++++++++++--------------
 3 files changed, 63 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 5d92488fc167..2e58328c30e0 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -829,21 +829,23 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-void do_signal32(sigset_t *oldset, struct pt_regs * regs,
-		 int restart_syscall, unsigned long orig_i0)
+void do_signal32(sigset_t *oldset, struct pt_regs * regs)
 {
 	struct k_sigaction ka;
+	unsigned long orig_i0;
+	int restart_syscall;
 	siginfo_t info;
 	int signr;
 	
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
-	/* If the debugger messes with the program counter, it clears
-	 * the "in syscall" bit, directing us to not perform a syscall
-	 * restart.
-	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	orig_i0 = 0;
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
 
 	if (signr > 0) {
 		if (restart_syscall)
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 04ede8f04add..2302567578ba 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -525,10 +525,26 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	siginfo_t info;
 	int signr;
 
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon that fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but it's value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
 	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C))
-		restart_syscall = 1;
-	else
-		restart_syscall = 0;
+		regs->u_regs[UREG_G6] = orig_i0;
 
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
@@ -541,8 +557,12 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	 * the software "in syscall" bit, directing us to not perform
 	 * a syscall restart.
 	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C)) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
+
 
 	if (signr > 0) {
 		if (restart_syscall)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 47509df3b893..d58260bff2df 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -535,11 +535,27 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	siginfo_t info;
 	int signr;
 	
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon that fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but it's value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
 	if (pt_regs_is_syscall(regs) &&
-	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
-		restart_syscall = 1;
-	} else
-		restart_syscall = 0;
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
+		regs->u_regs[UREG_G6] = orig_i0;
 
 	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
@@ -548,22 +564,20 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT)) {
-		extern void do_signal32(sigset_t *, struct pt_regs *,
-					int restart_syscall,
-					unsigned long orig_i0);
-		do_signal32(oldset, regs, restart_syscall, orig_i0);
+		extern void do_signal32(sigset_t *, struct pt_regs *);
+		do_signal32(oldset, regs);
 		return;
 	}
 #endif	
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
-	/* If the debugger messes with the program counter, it clears
-	 * the software "in syscall" bit, directing us to not perform
-	 * a syscall restart.
-	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
 
 	if (signr > 0) {
 		if (restart_syscall)
-- 
cgit v1.2.3


From b3c5fb8252b04ec02654f80988e68852f1a14cb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20K=C3=B6tter?= <nepenthesdev@gmail.com>
Date: Sat, 17 Dec 2011 11:39:08 +0000
Subject: net: bpf_jit: fix an off-one bug in x86_64 cond jump target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a03ffcf873fe0f2565386ca8ef832144c42e67fa ]

x86 jump instruction size is 2 or 5 bytes (near/long jump), not 2 or 6
bytes.

In case a conditional jump is followed by a long jump, conditional jump
target is one byte past the start of target instruction.

Signed-off-by: Markus Kötter <nepenthesdev@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/net/bpf_jit_comp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index bfab3fa10edc..7b65f752c5f8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,8 +568,8 @@ cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 					break;
 				}
 				if (filter[i].jt != 0) {
-					if (filter[i].jf)
-						t_offset += is_near(f_offset) ? 2 : 6;
+					if (filter[i].jf && f_offset)
+						t_offset += is_near(f_offset) ? 2 : 5;
 					EMIT_COND_JMP(t_op, t_offset);
 					if (filter[i].jf)
 						EMIT_JMP(f_offset);
-- 
cgit v1.2.3


From bfaebb8af017d443937022cf3d5735d726e4f711 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 23 Nov 2011 20:07:17 +0000
Subject: powerpc/time: Handle wrapping of decrementer

commit 37fb9a0231ee43d42d069863bdfd567fca2b61af upstream.

When re-enabling interrupts we have code to handle edge sensitive
decrementers by resetting the decrementer to 1 whenever it is negative.
If interrupts were disabled long enough that the decrementer wrapped to
positive we do nothing. This means interrupts can be delayed for a long
time until it finally goes negative again.

While we hope interrupts are never be disabled long enough for the
decrementer to go positive, we have a very good test team that can
drive any kernel into the ground. The softlockup data we get back
from these fails could be seconds in the future, completely missing
the cause of the lockup.

We already keep track of the timebase of the next event so use that
to work out if we should trigger a decrementer exception.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/include/asm/time.h |  2 ++
 arch/powerpc/kernel/irq.c       | 15 ++++++---------
 arch/powerpc/kernel/time.c      |  9 +++++++++
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index fe6f7c2c9c68..bc3c745cb906 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -219,5 +219,7 @@ DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
 extern void secondary_cpu_time_init(void);
 extern void iSeries_time_init_early(void);
 
+extern void decrementer_check_overflow(void);
+
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5b428e308666..ca2987d939f5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -170,16 +170,13 @@ notrace void arch_local_irq_restore(unsigned long en)
 	 */
 	local_paca->hard_enabled = en;
 
-#ifndef CONFIG_BOOKE
-	/* On server, re-trigger the decrementer if it went negative since
-	 * some processors only trigger on edge transitions of the sign bit.
-	 *
-	 * BookE has a level sensitive decrementer (latches in TSR) so we
-	 * don't need that
+	/*
+	 * Trigger the decrementer if we have a pending event. Some processors
+	 * only trigger on edge transitions of the sign bit. We might also
+	 * have disabled interrupts long enough that the decrementer wrapped
+	 * to positive.
 	 */
-	if ((int)mfspr(SPRN_DEC) < 0)
-		mtspr(SPRN_DEC, 1);
-#endif /* CONFIG_BOOKE */
+	decrementer_check_overflow();
 
 	/*
 	 * Force the delivery of pending soft-disabled interrupts on PS3.
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 03b29a6759ab..2de304af07ab 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -889,6 +889,15 @@ static void __init clocksource_init(void)
 	       clock->name, clock->mult, clock->shift);
 }
 
+void decrementer_check_overflow(void)
+{
+	u64 now = get_tb_or_rtc();
+	struct decrementer_clock *decrementer = &__get_cpu_var(decrementers);
+
+	if (now >= decrementer->next_tb)
+		set_dec(1);
+}
+
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev)
 {
-- 
cgit v1.2.3


From ec0d3233562d591c892841fcfa73edd3700a6ca6 Mon Sep 17 00:00:00 2001
From: Li Zhong <zhong@linux.vnet.ibm.com>
Date: Sun, 18 Dec 2011 16:03:04 +0000
Subject: powerpc: Fix unpaired probe_hcall_entry and probe_hcall_exit

commit e4f387d8db3ba3c2dae4d8bdfe7bb5f4fe1bcb0d upstream.

Unpaired calling of probe_hcall_entry and probe_hcall_exit might happen
as following, which could cause incorrect preempt count.

__trace_hcall_entry => trace_hcall_entry -> probe_hcall_entry =>
get_cpu_var => preempt_disable

__trace_hcall_exit => trace_hcall_exit -> probe_hcall_exit =>
put_cpu_var => preempt_enable

where:
A => B and A -> B means A calls B, but
=> means A will call B through function name, and B will definitely be
called.
-> means A will call B through function pointer, so B might not be
called if the function pointer is not set.

So error happens when only one of probe_hcall_entry and probe_hcall_exit
get called during a hcall.

This patch tries to move the preempt count operations from
probe_hcall_entry and probe_hcall_exit to its callers.

Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/platforms/pseries/hvCall_inst.c | 4 +---
 arch/powerpc/platforms/pseries/lpar.c        | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index f106662f4381..c9311cfdfcac 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -109,7 +109,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long
 	if (opcode > MAX_HCALL_OPCODE)
 		return;
 
-	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
 	h->tb_start = mftb();
 	h->purr_start = mfspr(SPRN_PURR);
 }
@@ -126,8 +126,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long
 	h->num_calls++;
 	h->tb_total += mftb() - h->tb_start;
 	h->purr_total += mfspr(SPRN_PURR) - h->purr_start;
-
-	put_cpu_var(hcall_stats);
 }
 
 static int __init hcall_inst_init(void)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ed96b3765377..81e30d96f83f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -745,6 +745,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 		goto out;
 
 	(*depth)++;
+	preempt_disable();
 	trace_hcall_entry(opcode, args);
 	(*depth)--;
 
@@ -767,6 +768,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
 
 	(*depth)++;
 	trace_hcall_exit(opcode, retval, retbuf);
+	preempt_enable();
 	(*depth)--;
 
 out:
-- 
cgit v1.2.3


From 0a4179971477550df61b9218e664eb9128abf2e3 Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Mon, 14 Nov 2011 15:42:16 -0800
Subject: x86/PCI: Ignore CPU non-addressable _CRS reserved memory resources

commit ae5cd86455381282ece162966183d3f208c6fad7 upstream.

This assures that a _CRS reserved host bridge window or window region is
not used if it is not addressable by the CPU.  The new code either trims
the window to exclude the non-addressable portion or totally ignores the
window if the entire window is non-addressable.

The current code has been shown to be problematic with 32-bit non-PAE
kernels on systems where _CRS reserves resources above 4GB.

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Thomas Renninger <trenn@novell.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/pci/acpi.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 50b3f14c59a1..53f9e684c819 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -149,7 +149,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 	struct acpi_resource_address64 addr;
 	acpi_status status;
 	unsigned long flags;
-	u64 start, end;
+	u64 start, orig_end, end;
 
 	status = resource_to_addr(acpi_res, &addr);
 	if (!ACPI_SUCCESS(status))
@@ -165,7 +165,21 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 		return AE_OK;
 
 	start = addr.minimum + addr.translation_offset;
-	end = addr.maximum + addr.translation_offset;
+	orig_end = end = addr.maximum + addr.translation_offset;
+
+	/* Exclude non-addressable range or non-addressable portion of range */
+	end = min(end, (u64)iomem_resource.end);
+	if (end <= start) {
+		dev_info(&info->bridge->dev,
+			"host bridge window [%#llx-%#llx] "
+			"(ignored, not CPU addressable)\n", start, orig_end);
+		return AE_OK;
+	} else if (orig_end != end) {
+		dev_info(&info->bridge->dev,
+			"host bridge window [%#llx-%#llx] "
+			"([%#llx-%#llx] ignored, not CPU addressable)\n",
+			start, orig_end, end + 1, orig_end);
+	}
 
 	res = &info->res[info->res_num];
 	res->name = info->name;
-- 
cgit v1.2.3


From 45e7e24360e799f7753f37ca5642a0d6e29b9c62 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 5 Jan 2012 14:27:19 -0700
Subject: x86/PCI: amd: factor out MMCONFIG discovery

commit 24d25dbfa63c376323096660bfa9ad45a08870ce upstream.

This factors out the AMD native MMCONFIG discovery so we can use it
outside amd_bus.c.

amd_bus.c reads AMD MSRs so it can remove the MMCONFIG area from the
PCI resources.  We may also need the MMCONFIG information to work
around BIOS defects in the ACPI MCFG table.

Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/include/asm/amd_nb.h |  2 ++
 arch/x86/kernel/amd_nb.c      | 31 +++++++++++++++++++++++++++++++
 arch/x86/pci/amd_bus.c        | 42 +++++++++++-------------------------------
 3 files changed, 44 insertions(+), 31 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 67f87f257611..78a1eff74223 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_AMD_NB_H
 #define _ASM_X86_AMD_NB_H
 
+#include <linux/ioport.h>
 #include <linux/pci.h>
 
 struct amd_nb_bus_dev_range {
@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
 
 extern bool early_is_amd_nb(u32 value);
+extern struct resource *amd_get_mmconfig_range(struct resource *res);
 extern int amd_cache_northbridges(void);
 extern void amd_flush_garts(void);
 extern int amd_numa_init(void);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8facc..bae1efe6d515 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device)
 	return false;
 }
 
+struct resource *amd_get_mmconfig_range(struct resource *res)
+{
+	u32 address;
+	u64 base, msr;
+	unsigned segn_busn_bits;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return NULL;
+
+	/* assume all cpus from fam10h have mmconfig */
+        if (boot_cpu_data.x86 < 0x10)
+		return NULL;
+
+	address = MSR_FAM10H_MMIO_CONF_BASE;
+	rdmsrl(address, msr);
+
+	/* mmconfig is not enabled */
+	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+		return NULL;
+
+	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+
+	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+			 FAM10H_MMIO_CONF_BUSRANGE_MASK;
+
+	res->flags = IORESOURCE_MEM;
+	res->start = base;
+	res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+	return res;
+}
+
 int amd_get_subcaches(int cpu)
 {
 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 026e4931d162..385a940b5422 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
 	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
 };
 
-static u64 __initdata fam10h_mmconf_start;
-static u64 __initdata fam10h_mmconf_end;
-static void __init get_pci_mmcfg_amd_fam10h_range(void)
-{
-	u32 address;
-	u64 base, msr;
-	unsigned segn_busn_bits;
-
-	/* assume all cpus from fam10h have mmconf */
-        if (boot_cpu_data.x86 < 0x10)
-		return;
-
-	address = MSR_FAM10H_MMIO_CONF_BASE;
-	rdmsrl(address, msr);
-
-	/* mmconfig is not enable */
-	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
-		return;
-
-	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
-
-	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
-			 FAM10H_MMIO_CONF_BUSRANGE_MASK;
-
-	fam10h_mmconf_start = base;
-	fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
-}
-
 #define RANGE_NUM 16
 
 /**
@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void)
 	u64 val;
 	u32 address;
 	bool found;
+	struct resource fam10h_mmconf_res, *fam10h_mmconf;
+	u64 fam10h_mmconf_start;
+	u64 fam10h_mmconf_end;
 
 	if (!early_pci_allowed())
 		return -1;
@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void)
 		subtract_range(range, RANGE_NUM, 0, end);
 
 	/* get mmconfig */
-	get_pci_mmcfg_amd_fam10h_range();
+	fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);
 	/* need to take out mmconf range */
-	if (fam10h_mmconf_end) {
-		printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
+	if (fam10h_mmconf) {
+		printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf);
+		fam10h_mmconf_start = fam10h_mmconf->start;
+		fam10h_mmconf_end = fam10h_mmconf->end;
 		subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
 				 fam10h_mmconf_end + 1);
+	} else {
+		fam10h_mmconf_start = 0;
+		fam10h_mmconf_end = 0;
 	}
 
 	/* mmio resource */
-- 
cgit v1.2.3


From 72ce943013baaba4af7afec76c3556b16b9f6de4 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 12 Jan 2012 08:01:40 -0700
Subject: x86/PCI: build amd_bus.o only when CONFIG_AMD_NB=y

commit 5cf9a4e69c1ff0ccdd1d2b7404f95c0531355274 upstream.

We only need amd_bus.o for AMD systems with PCI.  arch/x86/pci/Makefile
already depends on CONFIG_PCI=y, so this patch just adds the dependency
on CONFIG_AMD_NB.

Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/pci/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 6b8759f7634e..d24d3da72926 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -18,8 +18,9 @@ obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 obj-$(CONFIG_X86_MRST)		+= mrst.o
 
 obj-y				+= common.o early.o
-obj-y				+= amd_bus.o bus_numa.o
+obj-y				+= bus_numa.o
 
+obj-$(CONFIG_AMD_NB)		+= amd_bus.o
 obj-$(CONFIG_PCI_CNB20LE_QUIRK)	+= broadcom_bus.o
 
 ifeq ($(CONFIG_PCI_DEBUG),y)
-- 
cgit v1.2.3


From 59c43b2c3ef410e585646825ea552507cd51ccb1 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel <ludwig.nussel@suse.de>
Date: Tue, 15 Nov 2011 14:46:46 -0800
Subject: x86: Fix mmap random address range

commit 9af0c7a6fa860698d080481f24a342ba74b68982 upstream.

On x86_32 casting the unsigned int result of get_random_int() to
long may result in a negative value.  On x86_32 the range of
mmap_rnd() therefore was -255 to 255.  The 32bit mode on x86_64
used 0 to 255 as intended.

The bug was introduced by 675a081 ("x86: unify mmap_{32|64}.c")
in January 2008.

Signed-off-by: Ludwig Nussel <ludwig.nussel@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: harvey.harrison@gmail.com
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/201111152246.pAFMklOB028527@wpaz5.hot.corp.google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/mm/mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab5194fd9d..f927429d07ca 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -87,9 +87,9 @@ static unsigned long mmap_rnd(void)
 	*/
 	if (current->flags & PF_RANDOMIZE) {
 		if (mmap_is_ia32())
-			rnd = (long)get_random_int() % (1<<8);
+			rnd = get_random_int() % (1<<8);
 		else
-			rnd = (long)(get_random_int() % (1<<28));
+			rnd = get_random_int() % (1<<28);
 	}
 	return rnd << PAGE_SHIFT;
 }
-- 
cgit v1.2.3


From 15274414badc6ef0a893454e7102f3753f2c1ccd Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Fri, 6 Jan 2012 13:19:00 -0600
Subject: x86, UV: Update Boot messages for SGI UV2 platform

commit da517a08ac5913cd80ce3507cddd00f2a091b13c upstream.

SGI UV systems print a message during boot:

	UV: Found <num> blades

Due to packaging changes, the blade count is not accurate for
on the next generation of the platform. This patch corrects the
count.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Link: http://lkml.kernel.org/r/20120106191900.GA19772@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cfeb978f49fd..874c20877140 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -779,7 +779,12 @@ void __init uv_system_init(void)
 	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
 		uv_possible_blades +=
 		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
-	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+	/* uv_num_possible_blades() is really the hub count */
+	printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+			is_uv1_hub() ? uv_num_possible_blades() :
+			(uv_num_possible_blades() + 1) / 2,
+			uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
 	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
-- 
cgit v1.2.3


From ef9a04d5b336853aeb3f3975c4e52b07c1c4d3ec Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 17 Jan 2012 04:20:31 -0500
Subject: ACPI, x86: Use SRAT table rev to use 8bit or 32bit PXM fields
 (x86/x86-64)

commit cd298f60a2451a16e0f077404bf69b62ec868733 upstream.

In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides
32bits for these. The new fields were reserved before.
According to the ACPI spec, the OS must disregrard reserved fields.

x86/x86-64 was rather inconsistent prior to this patch; it used 8 bits
for the pxm field in cpu_affinity, but 32 bits in mem_affinity.
This patch makes it consistent: Either use 8 bits consistently (SRAT
rev 1 or lower) or 32 bits (SRAT rev 2 or higher).

cc: x86@kernel.org
Signed-off-by: Kurt Garloff <kurt@garloff.de>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/mm/srat.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 81dbfdeb080d..7efd0c615d58 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -104,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
 		return;
 	pxm = pa->proximity_domain_lo;
+	if (acpi_srat_revision >= 2)
+		pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
 	node = setup_node(pxm);
 	if (node < 0) {
 		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -155,6 +157,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	start = ma->base_address;
 	end = start + ma->length;
 	pxm = ma->proximity_domain;
+	if (acpi_srat_revision <= 1)
+		pxm &= 0xff;
 	node = setup_node(pxm);
 	if (node < 0) {
 		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
-- 
cgit v1.2.3


From 0076d42a31bac7853d3c77c3858d2685f6fe8f2d Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 17 Jan 2012 04:21:49 -0500
Subject: ACPI, ia64: Use SRAT table rev to use 8bit or 16/32bit PXM fields
 (ia64)

commit 9f10f6a520deb3639fac78d81151a3ade88b4e7f upstream.

In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides
32bits for these. The new fields were reserved before.
According to the ACPI spec, the OS must disregrard reserved fields.

ia64 did handle the PXM fields almost consistently, but depending on
sgi's sn2 platform. This patch leaves the sn2 logic in, but does also
use 16/32 bits for PXM if the SRAT has rev 2 or higher.

The patch also adds __init to the two pxm accessor functions, as they
access __initdata now and are called from an __init function only anyway.

Note that the code only uses 16 bits for the PXM field in the processor
proximity field; the patch does not address this as 16 bits are more than
enough.

Signed-off-by: Kurt Garloff <kurt@garloff.de>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/ia64/kernel/acpi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 3be485a300b1..f19de9f7f5f5 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -429,22 +429,24 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 static struct acpi_table_slit __initdata *slit_table;
 cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
-static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
 	int pxm;
 
 	pxm = pa->proximity_domain_lo;
-	if (ia64_platform_is("sn2"))
+	if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
 		pxm += pa->proximity_domain_hi[0] << 8;
 	return pxm;
 }
 
-static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
 {
 	int pxm;
 
 	pxm = ma->proximity_domain;
-	if (!ia64_platform_is("sn2"))
+	if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
 		pxm &= 0xff;
 
 	return pxm;
-- 
cgit v1.2.3


From 415b95df641c998a7cf15c916047f338ab2ce87b Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Mon, 16 Jan 2012 15:18:48 -0600
Subject: x86/UV2: Fix BAU destination timeout initialization

commit d059f9fa84a30e04279c6ff615e9e2cf3b260191 upstream.

Move the call to enable_timeouts() forward so that
BAU_MISC_CONTROL is initialized before using it in
calculate_destination_timeout().

Fix the calculation of a BAU destination timeout
for UV2 (in calculate_destination_timeout()).

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120116211848.GB5767@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/platform/uv/tlb_uv.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 82cff4a25f44..edf435b74e85 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1575,14 +1575,14 @@ static int calculate_destination_timeout(void)
 		ts_ns = base * mult1 * mult2;
 		ret = ts_ns / 1000;
 	} else {
-		/* 4 bits  0/1 for 10/80us, 3 bits of multiplier */
-		mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
+		mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
 		mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
 		if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
-			mult1 = 80;
+			base = 80;
 		else
-			mult1 = 10;
-		base = mmr_image & UV2_ACK_MASK;
+			base = 10;
+		mult1 = mmr_image & UV2_ACK_MASK;
 		ret = mult1 * base;
 	}
 	return ret;
@@ -1820,6 +1820,8 @@ static int __init uv_bau_init(void)
 			uv_base_pnode = uv_blade_to_pnode(uvhub);
 	}
 
+	enable_timeouts();
+
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
 		nobau = 1;
 		return 0;
@@ -1830,7 +1832,6 @@ static int __init uv_bau_init(void)
 		if (uv_blade_nr_possible_cpus(uvhub))
 			init_uvhub(uvhub, vector, uv_base_pnode);
 
-	enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
 
 	for_each_possible_blade(uvhub) {
-- 
cgit v1.2.3


From 72a82010500dea88b2d786a76b063a871e2a4603 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 20 Jan 2012 14:34:27 -0800
Subject: score: fix off-by-one index into syscall table

commit c25a785d6647984505fa165b5cd84cfc9a95970b upstream.

If the provided system call number is equal to __NR_syscalls, the
current check will pass and a function pointer just after the system
call table may be called, since sys_call_table is an array with total
size __NR_syscalls.

Whether or not this is a security bug depends on what the compiler puts
immediately after the system call table.  It's likely that this won't do
anything bad because there is an additional NULL check on the syscall
entry, but if there happens to be a non-NULL value immediately after the
system call table, this may result in local privilege escalation.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Eugene Teo <eugeneteo@kernel.sg>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/score/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/score/kernel/entry.S b/arch/score/kernel/entry.S
index 577abba3fac6..83bb96079c43 100644
--- a/arch/score/kernel/entry.S
+++ b/arch/score/kernel/entry.S
@@ -408,7 +408,7 @@ ENTRY(handle_sys)
 	sw	r9, [r0, PT_EPC]
 
 	cmpi.c	r27, __NR_syscalls 	# check syscall number
-	bgtu	illegal_syscall
+	bgeu	illegal_syscall
 
 	slli	r8, r27, 2		# get syscall routine
 	la	r11, sys_call_table
-- 
cgit v1.2.3


From 8c6f009ebfcdae4196be7e456c99ad44eb013420 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Wed, 18 Jan 2012 20:07:54 -0600
Subject: x86/uv: Fix uv_gpa_to_soc_phys_ram() shift

commit 5a51467b146ab7948d2f6812892eac120a30529c upstream.

uv_gpa_to_soc_phys_ram() was inadvertently ignoring the
shift values.  This fix takes the shift into account.

Signed-off-by: Russ Anderson <rja@sgi.com>
Link: http://lkml.kernel.org/r/20120119020753.GA7228@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/uv/uv_hub.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 54a13aaebc40..21f7385badb8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -318,13 +318,13 @@ uv_gpa_in_mmr_space(unsigned long gpa)
 /* UV global physical address --> socket phys RAM */
 static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
 {
-	unsigned long paddr = gpa & uv_hub_info->gpa_mask;
+	unsigned long paddr;
 	unsigned long remap_base = uv_hub_info->lowmem_remap_base;
 	unsigned long remap_top =  uv_hub_info->lowmem_remap_top;
 
 	gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
 		((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
-	gpa = gpa & uv_hub_info->gpa_mask;
+	paddr = gpa & uv_hub_info->gpa_mask;
 	if (paddr >= remap_base && paddr < remap_base + remap_top)
 		paddr -= remap_base;
 	return paddr;
-- 
cgit v1.2.3


From 231f0496129d5d41ae77cc2410e3cea97540cd7b Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 20 Jan 2012 17:44:12 +0100
Subject: x86/microcode_amd: Add support for CPU family specific container
 files

commit 5b68edc91cdc972c46f76f85eded7ffddc3ff5c2 upstream.

We've decided to provide CPU family specific container files
(starting with CPU family 15h). E.g. for family 15h we have to
load microcode_amd_fam15h.bin instead of microcode_amd.bin

Rationale is that starting with family 15h patch size is larger
than 2KB which was hard coded as maximum patch size in various
microcode loaders (not just Linux).

Container files which include patches larger than 2KB cause
different kinds of trouble with such old patch loaders. Thus we
have to ensure that the default container file provides only
patches with size less than 2KB.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/20120120164412.GD24508@alberich.amd.com
[ documented the naming convention and tidied the code a bit. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/microcode_amd.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c5610384ab16..b727450f5d78 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -298,13 +298,33 @@ free_table:
 	return state;
 }
 
+/*
+ * AMD microcode firmware naming convention, up to family 15h they are in
+ * the legacy file:
+ *
+ *    amd-ucode/microcode_amd.bin
+ *
+ * This legacy file is always smaller than 2K in size.
+ *
+ * Starting at family 15h they are in family specific firmware files:
+ *
+ *    amd-ucode/microcode_amd_fam15h.bin
+ *    amd-ucode/microcode_amd_fam16h.bin
+ *    ...
+ *
+ * These might be larger than 2K.
+ */
 static enum ucode_state request_microcode_amd(int cpu, struct device *device)
 {
-	const char *fw_name = "amd-ucode/microcode_amd.bin";
+	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	const struct firmware *fw;
 	enum ucode_state ret = UCODE_NFOUND;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86 >= 0x15)
+		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
 
-	if (request_firmware(&fw, fw_name, device)) {
+	if (request_firmware(&fw, (const char *)fw_name, device)) {
 		pr_err("failed to load file %s\n", fw_name);
 		goto out;
 	}
-- 
cgit v1.2.3


From 4e29fa93520b401b81a719fe053f06c6937d5b66 Mon Sep 17 00:00:00 2001
From: Srinidhi KASAGAR <srinidhi.kasagar@stericsson.com>
Date: Thu, 12 Jan 2012 11:07:43 +0530
Subject: mach-ux500: enable ARM errata 764369

commit d65015f7c5c5be9fd3f5e567889c844ba81bdc9c upstream.

This applies ARM errata 764369 for all ux500 platforms.

Signed-off-by: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-ux500/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 9a9706cf1496..6ebdb0d03828 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -7,6 +7,7 @@ config UX500_SOC_COMMON
 	select HAS_MTU
 	select ARM_ERRATA_753970
 	select ARM_ERRATA_754322
+	select ARM_ERRATA_764369
 
 menu "Ux500 SoC"
 
-- 
cgit v1.2.3


From 95086de856ca703588967f63cc54b8059db55888 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 20 Jan 2012 12:10:18 +0100
Subject: ARM: 7296/1: proc-v7.S: remove HARVARD_CACHE preprocessor guards

commit 612539e81f655f6ac73c7af1da8701c1ee618aee upstream.

On v7, we use the same cache maintenance instructions for data lines
as for unified lines. This was not the case for v6, where HARVARD_CACHE
was defined to indicate the L1 cache topology.

This patch removes the erroneous compile-time check for HARVARD_CACHE in
proc-v7.S, ensuring that we perform I-side invalidation at boot.

Reported-and-Acked-by: Shawn Guo <shawn.guo@linaro.org>

Acked-by: Catalin Marinas <Catalin.Marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/proc-v7.S | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 089c0b5e454f..b6ba1032a988 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -270,10 +270,6 @@ cpu_resume_l1_flags:
  *	Initialise TLB, Caches, and MMU state ready to switch the MMU
  *	on.  Return in r0 the new CP15 C1 control register setting.
  *
- *	We automatically detect if we have a Harvard cache, and use the
- *	Harvard cache control instructions insead of the unified cache
- *	control instructions.
- *
  *	This should be able to cover all ARMv7 cores.
  *
  *	It is assumed that:
@@ -363,9 +359,7 @@ __v7_setup:
 #endif
 
 3:	mov	r10, #0
-#ifdef HARVARD_CACHE
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
-#endif
 	dsb
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
-- 
cgit v1.2.3


From d020b1d3d3379d183d0649cdc2f6de9131268419 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 18 Jan 2012 07:21:42 +0000
Subject: net: bpf_jit: fix divide by 0 generation

[ Upstream commit d00a9dd21bdf7908b70866794c8313ee8a5abd5c ]

Several problems fixed in this patch :

1) Target of the conditional jump in case a divide by 0 is performed
   by a bpf is wrong.

2) Must 'generate' the full function prologue/epilogue at pass=0,
   or else we can stop too early in pass=1 if the proglen doesnt change.
   (if the increase of prologue/epilogue equals decrease of all
    instructions length because some jumps are converted to near jumps)

3) Change the wrong length detection at the end of code generation to
   issue a more explicit message, no need for a full stack trace.

Reported-by: Phil Oester <kernel@linuxace.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/net/bpf_jit_comp.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7b65f752c5f8..7c1b765ecc59 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -151,17 +151,18 @@ void bpf_jit_compile(struct sk_filter *fp)
 	cleanup_addr = proglen; /* epilogue address */
 
 	for (pass = 0; pass < 10; pass++) {
+		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
 		/* no prologue/epilogue for trivial filters (RET something) */
 		proglen = 0;
 		prog = temp;
 
-		if (seen) {
+		if (seen_or_pass0) {
 			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
 			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
 			/* note : must save %rbx in case bpf_error is hit */
-			if (seen & (SEEN_XREG | SEEN_DATAREF))
+			if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
 				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
-			if (seen & SEEN_XREG)
+			if (seen_or_pass0 & SEEN_XREG)
 				CLEAR_X(); /* make sure we dont leek kernel memory */
 
 			/*
@@ -170,7 +171,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 			 *  r9 = skb->len - skb->data_len
 			 *  r8 = skb->data
 			 */
-			if (seen & SEEN_DATAREF) {
+			if (seen_or_pass0 & SEEN_DATAREF) {
 				if (offsetof(struct sk_buff, len) <= 127)
 					/* mov    off8(%rdi),%r9d */
 					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
@@ -260,9 +261,14 @@ void bpf_jit_compile(struct sk_filter *fp)
 			case BPF_S_ALU_DIV_X: /* A /= X; */
 				seen |= SEEN_XREG;
 				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (pc_ret0 != -1)
-					EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
-				else {
+				if (pc_ret0 > 0) {
+					/* addrs[pc_ret0 - 1] is start address of target
+					 * (addrs[i] - 4) is the address following this jmp
+					 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
+					 */
+					EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
+								(addrs[i] - 4));
+				} else {
 					EMIT_COND_JMP(X86_JNE, 2 + 5);
 					CLEAR_A();
 					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
@@ -335,12 +341,12 @@ void bpf_jit_compile(struct sk_filter *fp)
 				}
 				/* fallinto */
 			case BPF_S_RET_A:
-				if (seen) {
+				if (seen_or_pass0) {
 					if (i != flen - 1) {
 						EMIT_JMP(cleanup_addr - addrs[i]);
 						break;
 					}
-					if (seen & SEEN_XREG)
+					if (seen_or_pass0 & SEEN_XREG)
 						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
 					EMIT1(0xc9);		/* leaveq */
 				}
@@ -483,8 +489,9 @@ common_load:			seen |= SEEN_DATAREF;
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
 				if ((int)K < 0) {
-					if (pc_ret0 != -1) {
-						EMIT_JMP(addrs[pc_ret0] - addrs[i]);
+					if (pc_ret0 > 0) {
+						/* addrs[pc_ret0 - 1] is the start address */
+						EMIT_JMP(addrs[pc_ret0 - 1] - addrs[i]);
 						break;
 					}
 					CLEAR_A();
@@ -599,13 +606,14 @@ cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 		 * use it to give the cleanup instruction(s) addr
 		 */
 		cleanup_addr = proglen - 1; /* ret */
-		if (seen)
+		if (seen_or_pass0)
 			cleanup_addr -= 1; /* leaveq */
-		if (seen & SEEN_XREG)
+		if (seen_or_pass0 & SEEN_XREG)
 			cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
 
 		if (image) {
-			WARN_ON(proglen != oldproglen);
+			if (proglen != oldproglen)
+				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
 			break;
 		}
 		if (proglen == oldproglen) {
-- 
cgit v1.2.3


From 886d462b0c92357a2f0ca0fc19f5877efbb1f638 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 30 Jan 2012 20:21:42 +0100
Subject: ARM: 7306/1: vfp: flush thread hwstate before restoring context from
 sigframe

commit 2af276dfb1722e97b190bd2e646b079a2aa674db upstream.

Following execution of a signal handler, we currently restore the VFP
context from the ucontext in the signal frame. This involves copying
from the user stack into the current thread's vfp_hard_struct and then
flushing the new data out to the hardware registers.

This is problematic when using a preemptible kernel because we could be
context switched whilst updating the vfp_hard_struct. If the current
thread has made use of VFP since the last context switch, the VFP
notifier will copy from the hardware registers into the vfp_hard_struct,
overwriting any data that had been partially copied by the signal code.

Disabling preemption across copy_from_user calls is a terrible idea, so
instead we move the VFP thread flush *before* we update the
vfp_hard_struct. Since the flushing is performed lazily, this has the
effect of disabling VFP and clearing the CPU's VFP state pointer,
therefore preventing the thread from being updated with stale data on
the next context switch.

Tested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/signal.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 0340224cf73c..9e617bd4a146 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -227,6 +227,8 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame)
 	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
 		return -EINVAL;
 
+	vfp_flush_hwstate(thread);
+
 	/*
 	 * Copy the floating point registers. There can be unused
 	 * registers see asm/hwcap.h for details.
@@ -251,9 +253,6 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame)
 	__get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err);
 	__get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err);
 
-	if (!err)
-		vfp_flush_hwstate(thread);
-
 	return err ? -EFAULT : 0;
 }
 
-- 
cgit v1.2.3


From ae3939e12cc597be0ba28d5f3b3d8f158f2d6d70 Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Mon, 30 Jan 2012 20:22:28 +0100
Subject: ARM: 7307/1: vfp: fix ptrace regset modification race

commit 247f4993a5974e6759606c4d380748eecfd273ff upstream.

In a preemptible kernel, vfp_set() can be preempted, causing the
hardware VFP context to be switched while the thread vfp state is
being read and modified.  This leads to a race condition which can
cause the thread vfp state to become corrupted if lazy VFP context
save occurs due to preemption in between the time thread->vfpstate
is read and the time the modified state is written back.

This may occur if preemption occurs during the execution of a
ptrace() call which modifies the VFP register state of a thread.
Such instances should be very rare in most realistic scenarios --
none has been reported, so far as I am aware.  Only uniprocessor
systems should be affected, since VFP context save is not currently
lazy in SMP kernels.

The problem was introduced by my earlier patch migrating to use
regsets to implement ptrace.

This patch does a vfp_sync_hwstate() before reading
thread->vfpstate, to make sure that the thread's VFP state is not
live in the hardware registers while the registers are modified.

Thanks to Will Deacon for spotting this.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/ptrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 97260060bf26..7f1e1338e4dd 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -719,10 +719,13 @@ static int vfp_set(struct task_struct *target,
 {
 	int ret;
 	struct thread_info *thread = task_thread_info(target);
-	struct vfp_hard_struct new_vfp = thread->vfpstate.hard;
+	struct vfp_hard_struct new_vfp;
 	const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs);
 	const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr);
 
+	vfp_sync_hwstate(thread);
+	new_vfp = thread->vfpstate.hard;
+
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				  &new_vfp.fpregs,
 				  user_fpregs_offset,
@@ -743,7 +746,6 @@ static int vfp_set(struct task_struct *target,
 	if (ret)
 		return ret;
 
-	vfp_sync_hwstate(thread);
 	thread->vfpstate.hard = new_vfp;
 	vfp_flush_hwstate(thread);
 
-- 
cgit v1.2.3


From e358073331debaab9e14b3139bc469184288aa48 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 30 Jan 2012 20:23:29 +0100
Subject: ARM: 7308/1: vfp: flush thread hwstate before copying ptrace
 registers

commit 8130b9d7b9d858aa04ce67805e8951e3cb6e9b2f upstream.

If we are context switched whilst copying into a thread's
vfp_hard_struct then the partial copy may be corrupted by the VFP
context switching code (see "ARM: vfp: flush thread hwstate before
restoring context from sigframe").

This patch updates the ptrace VFP set code so that the thread state is
flushed before the copy, therefore disabling VFP and preventing
corruption from occurring.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 7f1e1338e4dd..172ae01c26e0 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -746,8 +746,8 @@ static int vfp_set(struct task_struct *target,
 	if (ret)
 		return ret;
 
-	thread->vfpstate.hard = new_vfp;
 	vfp_flush_hwstate(thread);
+	thread->vfpstate.hard = new_vfp;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 17f272d8e4349543de855ea63e4eea4a67a7abc5 Mon Sep 17 00:00:00 2001
From: Yegor Yefremov <yegor_sub1@visionsystems.de>
Date: Mon, 23 Jan 2012 08:32:23 +0100
Subject: ARM: OMAP2+: GPMC: fix device size setup

commit 8ef5d844cc3a644ea6f7665932a4307e9fad01fa upstream.

following statement can only change device size from 8-bit(0) to 16-bit(1),
but not vice versa:

regval |= GPMC_CONFIG1_DEVICESIZE(wval);

so as this field has 1 reserved bit, that could be used in future,
just clear both bits and then OR with the desired value

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/gpmc.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 130034bf01d5..dfffbbf4c009 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -528,7 +528,13 @@ int gpmc_cs_configure(int cs, int cmd, int wval)
 
 	case GPMC_CONFIG_DEV_SIZE:
 		regval  = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1);
+
+		/* clear 2 target bits */
+		regval &= ~GPMC_CONFIG1_DEVICESIZE(3);
+
+		/* set the proper value */
 		regval |= GPMC_CONFIG1_DEVICESIZE(wval);
+
 		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG1, regval);
 		break;
 
-- 
cgit v1.2.3


From d3a6a79cfe7641d797ab74eea55b93e0f35f66d7 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Mon, 30 Jan 2012 14:31:46 +0000
Subject: xen pvhvm: do not remap pirqs onto evtchns if
 !xen_have_vector_callback

commit 207d543f472c1ac9552df79838dc807cbcaa9740 upstream.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index f567965c0620..6e96e65e7caa 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -308,7 +308,7 @@ int __init pci_xen_init(void)
 
 int __init pci_xen_hvm_init(void)
 {
-	if (!xen_feature(XENFEAT_hvm_pirqs))
+	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
 		return 0;
 
 #ifdef CONFIG_ACPI
-- 
cgit v1.2.3


From 49f936fe90802f0923f47d28f755c83d358feac9 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 15 Feb 2012 18:48:22 +0000
Subject: powerpc/perf: power_pmu_start restores incorrect values, breaking
 frequency events

commit 9a45a9407c69d068500923480884661e2b9cc421 upstream.

perf on POWER stopped working after commit e050e3f0a71b (perf: Fix
broken interrupt rate throttling). That patch exposed a bug in
the POWER perf_events code.

Since the PMCs count upwards and take an exception when the top bit
is set, we want to write 0x80000000 - left in power_pmu_start. We were
instead programming in left which effectively disables the counter
until we eventually hit 0x80000000. This could take seconds or longer.

With the patch applied I get the expected number of samples:

          SAMPLE events:       9948

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/perf_event.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 822f63008ae1..5793c4ba5a03 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -865,6 +865,7 @@ static void power_pmu_start(struct perf_event *event, int ef_flags)
 {
 	unsigned long flags;
 	s64 left;
+	unsigned long val;
 
 	if (!event->hw.idx || !event->hw.sample_period)
 		return;
@@ -880,7 +881,12 @@ static void power_pmu_start(struct perf_event *event, int ef_flags)
 
 	event->hw.state = 0;
 	left = local64_read(&event->hw.period_left);
-	write_pmc(event->hw.idx, left);
+
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+
+	write_pmc(event->hw.idx, val);
 
 	perf_event_update_userpage(event);
 	perf_pmu_enable(event->pmu);
-- 
cgit v1.2.3


From ad15d5c6dc9f1272fe1dde3de87ab775f65a6d77 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 7 Feb 2012 19:42:07 +0100
Subject: ARM: 7321/1: cache-v7: Disable preemption when reading CCSIDR

commit b46c0f74657d1fe1c1b0c1452631cc38a9e6987f upstream.

armv7's flush_cache_all() flushes caches via set/way. To
determine the cache attributes (line size, number of sets,
etc.) the assembly first writes the CSSELR register to select a
cache level and then reads the CCSIDR register. The CSSELR register
is banked per-cpu and is used to determine which cache level CCSIDR
reads. If the task is migrated between when the CSSELR is written and
the CCSIDR is read the CCSIDR value may be for an unexpected cache
level (for example L1 instead of L2) and incorrect cache flushing
could occur.

Disable interrupts across the write and read so that the correct
cache attributes are read and used for the cache flushing
routine. We disable interrupts instead of disabling preemption
because the critical section is only 3 instructions and we want
to call v7_dcache_flush_all from __v7_setup which doesn't have a
full kernel stack with a struct thread_info.

This fixes a problem we see in scm_call() when flush_cache_all()
is called from preemptible context and sometimes the L2 cache is
not properly flushed out.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/cache-v7.S | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 35931191c84e..c3e4b863b245 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -54,9 +54,15 @@ loop1:
 	and	r1, r1, #7			@ mask of the bits for current cache only
 	cmp	r1, #2				@ see what cache we have at this level
 	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs r9		@ make cssr&csidr read atomic
+#endif
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb					@ isb to sych the new cssr&csidr
 	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
 	and	r2, r1, #7			@ extract the length of the cache lines
 	add	r2, r2, #4			@ add 4 (line length offset)
 	ldr	r4, =0x3ff
-- 
cgit v1.2.3


From bbb8ae42eb210ba2bdff67450f3737e10cb042f3 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Wed, 15 Feb 2012 16:01:42 +0100
Subject: ARM: 7325/1: fix v7 boot with lockdep enabled

commit 8e43a905dd574f54c5715d978318290ceafbe275 upstream.

Bootup with lockdep enabled has been broken on v7 since b46c0f74657d
("ARM: 7321/1: cache-v7: Disable preemption when reading CCSIDR").

This is because v7_setup (which is called very early during boot) calls
v7_flush_dcache_all, and the save_and_disable_irqs added by that patch
ends up attempting to call into lockdep C code (trace_hardirqs_off())
when we are in no position to execute it (no stack, MMU off).

Fix this by using a notrace variant of save_and_disable_irqs.  The code
already uses the notrace variant of restore_irqs.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/assembler.h | 5 +++++
 arch/arm/mm/cache-v7.S           | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 65c3f2474f5e..4e25f1888356 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -137,6 +137,11 @@
 	disable_irq
 	.endm
 
+	.macro	save_and_disable_irqs_notrace, oldcpsr
+	mrs	\oldcpsr, cpsr
+	disable_irq_notrace
+	.endm
+
 /*
  * Restore interrupt state previously stored in a register.  We don't
  * guarantee that this will preserve the flags.
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index c3e4b863b245..1ed1fd361308 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -55,7 +55,7 @@ loop1:
 	cmp	r1, #2				@ see what cache we have at this level
 	blt	skip				@ skip if no cache, or just i-cache
 #ifdef CONFIG_PREEMPT
-	save_and_disable_irqs r9		@ make cssr&csidr read atomic
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
 #endif
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb					@ isb to sych the new cssr&csidr
-- 
cgit v1.2.3


From 454d147172263d0b022f32b86336f92243f89158 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Feb 2012 13:47:25 -0800
Subject: i387: math_state_restore() isn't called from asm

commit be98c2cdb15ba26148cd2bd58a857d4f7759ed38 upstream.

It was marked asmlinkage for some really old and stale legacy reasons.
Fix that and the equally stale comment.

Noticed when debugging the irq_fpu_usable() bugs.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 2 +-
 arch/x86/kernel/traps.c     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea05644..cba143210780 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,7 +29,7 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
+extern void math_state_restore(void);
 extern void __math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9de..5878de3fb08d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -745,10 +745,10 @@ void __math_state_restore(void)
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
  *
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
  */
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
-- 
cgit v1.2.3


From 00717d1f238918b105ed561a466fcd4271206fb2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Feb 2012 13:56:14 -0800
Subject: i387: make irq_fpu_usable() tests more robust

commit 5b1cbac37798805c1fee18c8cebe5c0a13975b17 upstream.

Some code - especially the crypto layer - wants to use the x86
FP/MMX/AVX register set in what may be interrupt (typically softirq)
context.

That *can* be ok, but the tests for when it was ok were somewhat
suspect.  We cannot touch the thread-specific status bits either, so
we'd better check that we're not going to try to save FP state or
anything like that.

Now, it may be that the TS bit is always cleared *before* we set the
USEDFPU bit (and only set when we had already cleared the USEDFP
before), so the TS bit test may actually have been sufficient, but it
certainly was not obviously so.

So this explicitly verifies that we will not touch the TS_USEDFPU bit,
and adds a few related sanity-checks.  Because it seems that somehow
AES-NI is corrupting user FP state.  The cause is not clear, and this
patch doesn't fix it, but while debugging it I really wanted the code to
be more obviously correct and robust.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 54 ++++++++++++++++++++++++++++++++++++++-------
 arch/x86/kernel/traps.c     |  1 +
 2 files changed, 47 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index cba143210780..a4365829e386 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -307,9 +307,54 @@ static inline void __clear_fpu(struct task_struct *tsk)
 	}
 }
 
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: TS_USEDFPU must be clear (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+	return !(current_thread_info()->status & TS_USEDFPU) &&
+		(read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+	return !in_interrupt() ||
+		interrupted_user_mode() ||
+		interrupted_kernel_fpu_idle();
+}
+
 static inline void kernel_fpu_begin(void)
 {
 	struct thread_info *me = current_thread_info();
+
+	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
 	if (me->status & TS_USEDFPU)
 		__save_init_fpu(me->task);
@@ -323,14 +368,6 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
-static inline bool irq_fpu_usable(void)
-{
-	struct pt_regs *regs;
-
-	return !in_interrupt() || !(regs = get_irq_regs()) || \
-		user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
@@ -367,6 +404,7 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
+	WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
 	preempt_disable();
 	__save_init_fpu(tsk);
 	stts();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5878de3fb08d..15903de01119 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -777,6 +777,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	WARN_ON_ONCE(!user_mode_vm(regs));
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
-- 
cgit v1.2.3


From 09ffc93a8a1e8cf06547d20f5a0ddfe880179fe0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 15 Feb 2012 08:05:18 -0800
Subject: i387: fix sense of sanity check

commit c38e23456278e967f094b08247ffc3711b1029b2 upstream.

The check for save_init_fpu() (introduced in commit 5b1cbac37798: "i387:
make irq_fpu_usable() tests more robust") was the wrong way around, but
I hadn't noticed, because my "tests" were bogus: the FPU exceptions are
disabled by default, so even doing a divide by zero never actually
triggers this code at all unless you do extra work to enable them.

So if anybody did enable them, they'd get one spurious warning.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a4365829e386..262bea981aa5 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -404,7 +404,7 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
-	WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
+	WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
 	preempt_disable();
 	__save_init_fpu(tsk);
 	stts();
-- 
cgit v1.2.3


From c3cb6440304a2f9afd240bd860860d4a4955d409 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 09:15:04 -0800
Subject: i387: fix x86-64 preemption-unsafe user stack save/restore

commit 15d8791cae75dca27bfda8ecfe87dca9379d6bb0 upstream.

Commit 5b1cbac37798 ("i387: make irq_fpu_usable() tests more robust")
added a sanity check to the #NM handler to verify that we never cause
the "Device Not Available" exception in kernel mode.

However, that check actually pinpointed a (fundamental) race where we do
cause that exception as part of the signal stack FPU state save/restore
code.

Because we use the floating point instructions themselves to save and
restore state directly from user mode, we cannot do that atomically with
testing the TS_USEDFPU bit: the user mode access itself may cause a page
fault, which causes a task switch, which saves and restores the FP/MMX
state from the kernel buffers.

This kind of "recursive" FP state save is fine per se, but it means that
when the signal stack save/restore gets restarted, it will now take the
'#NM' exception we originally tried to avoid.  With preemption this can
happen even without the page fault - but because of the user access, we
cannot just disable preemption around the save/restore instruction.

There are various ways to solve this, including using the
"enable/disable_page_fault()" helpers to not allow page faults at all
during the sequence, and fall back to copying things by hand without the
use of the native FP state save/restore instructions.

However, the simplest thing to do is to just allow the #NM from kernel
space, but fix the race in setting and clearing CR0.TS that this all
exposed: the TS bit changes and the TS_USEDFPU bit absolutely have to be
atomic wrt scheduling, so while the actual state save/restore can be
interrupted and restarted, the act of actually clearing/setting CR0.TS
and the TS_USEDFPU bit together must not.

Instead of just adding random "preempt_disable/enable()" calls to what
is already excessively ugly code, this introduces some helper functions
that mostly mirror the "kernel_fpu_begin/end()" functionality, just for
the user state instead.

Those helper functions should probably eventually replace the other
ad-hoc CR0.TS and TS_USEDFPU tests too, but I'll need to think about it
some more: the task switching functionality in particular needs to
expose the difference between the 'prev' and 'next' threads, while the
new helper functions intentionally were written to only work with
'current'.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c     |  1 -
 arch/x86/kernel/xsave.c     | 10 +++-------
 3 files changed, 45 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 262bea981aa5..6e87fa43c357 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -399,6 +399,48 @@ static inline void irq_ts_restore(int TS_state)
 		stts();
 }
 
+/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+	return current_thread_info()->status & TS_USEDFPU;
+}
+
+static inline void user_fpu_end(void)
+{
+	preempt_disable();
+	current_thread_info()->status &= ~TS_USEDFPU;
+	stts();
+	preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+	preempt_disable();
+	if (!user_has_fpu()) {
+		clts();
+		current_thread_info()->status |= TS_USEDFPU;
+	}
+	preempt_enable();
+}
+
 /*
  * These disable preemption on their own and are safe
  */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 15903de01119..5878de3fb08d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -777,7 +777,6 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-	WARN_ON_ONCE(!user_mode_vm(regs));
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a3911343976b..86f1f09a738a 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
 	if (!used_math())
 		return 0;
 
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (user_has_fpu()) {
 		if (use_xsave())
 			err = xsave_user(buf);
 		else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
 
 		if (err)
 			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
 		if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
 			return err;
 	}
 
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
+	user_fpu_begin();
 	if (use_xsave())
 		err = restore_user_xstate(buf);
 	else
-- 
cgit v1.2.3


From 0affff96641db67d43092de85c3c4c54028d62e9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 12:22:48 -0800
Subject: i387: move TS_USEDFPU clearing out of __save_init_fpu and into
 callers

commit b6c66418dcad0fcf83cd1d0a39482db37bf4fc41 upstream.

Touching TS_USEDFPU without touching CR0.TS is confusing, so don't do
it.  By moving it into the callers, we always do the TS_USEDFPU next to
the CR0.TS accesses in the source code, and it's much easier to see how
the two go hand in hand.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6e87fa43c357..55fb3aa84b0d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -259,7 +259,6 @@ static inline void fpu_save_init(struct fpu *fpu)
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
 	fpu_save_init(&tsk->thread.fpu);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -290,6 +289,7 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
 {
 	if (task_thread_info(tsk)->status & TS_USEDFPU) {
 		__save_init_fpu(tsk);
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	} else
 		tsk->fpu_counter = 0;
@@ -356,9 +356,11 @@ static inline void kernel_fpu_begin(void)
 
 	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
-	if (me->status & TS_USEDFPU)
+	if (me->status & TS_USEDFPU) {
 		__save_init_fpu(me->task);
-	else
+		me->status &= ~TS_USEDFPU;
+		/* We do 'stts()' in kernel_fpu_end() */
+	} else
 		clts();
 }
 
@@ -449,6 +451,7 @@ static inline void save_init_fpu(struct task_struct *tsk)
 	WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
 	preempt_disable();
 	__save_init_fpu(tsk);
+	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 	stts();
 	preempt_enable();
 }
-- 
cgit v1.2.3


From 9221484f11c3902bfc84e18e6c6f50f8739134a7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 13:33:12 -0800
Subject: i387: don't ever touch TS_USEDFPU directly, use helper functions

commit 6d59d7a9f5b723a7ac1925c136e93ec83c0c3043 upstream.

This creates three helper functions that do the TS_USEDFPU accesses, and
makes everybody that used to do it by hand use those helpers instead.

In addition, there's a couple of helper functions for the "change both
CR0.TS and TS_USEDFPU at the same time" case, and the places that do
that together have been changed to use those.  That means that we have
fewer random places that open-code this situation.

The intent is partly to clarify the code without actually changing any
semantics yet (since we clearly still have some hard to reproduce bug in
this area), but also to make it much easier to use another approach
entirely to caching the CR0.TS bit for software accesses.

Right now we use a bit in the thread-info 'status' variable (this patch
does not change that), but we might want to make it a full field of its
own or even make it a per-cpu variable.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 75 +++++++++++++++++++++++++++++++++------------
 arch/x86/kernel/traps.c     |  2 +-
 arch/x86/kernel/xsave.c     |  2 +-
 arch/x86/kvm/vmx.c          |  2 +-
 4 files changed, 58 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 55fb3aa84b0d..730d7becc97a 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -279,6 +279,47 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
 
+/*
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
+ */
+static inline int __thread_has_fpu(struct thread_info *ti)
+{
+	return ti->status & TS_USEDFPU;
+}
+
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct thread_info *ti)
+{
+	ti->status &= ~TS_USEDFPU;
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct thread_info *ti)
+{
+	ti->status |= TS_USEDFPU;
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct thread_info *ti)
+{
+	__thread_clear_has_fpu(ti);
+	stts();
+}
+
+static inline void __thread_fpu_begin(struct thread_info *ti)
+{
+	clts();
+	__thread_set_has_fpu(ti);
+}
+
 /*
  * Signal frame handlers...
  */
@@ -287,23 +328,21 @@ extern int restore_i387_xstate(void __user *buf);
 
 static inline void __unlazy_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (__thread_has_fpu(task_thread_info(tsk))) {
 		__save_init_fpu(tsk);
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		__thread_fpu_end(task_thread_info(tsk));
 	} else
 		tsk->fpu_counter = 0;
 }
 
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (__thread_has_fpu(task_thread_info(tsk))) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		__thread_fpu_end(task_thread_info(tsk));
 	}
 }
 
@@ -311,14 +350,14 @@ static inline void __clear_fpu(struct task_struct *tsk)
  * Were we in an interrupt that interrupted kernel mode?
  *
  * We can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: TS_USEDFPU must be clear (so
+ * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	return !(current_thread_info()->status & TS_USEDFPU) &&
+	return !__thread_has_fpu(current_thread_info()) &&
 		(read_cr0() & X86_CR0_TS);
 }
 
@@ -356,9 +395,9 @@ static inline void kernel_fpu_begin(void)
 
 	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
-	if (me->status & TS_USEDFPU) {
+	if (__thread_has_fpu(me)) {
 		__save_init_fpu(me->task);
-		me->status &= ~TS_USEDFPU;
+		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
 	} else
 		clts();
@@ -422,24 +461,21 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline int user_has_fpu(void)
 {
-	return current_thread_info()->status & TS_USEDFPU;
+	return __thread_has_fpu(current_thread_info());
 }
 
 static inline void user_fpu_end(void)
 {
 	preempt_disable();
-	current_thread_info()->status &= ~TS_USEDFPU;
-	stts();
+	__thread_fpu_end(current_thread_info());
 	preempt_enable();
 }
 
 static inline void user_fpu_begin(void)
 {
 	preempt_disable();
-	if (!user_has_fpu()) {
-		clts();
-		current_thread_info()->status |= TS_USEDFPU;
-	}
+	if (!user_has_fpu())
+		__thread_fpu_begin(current_thread_info());
 	preempt_enable();
 }
 
@@ -448,11 +484,10 @@ static inline void user_fpu_begin(void)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
-	WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
+	WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-	stts();
+	__thread_fpu_end(task_thread_info(tsk));
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5878de3fb08d..326476d748b5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -734,7 +734,7 @@ void __math_state_restore(void)
 		return;
 	}
 
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	__thread_set_has_fpu(thread);	/* clts in caller! */
 	tsk->fpu_counter++;
 }
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 86f1f09a738a..a0bcd0dbc951 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
 	if (!fx)
 		return;
 
-	BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+	BUG_ON(__thread_has_fpu(task_thread_info(tsk)));
 
 	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d48ec60ea421..6da2baea08c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (current_thread_info()->status & TS_USEDFPU)
+	if (__thread_has_fpu(current_thread_info()))
 		clts();
 	load_gdt(&__get_cpu_var(host_gdt));
 }
-- 
cgit v1.2.3


From 06f4bbda338e6aa42497b76a16cf38e2fdd29885 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 15:45:23 -0800
Subject: i387: do not preload FPU state at task switch time

commit b3b0870ef3ffed72b92415423da864f440f57ad6 upstream.

Yes, taking the trap to re-load the FPU/MMX state is expensive, but so
is spending several days looking for a bug in the state save/restore
code.  And the preload code has some rather subtle interactions with
both paravirtualization support and segment state restore, so it's not
nearly as simple as it should be.

Also, now that we no longer necessarily depend on a single bit (ie
TS_USEDFPU) for keeping track of the state of the FPU, we migth be able
to do better.  If we are really switching between two processes that
keep touching the FP state, save/restore is inevitable, but in the case
of having one process that does most of the FPU usage, we may actually
be able to do much better than the preloading.

In particular, we may be able to keep track of which CPU the process ran
on last, and also per CPU keep track of which process' FP state that CPU
has.  For modern CPU's that don't destroy the FPU contents on save time,
that would allow us to do a lazy restore by just re-enabling the
existing FPU state - with no restore cost at all!

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h  |  1 -
 arch/x86/kernel/process_32.c | 20 --------------------
 arch/x86/kernel/process_64.c | 23 -----------------------
 arch/x86/kernel/traps.c      | 35 +++++++++++------------------------
 4 files changed, 11 insertions(+), 68 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 730d7becc97a..3521c2434344 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -30,7 +30,6 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern void math_state_restore(void);
-extern void __math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc59067b..74aa377081f0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -293,23 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
-	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
 	__unlazy_fpu(prev_p);
 
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
-
 	/*
 	 * Reload esp0.
 	 */
@@ -348,11 +336,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If we're going to preload the fpu context, make sure clts
-	   is run while we're batching the cpu state updates. */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -362,9 +345,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	if (preload_fpu)
-		__math_state_restore();
-
 	/*
 	 * Restore %gs if needed (which is common)
 	 */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab8df33..789cc829f77f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -377,18 +377,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
-	bool preload_fpu;
-
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -421,10 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* Must be after DS reload */
 	__unlazy_fpu(prev_p);
 
-	/* Make sure cpu is ready for new context */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -483,13 +467,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/*
-	 * Preload the FPU context, now that we've determined that the
-	 * task is likely to be using it. 
-	 */
-	if (preload_fpu)
-		__math_state_restore();
-
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 326476d748b5..4536830b606f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -716,28 +716,6 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
-/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use.  Used during context switch.
- */
-void __math_state_restore(void)
-{
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	__thread_set_has_fpu(thread);	/* clts in caller! */
-	tsk->fpu_counter++;
-}
-
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
@@ -768,9 +746,18 @@ void math_state_restore(void)
 		local_irq_disable();
 	}
 
-	clts();				/* Allow maths ops (or we recurse) */
+	__thread_fpu_begin(thread);
 
-	__math_state_restore();
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(thread);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	tsk->fpu_counter++;
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
-- 
cgit v1.2.3


From 70b5ef05d889e2be250fd1d963e89f7ca1dd1965 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 19:11:15 -0800
Subject: i387: move AMD K7/K8 fpu fxsave/fxrstor workaround from save to
 restore

commit 4903062b5485f0e2c286a23b44c9b59d9b017d53 upstream.

The AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
pending.  In order to not leak FIP state from one process to another, we
need to do a floating point load after the fxsave of the old process,
and before the fxrstor of the new FPU state.  That resets the state to
the (uninteresting) kernel load, rather than some potentially sensitive
user information.

We used to do this directly after the FPU state save, but that is
actually very inconvenient, since it

 (a) corrupts what is potentially perfectly good FPU state that we might
     want to lazy avoid restoring later and

 (b) on x86-64 it resulted in a very annoying ordering constraint, where
     "__unlazy_fpu()" in the task switch needs to be delayed until after
     the DS segment has been reloaded just to get the new DS value.

Coupling it to the fxrstor instead of the fxsave automatically avoids
both of these issues, and also ensures that we only do it when actually
necessary (the FP state after a save may never actually get used).  It's
simply a much more natural place for the leaked state cleanup.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h  | 19 -------------------
 arch/x86/kernel/process_64.c |  5 ++---
 arch/x86/kernel/traps.c      | 14 ++++++++++++++
 3 files changed, 16 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 3521c2434344..01b115d86770 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -211,15 +211,6 @@ static inline void fpu_fxsave(struct fpu *fpu)
 
 #endif	/* CONFIG_X86_64 */
 
-/* We need a safe address that is cheap to find and that is already
-   in L1 during context switch. The best choices are unfortunately
-   different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
 /*
  * These must be called with preempt disabled
  */
@@ -243,16 +234,6 @@ static inline void fpu_save_init(struct fpu *fpu)
 
 	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
 		asm volatile("fnclex");
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending.  Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"	  	/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
 }
 
 static inline void __save_init_fpu(struct task_struct *tsk)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 789cc829f77f..edb791c02c87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -378,6 +378,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
 
+	__unlazy_fpu(prev_p);
+
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
@@ -406,9 +408,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
-	/* Must be after DS reload */
-	__unlazy_fpu(prev_p);
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4536830b606f..1b8128a0f081 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -731,6 +731,10 @@ void math_state_restore(void)
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
 
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We just brought in "thread->task", so use that */
+#define safe_address (thread->task)
+
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
 		/*
@@ -748,6 +752,16 @@ void math_state_restore(void)
 
 	__thread_fpu_begin(thread);
 
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
-- 
cgit v1.2.3


From 0a9d89d976531bd5ea7fce618cee886c79b43e07 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 17 Feb 2012 21:48:54 -0800
Subject: i387: move TS_USEDFPU flag from thread_info to task_struct

commit f94edacf998516ac9d849f7bc6949a703977a7f3 upstream.

This moves the bit that indicates whether a thread has ownership of the
FPU from the TS_USEDFPU bit in thread_info->status to a word of its own
(called 'has_fpu') in task_struct->thread.has_fpu.

This fixes two independent bugs at the same time:

 - changing 'thread_info->status' from the scheduler causes nasty
   problems for the other users of that variable, since it is defined to
   be thread-synchronous (that's what the "TS_" part of the naming was
   supposed to indicate).

   So perfectly valid code could (and did) do

	ti->status |= TS_RESTORE_SIGMASK;

   and the compiler was free to do that as separate load, or and store
   instructions.  Which can cause problems with preemption, since a task
   switch could happen in between, and change the TS_USEDFPU bit. The
   change to TS_USEDFPU would be overwritten by the final store.

   In practice, this seldom happened, though, because the 'status' field
   was seldom used more than once, so gcc would generally tend to
   generate code that used a read-modify-write instruction and thus
   happened to avoid this problem - RMW instructions are naturally low
   fat and preemption-safe.

 - On x86-32, the current_thread_info() pointer would, during interrupts
   and softirqs, point to a *copy* of the real thread_info, because
   x86-32 uses %esp to calculate the thread_info address, and thus the
   separate irq (and softirq) stacks would cause these kinds of odd
   thread_info copy aliases.

   This is normally not a problem, since interrupts aren't supposed to
   look at thread information anyway (what thread is running at
   interrupt time really isn't very well-defined), but it confused the
   heck out of irq_fpu_usable() and the code that tried to squirrel
   away the FPU state.

   (It also caused untold confusion for us poor kernel developers).

It also turns out that using 'task_struct' is actually much more natural
for most of the call sites that care about the FPU state, since they
tend to work with the task struct for other reasons anyway (ie
scheduling).  And the FPU data that we are going to save/restore is
found there too.

Thanks to Arjan Van De Ven <arjan@linux.intel.com> for pointing us to
the %esp issue.

Cc: Arjan van de Ven <arjan@linux.intel.com>
Reported-and-tested-by: Raphael Prevost <raphael@buro.asia>
Acked-and-tested-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h        | 44 +++++++++++++++++++-------------------
 arch/x86/include/asm/processor.h   |  1 +
 arch/x86/include/asm/thread_info.h |  2 --
 arch/x86/kernel/traps.c            | 11 +++++-----
 arch/x86/kernel/xsave.c            |  2 +-
 arch/x86/kvm/vmx.c                 |  2 +-
 6 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 01b115d86770..f5376676f89c 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -264,21 +264,21 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
  * be preemption protection *and* they need to be
  * properly paired with the CR0.TS changes!
  */
-static inline int __thread_has_fpu(struct thread_info *ti)
+static inline int __thread_has_fpu(struct task_struct *tsk)
 {
-	return ti->status & TS_USEDFPU;
+	return tsk->thread.has_fpu;
 }
 
 /* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct thread_info *ti)
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	ti->status &= ~TS_USEDFPU;
+	tsk->thread.has_fpu = 0;
 }
 
 /* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct thread_info *ti)
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
-	ti->status |= TS_USEDFPU;
+	tsk->thread.has_fpu = 1;
 }
 
 /*
@@ -288,16 +288,16 @@ static inline void __thread_set_has_fpu(struct thread_info *ti)
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
-static inline void __thread_fpu_end(struct thread_info *ti)
+static inline void __thread_fpu_end(struct task_struct *tsk)
 {
-	__thread_clear_has_fpu(ti);
+	__thread_clear_has_fpu(tsk);
 	stts();
 }
 
-static inline void __thread_fpu_begin(struct thread_info *ti)
+static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
 	clts();
-	__thread_set_has_fpu(ti);
+	__thread_set_has_fpu(tsk);
 }
 
 /*
@@ -308,21 +308,21 @@ extern int restore_i387_xstate(void __user *buf);
 
 static inline void __unlazy_fpu(struct task_struct *tsk)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
+	if (__thread_has_fpu(tsk)) {
 		__save_init_fpu(tsk);
-		__thread_fpu_end(task_thread_info(tsk));
+		__thread_fpu_end(tsk);
 	} else
 		tsk->fpu_counter = 0;
 }
 
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
+	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(task_thread_info(tsk));
+		__thread_fpu_end(tsk);
 	}
 }
 
@@ -337,7 +337,7 @@ static inline void __clear_fpu(struct task_struct *tsk)
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	return !__thread_has_fpu(current_thread_info()) &&
+	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
 
@@ -371,12 +371,12 @@ static inline bool irq_fpu_usable(void)
 
 static inline void kernel_fpu_begin(void)
 {
-	struct thread_info *me = current_thread_info();
+	struct task_struct *me = current;
 
 	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
 	if (__thread_has_fpu(me)) {
-		__save_init_fpu(me->task);
+		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
 	} else
@@ -441,13 +441,13 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline int user_has_fpu(void)
 {
-	return __thread_has_fpu(current_thread_info());
+	return __thread_has_fpu(current);
 }
 
 static inline void user_fpu_end(void)
 {
 	preempt_disable();
-	__thread_fpu_end(current_thread_info());
+	__thread_fpu_end(current);
 	preempt_enable();
 }
 
@@ -455,7 +455,7 @@ static inline void user_fpu_begin(void)
 {
 	preempt_disable();
 	if (!user_has_fpu())
-		__thread_fpu_begin(current_thread_info());
+		__thread_fpu_begin(current);
 	preempt_enable();
 }
 
@@ -464,10 +464,10 @@ static inline void user_fpu_begin(void)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
-	WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
+	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	__thread_fpu_end(task_thread_info(tsk));
+	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 219371546afd..5d9c61d0b270 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -454,6 +454,7 @@ struct thread_struct {
 	unsigned long		trap_no;
 	unsigned long		error_code;
 	/* floating point and extended processor state */
+	unsigned long		has_fpu;
 	struct fpu		fpu;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1f2e61e28981..278d3d5f9062 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1b8128a0f081..5622d4e115d2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -728,12 +728,11 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
  */
 void math_state_restore(void)
 {
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
+	struct task_struct *tsk = current;
 
 	/* We need a safe address that is cheap to find and that is already
-	   in L1. We just brought in "thread->task", so use that */
-#define safe_address (thread->task)
+	   in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
 
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
@@ -750,7 +749,7 @@ void math_state_restore(void)
 		local_irq_disable();
 	}
 
-	__thread_fpu_begin(thread);
+	__thread_fpu_begin(tsk);
 
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
@@ -766,7 +765,7 @@ void math_state_restore(void)
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(thread);
+		__thread_fpu_end(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a0bcd0dbc951..711091114119 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
 	if (!fx)
 		return;
 
-	BUG_ON(__thread_has_fpu(task_thread_info(tsk)));
+	BUG_ON(__thread_has_fpu(tsk));
 
 	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6da2baea08c8..2ad060acc445 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (__thread_has_fpu(current_thread_info()))
+	if (__thread_has_fpu(current))
 		clts();
 	load_gdt(&__get_cpu_var(host_gdt));
 }
-- 
cgit v1.2.3


From f4def3f88dc57648d1603656f1ffdf498bfce1ee Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 18 Feb 2012 12:56:35 -0800
Subject: i387: re-introduce FPU state preloading at context switch time

commit 34ddc81a230b15c0e345b6b253049db731499f7e upstream.

After all the FPU state cleanups and finally finding the problem that
caused all our FPU save/restore problems, this re-introduces the
preloading of FPU state that was removed in commit b3b0870ef3ff ("i387:
do not preload FPU state at task switch time").

However, instead of simply reverting the removal, this reimplements
preloading with several fixes, most notably

 - properly abstracted as a true FPU state switch, rather than as
   open-coded save and restore with various hacks.

   In particular, implementing it as a proper FPU state switch allows us
   to optimize the CR0.TS flag accesses: there is no reason to set the
   TS bit only to then almost immediately clear it again.  CR0 accesses
   are quite slow and expensive, don't flip the bit back and forth for
   no good reason.

 - Make sure that the same model works for both x86-32 and x86-64, so
   that there are no gratuitous differences between the two due to the
   way they save and restore segment state differently due to
   architectural differences that really don't matter to the FPU state.

 - Avoid exposing the "preload" state to the context switch routines,
   and in particular allow the concept of lazy state restore: if nothing
   else has used the FPU in the meantime, and the process is still on
   the same CPU, we can avoid restoring state from memory entirely, just
   re-expose the state that is still in the FPU unit.

   That optimized lazy restore isn't actually implemented here, but the
   infrastructure is set up for it.  Of course, older CPU's that use
   'fnsave' to save the state cannot take advantage of this, since the
   state saving also trashes the state.

In other words, there is now an actual _design_ to the FPU state saving,
rather than just random historical baggage.  Hopefully it's easier to
follow as a result.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h  | 110 ++++++++++++++++++++++++++++++++++++-------
 arch/x86/kernel/process_32.c |   5 +-
 arch/x86/kernel/process_64.c |   5 +-
 arch/x86/kernel/traps.c      |  55 +++++++++++++---------
 4 files changed, 133 insertions(+), 42 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index f5376676f89c..a850b4d8d14d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
+extern void __math_state_restore(struct task_struct *);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (__save_init_fpu(old))
+			fpu_lazy_state_intact(old);
+		__thread_clear_has_fpu(old);
+		old->fpu_counter++;
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		if (fpu.preload) {
+			if (fpu_lazy_restore(new))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
-	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->fpu_counter = 0;
+	if (fpu.preload)
+		__math_state_restore(new);
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (__thread_has_fpu(tsk)) {
@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 74aa377081f0..fcdb1b34aa1c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -293,10 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0.
@@ -351,6 +352,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index edb791c02c87..b01898d2744b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -377,8 +377,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	fpu_switch_t fpu;
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -448,6 +449,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5622d4e115d2..1b26e01047b5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -716,6 +716,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
+/*
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
+ */
+void __math_state_restore(struct task_struct *tsk)
+{
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+}
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
@@ -730,10 +761,6 @@ void math_state_restore(void)
 {
 	struct task_struct *tsk = current;
 
-	/* We need a safe address that is cheap to find and that is already
-	   in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
-#define safe_address (tsk->thread.has_fpu)
-
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
 		/*
@@ -750,25 +777,7 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending.  Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"	  	/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
+	__math_state_restore(tsk);
 
 	tsk->fpu_counter++;
 }
-- 
cgit v1.2.3


From 534b465e1cf6e3bbceebbc7866a204107b83eb95 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Wed, 8 Feb 2012 20:52:29 +0100
Subject: x86/amd: Fix L1i and L2 cache sharing information for AMD family 15h
 processors

commit 32c3233885eb10ac9cb9410f2f8cd64b8df2b2a1 upstream.

For L1 instruction cache and L2 cache the shared CPU information
is wrong. On current AMD family 15h CPUs those caches are shared
between both cores of a compute unit.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=42607

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Petkov Borislav <Borislav.Petkov@amd.com>
Cc: Dave Jones <davej@redhat.com>
Link: http://lkml.kernel.org/r/20120208195229.GA17523@alberich.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 44 ++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c533ed94..fde44284cf21 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -330,8 +330,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
-					int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 {
 	static struct amd_l3_cache *__cpuinitdata l3_caches;
 	int node;
@@ -748,14 +747,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 {
-	struct _cpuid4_info	*this_leaf, *sibling_leaf;
-	unsigned long num_threads_sharing;
-	int index_msb, i, sibling;
+	struct _cpuid4_info *this_leaf;
+	int ret, i, sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+	ret = 0;
+	if (index == 3) {
+		ret = 1;
 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
@@ -766,8 +767,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-		return;
+	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+		ret = 1;
+		for_each_cpu(i, cpu_sibling_mask(cpu)) {
+			if (!per_cpu(ici_cpuid4_info, i))
+				continue;
+			this_leaf = CPUID4_INFO_IDX(i, index);
+			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+				if (!cpu_online(sibling))
+					continue;
+				set_bit(sibling, this_leaf->shared_cpu_map);
+			}
+		}
 	}
+
+	return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+	struct _cpuid4_info *this_leaf, *sibling_leaf;
+	unsigned long num_threads_sharing;
+	int index_msb, i;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (cache_shared_amd_cpu_map_setup(cpu, index))
+			return;
+	}
+
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 
-- 
cgit v1.2.3


From 9eb9e47632735bb3bb3da343663ea10a6954ef31 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 27 Feb 2012 10:01:52 +0100
Subject: compat: fix compile breakage on s390

commit 048cd4e51d24ebf7f3552226d03c769d6ad91658 upstream.

The new is_compat_task() define for the !COMPAT case in
include/linux/compat.h conflicts with a similar define in
arch/s390/include/asm/compat.h.

This is the minimal patch which fixes the build issues.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/compat.h | 7 -------
 arch/s390/kernel/process.c     | 1 -
 arch/s390/kernel/ptrace.c      | 2 +-
 arch/s390/kernel/setup.c       | 2 +-
 arch/s390/mm/fault.c           | 1 -
 arch/s390/mm/mmap.c            | 2 +-
 6 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index da359ca6fe55..f7b74bcce10c 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -172,13 +172,6 @@ static inline int is_compat_task(void)
 	return is_32bit_task();
 }
 
-#else
-
-static inline int is_compat_task(void)
-{
-	return 0;
-}
-
 #endif
 
 static inline void __user *arch_compat_alloc_user_space(long len)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 541a7509faeb..abdc2b1063ed 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -28,7 +28,6 @@
 #include <asm/irq.h>
 #include <asm/timer.h>
 #include <asm/nmi.h>
-#include <asm/compat.h>
 #include <asm/smp.h>
 #include "entry.h"
 
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 5804cfa7cba7..5c55466e78e6 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -20,8 +20,8 @@
 #include <linux/regset.h>
 #include <linux/tracehook.h>
 #include <linux/seccomp.h>
+#include <linux/compat.h>
 #include <trace/syscall.h>
-#include <asm/compat.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 0c35dee10b00..7547f57e2913 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -42,6 +42,7 @@
 #include <linux/reboot.h>
 #include <linux/topology.h>
 #include <linux/ftrace.h>
+#include <linux/compat.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -55,7 +56,6 @@
 #include <asm/ptrace.h>
 #include <asm/sections.h>
 #include <asm/ebcdic.h>
-#include <asm/compat.h>
 #include <asm/kvm_virtio.h>
 
 long psw_kernel_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe103e891e7a..d814f7914003 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -36,7 +36,6 @@
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
-#include <asm/compat.h>
 #include "../kernel/entry.h"
 
 #ifndef CONFIG_64BIT
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c9a9f7f18188..c0cf9ceb3833 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -28,8 +28,8 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/random.h>
+#include <linux/compat.h>
 #include <asm/pgalloc.h>
-#include <asm/compat.h>
 
 static unsigned long stack_maxrandom_size(void)
 {
-- 
cgit v1.2.3


From fd3fb91a55d4a8f7eff63b26631a4a34c4ba76d0 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Mon, 27 Feb 2012 17:28:02 +0100
Subject: ARM: LPC32xx: serial.c: HW bug workaround

commit 2707208ee8a80dbbd5426f5aa1a934f766825bb5 upstream.

This patch fixes a HW bug by flushing RX FIFOs of the UARTs on init. It was
ported from NXP's git.lpclinux.com tree.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-lpc32xx/serial.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-lpc32xx/serial.c b/arch/arm/mach-lpc32xx/serial.c
index 429cfdbb2b3d..1a3fd4ce0469 100644
--- a/arch/arm/mach-lpc32xx/serial.c
+++ b/arch/arm/mach-lpc32xx/serial.c
@@ -88,6 +88,7 @@ struct uartinit {
 	char *uart_ck_name;
 	u32 ck_mode_mask;
 	void __iomem *pdiv_clk_reg;
+	resource_size_t mapbase;
 };
 
 static struct uartinit uartinit_data[] __initdata = {
@@ -97,6 +98,7 @@ static struct uartinit uartinit_data[] __initdata = {
 		.ck_mode_mask =
 			LPC32XX_UART_CLKMODE_LOAD(LPC32XX_UART_CLKMODE_ON, 5),
 		.pdiv_clk_reg = LPC32XX_CLKPWR_UART5_CLK_CTRL,
+		.mapbase = LPC32XX_UART5_BASE,
 	},
 #endif
 #ifdef CONFIG_ARCH_LPC32XX_UART3_SELECT
@@ -105,6 +107,7 @@ static struct uartinit uartinit_data[] __initdata = {
 		.ck_mode_mask =
 			LPC32XX_UART_CLKMODE_LOAD(LPC32XX_UART_CLKMODE_ON, 3),
 		.pdiv_clk_reg = LPC32XX_CLKPWR_UART3_CLK_CTRL,
+		.mapbase = LPC32XX_UART3_BASE,
 	},
 #endif
 #ifdef CONFIG_ARCH_LPC32XX_UART4_SELECT
@@ -113,6 +116,7 @@ static struct uartinit uartinit_data[] __initdata = {
 		.ck_mode_mask =
 			LPC32XX_UART_CLKMODE_LOAD(LPC32XX_UART_CLKMODE_ON, 4),
 		.pdiv_clk_reg = LPC32XX_CLKPWR_UART4_CLK_CTRL,
+		.mapbase = LPC32XX_UART4_BASE,
 	},
 #endif
 #ifdef CONFIG_ARCH_LPC32XX_UART6_SELECT
@@ -121,6 +125,7 @@ static struct uartinit uartinit_data[] __initdata = {
 		.ck_mode_mask =
 			LPC32XX_UART_CLKMODE_LOAD(LPC32XX_UART_CLKMODE_ON, 6),
 		.pdiv_clk_reg = LPC32XX_CLKPWR_UART6_CLK_CTRL,
+		.mapbase = LPC32XX_UART6_BASE,
 	},
 #endif
 };
@@ -165,6 +170,19 @@ void __init lpc32xx_serial_init(void)
 
 		/* pre-UART clock divider set to 1 */
 		__raw_writel(0x0101, uartinit_data[i].pdiv_clk_reg);
+
+		/*
+		 * Force a flush of the RX FIFOs to work around a
+		 * HW bug
+		 */
+		puart = uartinit_data[i].mapbase;
+		__raw_writel(0xC1, LPC32XX_UART_IIR_FCR(puart));
+		__raw_writel(0x00, LPC32XX_UART_DLL_FIFO(puart));
+		j = LPC32XX_SUART_FIFO_SIZE;
+		while (j--)
+			tmp = __raw_readl(
+				LPC32XX_UART_DLL_FIFO(puart));
+		__raw_writel(0, LPC32XX_UART_IIR_FCR(puart));
 	}
 
 	/* This needs to be done after all UART clocks are setup */
-- 
cgit v1.2.3


From 280e54b164802662ff59ba9cc24b66595f9a6bb9 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Mon, 27 Feb 2012 17:28:03 +0100
Subject: ARM: LPC32xx: serial.c: Fixed loop limit

commit ff424aa4c89d19082e8ae5a3351006bc8a4cd91b upstream.

This patch fixes a wrong loop limit on UART init.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-lpc32xx/serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-lpc32xx/serial.c b/arch/arm/mach-lpc32xx/serial.c
index 1a3fd4ce0469..f2735281616a 100644
--- a/arch/arm/mach-lpc32xx/serial.c
+++ b/arch/arm/mach-lpc32xx/serial.c
@@ -187,7 +187,7 @@ void __init lpc32xx_serial_init(void)
 
 	/* This needs to be done after all UART clocks are setup */
 	__raw_writel(clkmodes, LPC32XX_UARTCTL_CLKMODE);
-	for (i = 0; i < ARRAY_SIZE(uartinit_data) - 1; i++) {
+	for (i = 0; i < ARRAY_SIZE(uartinit_data); i++) {
 		/* Force a flush of the RX FIFOs to work around a HW bug */
 		puart = serial_std_platform_data[i].mapbase;
 		__raw_writel(0xC1, LPC32XX_UART_IIR_FCR(puart));
-- 
cgit v1.2.3


From 19399c348f59bb2aa2321efe6e29ece9797717f7 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Mon, 27 Feb 2012 17:28:02 +0100
Subject: ARM: LPC32xx: irq.c: Clear latched event

commit 94ed7830cba4dce57b18a2926b5d826bfd184bd6 upstream.

This patch fixes the wakeup disable function by clearing latched events.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-lpc32xx/irq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index 4eae566dfdc7..965b1d6959e7 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -305,9 +305,18 @@ static int lpc32xx_irq_wake(struct irq_data *d, unsigned int state)
 
 		if (state)
 			eventreg |= lpc32xx_events[d->irq].mask;
-		else
+		else {
 			eventreg &= ~lpc32xx_events[d->irq].mask;
 
+			/*
+			 * When disabling the wakeup, clear the latched
+			 * event
+			 */
+			__raw_writel(lpc32xx_events[d->irq].mask,
+				lpc32xx_events[d->irq].
+				event_group->rawstat_reg);
+		}
+
 		__raw_writel(eventreg,
 			lpc32xx_events[d->irq].event_group->enab_reg);
 
-- 
cgit v1.2.3


From 53fcf6ba805ffd82171a4f9459c2767ce0b3b580 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Mon, 27 Feb 2012 17:28:02 +0100
Subject: ARM: LPC32xx: Fix interrupt controller init

commit 35dd0a75d4a382e7f769dd0277732e7aa5235718 upstream.

This patch fixes the initialization of the interrupt controller of the LPC32xx
by correctly setting up SIC1 and SIC2 instead of (wrongly) using the same value
as for the Main Interrupt Controller (MIC).

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-lpc32xx/irq.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index 965b1d6959e7..198de470ca99 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -389,13 +389,15 @@ void __init lpc32xx_init_irq(void)
 
 	/* Setup SIC1 */
 	__raw_writel(0, LPC32XX_INTC_MASK(LPC32XX_SIC1_BASE));
-	__raw_writel(MIC_APR_DEFAULT, LPC32XX_INTC_POLAR(LPC32XX_SIC1_BASE));
-	__raw_writel(MIC_ATR_DEFAULT, LPC32XX_INTC_ACT_TYPE(LPC32XX_SIC1_BASE));
+	__raw_writel(SIC1_APR_DEFAULT, LPC32XX_INTC_POLAR(LPC32XX_SIC1_BASE));
+	__raw_writel(SIC1_ATR_DEFAULT,
+				LPC32XX_INTC_ACT_TYPE(LPC32XX_SIC1_BASE));
 
 	/* Setup SIC2 */
 	__raw_writel(0, LPC32XX_INTC_MASK(LPC32XX_SIC2_BASE));
-	__raw_writel(MIC_APR_DEFAULT, LPC32XX_INTC_POLAR(LPC32XX_SIC2_BASE));
-	__raw_writel(MIC_ATR_DEFAULT, LPC32XX_INTC_ACT_TYPE(LPC32XX_SIC2_BASE));
+	__raw_writel(SIC2_APR_DEFAULT, LPC32XX_INTC_POLAR(LPC32XX_SIC2_BASE));
+	__raw_writel(SIC2_ATR_DEFAULT,
+				LPC32XX_INTC_ACT_TYPE(LPC32XX_SIC2_BASE));
 
 	/* Configure supported IRQ's */
 	for (i = 0; i < NR_IRQS; i++) {
-- 
cgit v1.2.3


From bb3b47ceebd8a5b8a8684fde8e2ba0b8f0127d05 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Mon, 27 Feb 2012 17:28:02 +0100
Subject: ARM: LPC32xx: Fix irq on GPI_28

commit f6737055c1c432a9628a9a731f9881ad8e0a9eee upstream.

The GPI_28 IRQ was not registered properly. The registration of
IRQ_LPC32XX_GPI_28 was added and the (wrong) IRQ_LPC32XX_GPI_11 at
LPC32XX_SIC1_IRQ(4) was replaced by IRQ_LPC32XX_GPI_28 (see manual of
LPC32xx / interrupt controller).

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-lpc32xx/include/mach/irqs.h | 2 +-
 arch/arm/mach-lpc32xx/irq.c               | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-lpc32xx/include/mach/irqs.h b/arch/arm/mach-lpc32xx/include/mach/irqs.h
index 2667f52e3b04..9e3b90df32e1 100644
--- a/arch/arm/mach-lpc32xx/include/mach/irqs.h
+++ b/arch/arm/mach-lpc32xx/include/mach/irqs.h
@@ -61,7 +61,7 @@
  */
 #define IRQ_LPC32XX_JTAG_COMM_TX	LPC32XX_SIC1_IRQ(1)
 #define IRQ_LPC32XX_JTAG_COMM_RX	LPC32XX_SIC1_IRQ(2)
-#define IRQ_LPC32XX_GPI_11		LPC32XX_SIC1_IRQ(4)
+#define IRQ_LPC32XX_GPI_28		LPC32XX_SIC1_IRQ(4)
 #define IRQ_LPC32XX_TS_P		LPC32XX_SIC1_IRQ(6)
 #define IRQ_LPC32XX_TS_IRQ		LPC32XX_SIC1_IRQ(7)
 #define IRQ_LPC32XX_TS_AUX		LPC32XX_SIC1_IRQ(8)
diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index 198de470ca99..c74de01ab5b6 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -118,6 +118,10 @@ static const struct lpc32xx_event_info lpc32xx_events[NR_IRQS] = {
 		.event_group = &lpc32xx_event_pin_regs,
 		.mask = LPC32XX_CLKPWR_EXTSRC_GPI_06_BIT,
 	},
+	[IRQ_LPC32XX_GPI_28] = {
+		.event_group = &lpc32xx_event_pin_regs,
+		.mask = LPC32XX_CLKPWR_EXTSRC_GPI_28_BIT,
+	},
 	[IRQ_LPC32XX_GPIO_00] = {
 		.event_group = &lpc32xx_event_int_regs,
 		.mask = LPC32XX_CLKPWR_INTSRC_GPIO_00_BIT,
-- 
cgit v1.2.3


From a8e161f4c38c371974319359165d544604e06b78 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 24 Feb 2012 18:01:27 +0100
Subject: S390: KEYS: Enable the compat keyctl wrapper on s390x

commit 1d057720609ed052a6371fe1d53300e5e6328e94 upstream.

Enable the compat keyctl wrapper on s390x so that 32-bit s390 userspace can
call the keyctl() syscall.

There's an s390x assembly wrapper that truncates all the register values to
32-bits and this then calls compat_sys_keyctl() - but the latter only exists if
CONFIG_KEYS_COMPAT is enabled, and the s390 Kconfig doesn't enable it.

Without this patch, 32-bit calls to the keyctl() syscall are given an ENOSYS
error:

	[root@devel4 ~]# keyctl show
	Session Keyring
	-3: key inaccessible (Function not implemented)

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: dan@danny.cz
Cc: Carsten Otte <cotte@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: linux-s390@vger.kernel.org
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c03fef7a9c22..9b922b12e9f0 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -228,6 +228,9 @@ config COMPAT
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+	def_bool y if COMPAT && KEYS
+
 config AUDIT_ARCH
 	def_bool y
 
-- 
cgit v1.2.3


From 0cd0b02bfce1497e21e11b33311fd4da6d10ea55 Mon Sep 17 00:00:00 2001
From: Gusakov Andrey <dron0gus@gmail.com>
Date: Sat, 3 Mar 2012 07:32:36 +0900
Subject: ARM: S3C24XX: DMA resume regression fix

commit e39d40c65dfd8390b50c03482ae9e289b8a8f351 upstream.

s3c2410_dma_suspend suspends channels from 0 to dma_channels.
s3c2410_dma_resume resumes channels in reverse order. So
pointer should be decremented instead of being incremented.

Signed-off-by: Gusakov Andrey <dron0gus@gmail.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/plat-s3c24xx/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 539bd0e3defd..0719f49defb2 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -1249,7 +1249,7 @@ static void s3c2410_dma_resume(void)
 	struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1;
 	int channel;
 
-	for (channel = dma_channels - 1; channel >= 0; cp++, channel--)
+	for (channel = dma_channels - 1; channel >= 0; cp--, channel--)
 		s3c2410_dma_resume_chan(cp);
 }
 
-- 
cgit v1.2.3


From f148e8a6cf51419b4dc7c99941054ed3ddad5a87 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 5 Mar 2012 14:59:19 -0800
Subject: alpha: fix 32/64-bit bug in futex support

commit 62aca403657fe30e5235c5331e9871e676d9ea0a upstream.

Michael Cree said:

: : I have noticed some user space problems (pulseaudio crashes in pthread
: : code, glibc/nptl test suite failures, java compiler freezes on SMP alpha
: : systems) that arise when using a 2.6.39 or later kernel on Alpha.
: : Bisecting between 2.6.38 and 2.6.39 (using glibc/nptl test suite as
: : criterion for good/bad kernel) eventually leads to:
: :
: : 8d7718aa082aaf30a0b4989e1f04858952f941bc is the first bad commit
: : commit 8d7718aa082aaf30a0b4989e1f04858952f941bc
: : Author: Michel Lespinasse <walken@google.com>
: : Date:   Thu Mar 10 18:50:58 2011 -0800
: :
: :     futex: Sanitize futex ops argument types
: :
: :     Change futex_atomic_op_inuser and futex_atomic_cmpxchg_inatomic
: :     prototypes to use u32 types for the futex as this is the data type the
: :     futex core code uses all over the place.
: :
: : Looking at the commit I see there is a change of the uaddr argument in
: : the Alpha architecture specific code for futexes from int to u32, but I
: : don't see why this should cause a problem.

Richard Henderson said:

: futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
:                               u32 oldval, u32 newval)
: ...
:         :       "r"(uaddr), "r"((long)oldval), "r"(newval)
:
:
: There is no 32-bit compare instruction.  These are implemented by
: consistently extending the values to a 64-bit type.  Since the
: load instruction sign-extends, we want to sign-extend the other
: quantity as well (despite the fact it's logically unsigned).
:
: So:
:
: -        :       "r"(uaddr), "r"((long)oldval), "r"(newval)
: +        :       "r"(uaddr), "r"((long)(int)oldval), "r"(newval)
:
: should do the trick.

Michael said:

: This fixes the glibc test suite failures and the pulseaudio related
: crashes, but it does not fix the java compiiler lockups that I was (and
: are still) observing.  That is some other problem.

Reported-by: Michael Cree <mcree@orcon.net.nz>
Tested-by: Michael Cree <mcree@orcon.net.nz>
Acked-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/include/asm/futex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index e8a761aee088..f939794363ac 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -108,7 +108,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	"	lda	$31,3b-2b(%0)\n"
 	"	.previous\n"
 	:	"+r"(ret), "=&r"(prev), "=&r"(cmp)
-	:	"r"(uaddr), "r"((long)oldval), "r"(newval)
+	:	"r"(uaddr), "r"((long)(int)oldval), "r"(newval)
 	:	"memory");
 
 	*uval = prev;
-- 
cgit v1.2.3


From 896903eb8ed86754935bb65b377e69dc048eac34 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@gmail.com>
Date: Fri, 3 Feb 2012 15:37:14 -0800
Subject: avr32: select generic atomic64_t support

commit 31e0017e6f6fb5cfdfaf932c1f98c9bef8d57688 upstream.

Enable use of the generic atomic64 implementation on AVR32 platforms.
Without this the kernel fails to build as the architecture does not
provide its version.

Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Acked-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/avr32/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e9d689b7c833..c614484f0fca 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -8,6 +8,7 @@ config AVR32
 	select HAVE_KPROBES
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
+	select GENERIC_ATOMIC64
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_SHOW
 	help
-- 
cgit v1.2.3


From 90bb7234900ec12f36028c96fdb320a36f08d383 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 8 Feb 2012 15:52:47 +0100
Subject: ARM: orion: Fix USB phy for orion5x.

commit 72053353583230952c4b187e110e9da00dfc3afb upstream.

The patch "ARM: orion: Consolidate USB platform setup code.", commit
4fcd3f374a928081d391cd9a570afe3b2c692fdc broke USB on TS-7800 and
other orion5x boards, because the wrong type of PHY was being passed
to the EHCI driver in the platform data. Orion5x needs EHCI_PHY_ORION
and all the others want EHCI_PHY_NA.

Allow the mach- code to tell the generic plat-orion code which USB PHY
enum to place into the platform data.

Version 2: Rebase to v3.3-rc2.

Reported-by: Ambroz Bizjak <ambrop7@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Ambroz Bizjak <ambrop7@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-dove/common.c               | 3 ++-
 arch/arm/mach-kirkwood/common.c           | 3 ++-
 arch/arm/mach-mv78xx0/common.c            | 3 ++-
 arch/arm/mach-orion5x/common.c            | 4 +++-
 arch/arm/plat-orion/common.c              | 9 ++++-----
 arch/arm/plat-orion/include/plat/common.h | 3 ++-
 6 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index cf7e5985eebf..46c044986294 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -31,6 +31,7 @@
 #include <asm/mach/arch.h>
 #include <linux/irq.h>
 #include <plat/time.h>
+#include <plat/ehci-orion.h>
 #include <plat/common.h>
 #include "common.h"
 
@@ -74,7 +75,7 @@ void __init dove_map_io(void)
 void __init dove_ehci0_init(void)
 {
 	orion_ehci_init(&dove_mbus_dram_info,
-			DOVE_USB0_PHYS_BASE, IRQ_DOVE_USB0);
+			DOVE_USB0_PHYS_BASE, IRQ_DOVE_USB0, EHCI_PHY_NA);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index f3248cfbe51d..c5dbbb35e0b1 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -28,6 +28,7 @@
 #include <plat/cache-feroceon-l2.h>
 #include <plat/mvsdio.h>
 #include <plat/orion_nand.h>
+#include <plat/ehci-orion.h>
 #include <plat/common.h>
 #include <plat/time.h>
 #include "common.h"
@@ -74,7 +75,7 @@ void __init kirkwood_ehci_init(void)
 {
 	kirkwood_clk_ctrl |= CGC_USB0;
 	orion_ehci_init(&kirkwood_mbus_dram_info,
-			USB_PHYS_BASE, IRQ_KIRKWOOD_USB);
+			USB_PHYS_BASE, IRQ_KIRKWOOD_USB, EHCI_PHY_NA);
 }
 
 
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 23d3980ef59d..d90e244e05e7 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -20,6 +20,7 @@
 #include <mach/mv78xx0.h>
 #include <mach/bridge-regs.h>
 #include <plat/cache-feroceon-l2.h>
+#include <plat/ehci-orion.h>
 #include <plat/orion_nand.h>
 #include <plat/time.h>
 #include <plat/common.h>
@@ -170,7 +171,7 @@ void __init mv78xx0_map_io(void)
 void __init mv78xx0_ehci0_init(void)
 {
 	orion_ehci_init(&mv78xx0_mbus_dram_info,
-			USB0_PHYS_BASE, IRQ_MV78XX0_USB_0);
+			USB0_PHYS_BASE, IRQ_MV78XX0_USB_0, EHCI_PHY_NA);
 }
 
 
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 0ab531d047fc..8a98da0b3f8e 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -29,6 +29,7 @@
 #include <mach/hardware.h>
 #include <mach/orion5x.h>
 #include <plat/orion_nand.h>
+#include <plat/ehci-orion.h>
 #include <plat/time.h>
 #include <plat/common.h>
 #include "common.h"
@@ -72,7 +73,8 @@ void __init orion5x_map_io(void)
 void __init orion5x_ehci0_init(void)
 {
 	orion_ehci_init(&orion5x_mbus_dram_info,
-			ORION5X_USB0_PHYS_BASE, IRQ_ORION5X_USB0_CTRL);
+			ORION5X_USB0_PHYS_BASE, IRQ_ORION5X_USB0_CTRL,
+			EHCI_PHY_ORION);
 }
 
 
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 9e5451b3c8e3..11dce87c2487 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -806,10 +806,7 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 /*****************************************************************************
  * EHCI
  ****************************************************************************/
-static struct orion_ehci_data orion_ehci_data = {
-	.phy_version	= EHCI_PHY_NA,
-};
-
+static struct orion_ehci_data orion_ehci_data;
 static u64 ehci_dmamask = DMA_BIT_MASK(32);
 
 
@@ -830,9 +827,11 @@ static struct platform_device orion_ehci = {
 
 void __init orion_ehci_init(struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
-			    unsigned long irq)
+			    unsigned long irq,
+			    enum orion_ehci_phy_ver phy_version)
 {
 	orion_ehci_data.dram = mbus_dram_info;
+	orion_ehci_data.phy_version = phy_version;
 	fill_resources(&orion_ehci, orion_ehci_resources, mapbase, SZ_4K - 1,
 		       irq);
 
diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h
index a63c357e2ab1..a2c0e31ce0dc 100644
--- a/arch/arm/plat-orion/include/plat/common.h
+++ b/arch/arm/plat-orion/include/plat/common.h
@@ -95,7 +95,8 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 
 void __init orion_ehci_init(struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
-			    unsigned long irq);
+			    unsigned long irq,
+			    enum orion_ehci_phy_ver phy_version);
 
 void __init orion_ehci_1_init(struct mbus_dram_target_info *mbus_dram_info,
 			      unsigned long mapbase,
-- 
cgit v1.2.3


From 2d339926c077870a29181d37839920ce3ecd7409 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 8 Feb 2012 15:52:07 +0100
Subject: ARM: orion: Fix Orion5x GPIO regression from MPP cleanup

commit b06540371063f0f07aafc1d1ac5e974da85c973c upstream.

Patchset "ARM: orion: Refactor the MPP code common in the orion
platform" broke at least Orion5x based platforms. These platforms have
pins configured as GPIO when the selector is not 0x0. However the
common code assumes the selector is always 0x0 for a GPIO lines. It
then ignores the GPIO bits in the MPP definitions, resulting in that
Orion5x machines cannot correctly configure there GPIO lines.

The Fix removes the assumption that the selector is always 0x0.
In order that none GPIO configurations are correctly blocked,
Kirkwood and mv78xx0 MPP definitions are corrected to only set the
GPIO bits for GPIO configurations.

This third version, which does not contain any whitespace changes,
and is rebased on v3.3-rc2.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-kirkwood/mpp.h | 320 +++++++++++++++++++++----------------------
 arch/arm/mach-mv78xx0/mpp.h  | 226 +++++++++++++++---------------
 arch/arm/plat-orion/mpp.c    |   3 +-
 3 files changed, 274 insertions(+), 275 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-kirkwood/mpp.h b/arch/arm/mach-kirkwood/mpp.h
index ac787957e2d9..7afccf472205 100644
--- a/arch/arm/mach-kirkwood/mpp.h
+++ b/arch/arm/mach-kirkwood/mpp.h
@@ -31,313 +31,313 @@
 #define MPP_F6282_MASK		MPP(  0, 0x0, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP0_GPIO		MPP(  0, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP0_NF_IO2		MPP(  0, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP0_SPI_SCn		MPP(  0, 0x2, 0, 1, 1,   1,   1,   1,   1 )
+#define MPP0_NF_IO2		MPP(  0, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP0_SPI_SCn		MPP(  0, 0x2, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP1_GPO		MPP(  1, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP1_NF_IO3		MPP(  1, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP1_SPI_MOSI		MPP(  1, 0x2, 0, 1, 1,   1,   1,   1,   1 )
+#define MPP1_NF_IO3		MPP(  1, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP1_SPI_MOSI		MPP(  1, 0x2, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP2_GPO		MPP(  2, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP2_NF_IO4		MPP(  2, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP2_SPI_SCK		MPP(  2, 0x2, 0, 1, 1,   1,   1,   1,   1 )
+#define MPP2_NF_IO4		MPP(  2, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP2_SPI_SCK		MPP(  2, 0x2, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP3_GPO		MPP(  3, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP3_NF_IO5		MPP(  3, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP3_SPI_MISO		MPP(  3, 0x2, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP3_NF_IO5		MPP(  3, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP3_SPI_MISO		MPP(  3, 0x2, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP4_GPIO		MPP(  4, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP4_NF_IO6		MPP(  4, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP4_UART0_RXD		MPP(  4, 0x2, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP4_SATA1_ACTn		MPP(  4, 0x5, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP4_NF_IO6		MPP(  4, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP4_UART0_RXD		MPP(  4, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP4_SATA1_ACTn		MPP(  4, 0x5, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP4_LCD_VGA_HSYNC	MPP(  4, 0xb, 0, 0, 0,   0,   0,   0,   1 )
-#define MPP4_PTP_CLK		MPP(  4, 0xd, 1, 0, 1,   1,   1,   1,   0 )
+#define MPP4_PTP_CLK		MPP(  4, 0xd, 0, 0, 1,   1,   1,   1,   0 )
 
 #define MPP5_GPO		MPP(  5, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP5_NF_IO7		MPP(  5, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP5_UART0_TXD		MPP(  5, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP5_PTP_TRIG_GEN	MPP(  5, 0x4, 0, 1, 1,   1,   1,   1,   0 )
-#define MPP5_SATA0_ACTn		MPP(  5, 0x5, 0, 1, 0,   1,   1,   1,   1 )
+#define MPP5_NF_IO7		MPP(  5, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP5_UART0_TXD		MPP(  5, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP5_PTP_TRIG_GEN	MPP(  5, 0x4, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP5_SATA0_ACTn		MPP(  5, 0x5, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP5_LCD_VGA_VSYNC	MPP(  5, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
-#define MPP6_SYSRST_OUTn	MPP(  6, 0x1, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP6_SPI_MOSI		MPP(  6, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP6_PTP_TRIG_GEN	MPP(  6, 0x3, 0, 1, 1,   1,   1,   1,   0 )
+#define MPP6_SYSRST_OUTn	MPP(  6, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP6_SPI_MOSI		MPP(  6, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP6_PTP_TRIG_GEN	MPP(  6, 0x3, 0, 0, 1,   1,   1,   1,   0 )
 
 #define MPP7_GPO		MPP(  7, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP7_PEX_RST_OUTn	MPP(  7, 0x1, 0, 1, 1,   1,   1,   1,   0 )
-#define MPP7_SPI_SCn		MPP(  7, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP7_PTP_TRIG_GEN	MPP(  7, 0x3, 0, 1, 1,   1,   1,   1,   0 )
-#define MPP7_LCD_PWM		MPP(  7, 0xb, 0, 1, 0,   0,   0,   0,   1 )
+#define MPP7_PEX_RST_OUTn	MPP(  7, 0x1, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP7_SPI_SCn		MPP(  7, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP7_PTP_TRIG_GEN	MPP(  7, 0x3, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP7_LCD_PWM		MPP(  7, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP8_GPIO		MPP(  8, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP8_TW0_SDA		MPP(  8, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP8_UART0_RTS		MPP(  8, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP8_UART1_RTS		MPP(  8, 0x3, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP8_MII0_RXERR		MPP(  8, 0x4, 1, 0, 0,   1,   1,   1,   1 )
-#define MPP8_SATA1_PRESENTn	MPP(  8, 0x5, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP8_PTP_CLK		MPP(  8, 0xc, 1, 0, 1,   1,   1,   1,   0 )
-#define MPP8_MII0_COL		MPP(  8, 0xd, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP8_TW0_SDA		MPP(  8, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP8_UART0_RTS		MPP(  8, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP8_UART1_RTS		MPP(  8, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP8_MII0_RXERR		MPP(  8, 0x4, 0, 0, 0,   1,   1,   1,   1 )
+#define MPP8_SATA1_PRESENTn	MPP(  8, 0x5, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP8_PTP_CLK		MPP(  8, 0xc, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP8_MII0_COL		MPP(  8, 0xd, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP9_GPIO		MPP(  9, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP9_TW0_SCK		MPP(  9, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP9_UART0_CTS		MPP(  9, 0x2, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP9_UART1_CTS		MPP(  9, 0x3, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP9_SATA0_PRESENTn	MPP(  9, 0x5, 0, 1, 0,   1,   1,   1,   1 )
-#define MPP9_PTP_EVENT_REQ	MPP(  9, 0xc, 1, 0, 1,   1,   1,   1,   0 )
-#define MPP9_MII0_CRS		MPP(  9, 0xd, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP9_TW0_SCK		MPP(  9, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP9_UART0_CTS		MPP(  9, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP9_UART1_CTS		MPP(  9, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP9_SATA0_PRESENTn	MPP(  9, 0x5, 0, 0, 0,   1,   1,   1,   1 )
+#define MPP9_PTP_EVENT_REQ	MPP(  9, 0xc, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP9_MII0_CRS		MPP(  9, 0xd, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP10_GPO		MPP( 10, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP10_SPI_SCK		MPP( 10, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP10_UART0_TXD		MPP( 10, 0X3, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP10_SATA1_ACTn	MPP( 10, 0x5, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP10_PTP_TRIG_GEN	MPP( 10, 0xc, 0, 1, 1,   1,   1,   1,   0 )
+#define MPP10_SPI_SCK		MPP( 10, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP10_UART0_TXD		MPP( 10, 0X3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP10_SATA1_ACTn	MPP( 10, 0x5, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP10_PTP_TRIG_GEN	MPP( 10, 0xc, 0, 0, 1,   1,   1,   1,   0 )
 
 #define MPP11_GPIO		MPP( 11, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP11_SPI_MISO		MPP( 11, 0x2, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP11_UART0_RXD		MPP( 11, 0x3, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP11_PTP_EVENT_REQ	MPP( 11, 0x4, 1, 0, 1,   1,   1,   1,   0 )
-#define MPP11_PTP_TRIG_GEN	MPP( 11, 0xc, 0, 1, 1,   1,   1,   1,   0 )
-#define MPP11_PTP_CLK		MPP( 11, 0xd, 1, 0, 1,   1,   1,   1,   0 )
-#define MPP11_SATA0_ACTn	MPP( 11, 0x5, 0, 1, 0,   1,   1,   1,   1 )
+#define MPP11_SPI_MISO		MPP( 11, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP11_UART0_RXD		MPP( 11, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP11_PTP_EVENT_REQ	MPP( 11, 0x4, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP11_PTP_TRIG_GEN	MPP( 11, 0xc, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP11_PTP_CLK		MPP( 11, 0xd, 0, 0, 1,   1,   1,   1,   0 )
+#define MPP11_SATA0_ACTn	MPP( 11, 0x5, 0, 0, 0,   1,   1,   1,   1 )
 
 #define MPP12_GPO		MPP( 12, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP12_SD_CLK		MPP( 12, 0x1, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP12_AU_SPDIF0		MPP( 12, 0xa, 0, 1, 0,   0,   0,   0,   1 )
-#define MPP12_SPI_MOSI		MPP( 12, 0xb, 0, 1, 0,   0,   0,   0,   1 )
-#define MPP12_TW1_SDA		MPP( 12, 0xd, 1, 0, 0,   0,   0,   0,   1 )
+#define MPP12_SD_CLK		MPP( 12, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP12_AU_SPDIF0		MPP( 12, 0xa, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP12_SPI_MOSI		MPP( 12, 0xb, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP12_TW1_SDA		MPP( 12, 0xd, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP13_GPIO		MPP( 13, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP13_SD_CMD		MPP( 13, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP13_UART1_TXD		MPP( 13, 0x3, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP13_AU_SPDIFRMCLK	MPP( 13, 0xa, 0, 1, 0,   0,   0,   0,   1 )
-#define MPP13_LCDPWM		MPP( 13, 0xb, 0, 1, 0,   0,   0,   0,   1 )
+#define MPP13_SD_CMD		MPP( 13, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP13_UART1_TXD		MPP( 13, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP13_AU_SPDIFRMCLK	MPP( 13, 0xa, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP13_LCDPWM		MPP( 13, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP14_GPIO		MPP( 14, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP14_SD_D0		MPP( 14, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP14_UART1_RXD		MPP( 14, 0x3, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP14_SATA1_PRESENTn	MPP( 14, 0x4, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP14_AU_SPDIFI		MPP( 14, 0xa, 1, 0, 0,   0,   0,   0,   1 )
-#define MPP14_AU_I2SDI		MPP( 14, 0xb, 1, 0, 0,   0,   0,   0,   1 )
-#define MPP14_MII0_COL		MPP( 14, 0xd, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP14_SD_D0		MPP( 14, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP14_UART1_RXD		MPP( 14, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP14_SATA1_PRESENTn	MPP( 14, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP14_AU_SPDIFI		MPP( 14, 0xa, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP14_AU_I2SDI		MPP( 14, 0xb, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP14_MII0_COL		MPP( 14, 0xd, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP15_GPIO		MPP( 15, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP15_SD_D1		MPP( 15, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP15_UART0_RTS		MPP( 15, 0x2, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP15_UART1_TXD		MPP( 15, 0x3, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP15_SATA0_ACTn	MPP( 15, 0x4, 0, 1, 0,   1,   1,   1,   1 )
-#define MPP15_SPI_CSn		MPP( 15, 0xb, 0, 1, 0,   0,   0,   0,   1 )
+#define MPP15_SD_D1		MPP( 15, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP15_UART0_RTS		MPP( 15, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP15_UART1_TXD		MPP( 15, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP15_SATA0_ACTn	MPP( 15, 0x4, 0, 0, 0,   1,   1,   1,   1 )
+#define MPP15_SPI_CSn		MPP( 15, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP16_GPIO		MPP( 16, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP16_SD_D2		MPP( 16, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP16_UART0_CTS		MPP( 16, 0x2, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP16_UART1_RXD		MPP( 16, 0x3, 1, 0, 1,   1,   1,   1,   1 )
-#define MPP16_SATA1_ACTn	MPP( 16, 0x4, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP16_LCD_EXT_REF_CLK	MPP( 16, 0xb, 1, 0, 0,   0,   0,   0,   1 )
-#define MPP16_MII0_CRS		MPP( 16, 0xd, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP16_SD_D2		MPP( 16, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP16_UART0_CTS		MPP( 16, 0x2, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP16_UART1_RXD		MPP( 16, 0x3, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP16_SATA1_ACTn	MPP( 16, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP16_LCD_EXT_REF_CLK	MPP( 16, 0xb, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP16_MII0_CRS		MPP( 16, 0xd, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP17_GPIO		MPP( 17, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP17_SD_D3		MPP( 17, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP17_SATA0_PRESENTn	MPP( 17, 0x4, 0, 1, 0,   1,   1,   1,   1 )
-#define MPP17_SATA1_ACTn	MPP( 17, 0xa, 0, 1, 0,   0,   0,   0,   1 )
-#define MPP17_TW1_SCK		MPP( 17, 0xd, 1, 1, 0,   0,   0,   0,   1 )
+#define MPP17_SD_D3		MPP( 17, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP17_SATA0_PRESENTn	MPP( 17, 0x4, 0, 0, 0,   1,   1,   1,   1 )
+#define MPP17_SATA1_ACTn	MPP( 17, 0xa, 0, 0, 0,   0,   0,   0,   1 )
+#define MPP17_TW1_SCK		MPP( 17, 0xd, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP18_GPO		MPP( 18, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP18_NF_IO0		MPP( 18, 0x1, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP18_PEX0_CLKREQ	MPP( 18, 0x2, 0, 1, 0,   0,   0,   0,   1 )
+#define MPP18_NF_IO0		MPP( 18, 0x1, 0, 0, 1,   1,   1,   1,   1 )
+#define MPP18_PEX0_CLKREQ	MPP( 18, 0x2, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP19_GPO		MPP( 19, 0x0, 0, 1, 1,   1,   1,   1,   1 )
-#define MPP19_NF_IO1		MPP( 19, 0x1, 1, 1, 1,   1,   1,   1,   1 )
+#define MPP19_NF_IO1		MPP( 19, 0x1, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP20_GPIO		MPP( 20, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP20_TSMP0		MPP( 20, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP20_TDM_CH0_TX_QL	MPP( 20, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP20_TSMP0		MPP( 20, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP20_TDM_CH0_TX_QL	MPP( 20, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP20_GE1_TXD0		MPP( 20, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP20_AU_SPDIFI		MPP( 20, 0x4, 1, 0, 0,   0,   1,   1,   1 )
-#define MPP20_SATA1_ACTn	MPP( 20, 0x5, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP20_AU_SPDIFI		MPP( 20, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP20_SATA1_ACTn	MPP( 20, 0x5, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP20_LCD_D0		MPP( 20, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP21_GPIO		MPP( 21, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP21_TSMP1		MPP( 21, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP21_TDM_CH0_RX_QL	MPP( 21, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP21_TSMP1		MPP( 21, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP21_TDM_CH0_RX_QL	MPP( 21, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP21_GE1_TXD1		MPP( 21, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP21_AU_SPDIFO		MPP( 21, 0x4, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP21_SATA0_ACTn	MPP( 21, 0x5, 0, 1, 0,   1,   1,   1,   1 )
+#define MPP21_AU_SPDIFO		MPP( 21, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP21_SATA0_ACTn	MPP( 21, 0x5, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP21_LCD_D1		MPP( 21, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP22_GPIO		MPP( 22, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP22_TSMP2		MPP( 22, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP22_TDM_CH2_TX_QL	MPP( 22, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP22_TSMP2		MPP( 22, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP22_TDM_CH2_TX_QL	MPP( 22, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP22_GE1_TXD2		MPP( 22, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP22_AU_SPDIFRMKCLK	MPP( 22, 0x4, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP22_SATA1_PRESENTn	MPP( 22, 0x5, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP22_AU_SPDIFRMKCLK	MPP( 22, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP22_SATA1_PRESENTn	MPP( 22, 0x5, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP22_LCD_D2		MPP( 22, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP23_GPIO		MPP( 23, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP23_TSMP3		MPP( 23, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP23_TDM_CH2_RX_QL	MPP( 23, 0x2, 1, 0, 0,   0,   1,   1,   1 )
+#define MPP23_TSMP3		MPP( 23, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP23_TDM_CH2_RX_QL	MPP( 23, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP23_GE1_TXD3		MPP( 23, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP23_AU_I2SBCLK	MPP( 23, 0x4, 0, 1, 0,   0,   1,   1,   1 )
-#define MPP23_SATA0_PRESENTn	MPP( 23, 0x5, 0, 1, 0,   1,   1,   1,   1 )
+#define MPP23_AU_I2SBCLK	MPP( 23, 0x4, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP23_SATA0_PRESENTn	MPP( 23, 0x5, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP23_LCD_D3		MPP( 23, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP24_GPIO		MPP( 24, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP24_TSMP4		MPP( 24, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP24_TDM_SPI_CS0	MPP( 24, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP24_TSMP4		MPP( 24, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP24_TDM_SPI_CS0	MPP( 24, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP24_GE1_RXD0		MPP( 24, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP24_AU_I2SDO		MPP( 24, 0x4, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP24_AU_I2SDO		MPP( 24, 0x4, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP24_LCD_D4		MPP( 24, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP25_GPIO		MPP( 25, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP25_TSMP5		MPP( 25, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP25_TDM_SPI_SCK	MPP( 25, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP25_TSMP5		MPP( 25, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP25_TDM_SPI_SCK	MPP( 25, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP25_GE1_RXD1		MPP( 25, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP25_AU_I2SLRCLK	MPP( 25, 0x4, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP25_AU_I2SLRCLK	MPP( 25, 0x4, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP25_LCD_D5		MPP( 25, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP26_GPIO		MPP( 26, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP26_TSMP6		MPP( 26, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP26_TDM_SPI_MISO	MPP( 26, 0x2, 1, 0, 0,   0,   1,   1,   1 )
+#define MPP26_TSMP6		MPP( 26, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP26_TDM_SPI_MISO	MPP( 26, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP26_GE1_RXD2		MPP( 26, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP26_AU_I2SMCLK	MPP( 26, 0x4, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP26_AU_I2SMCLK	MPP( 26, 0x4, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP26_LCD_D6		MPP( 26, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP27_GPIO		MPP( 27, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP27_TSMP7		MPP( 27, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP27_TDM_SPI_MOSI	MPP( 27, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP27_TSMP7		MPP( 27, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP27_TDM_SPI_MOSI	MPP( 27, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP27_GE1_RXD3		MPP( 27, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP27_AU_I2SDI		MPP( 27, 0x4, 1, 0, 0,   0,   1,   1,   1 )
+#define MPP27_AU_I2SDI		MPP( 27, 0x4, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP27_LCD_D7		MPP( 27, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP28_GPIO		MPP( 28, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP28_TSMP8		MPP( 28, 0x1, 1, 1, 0,   0,   1,   1,   1 )
+#define MPP28_TSMP8		MPP( 28, 0x1, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP28_TDM_CODEC_INTn	MPP( 28, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP28_GE1_COL		MPP( 28, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP28_AU_EXTCLK		MPP( 28, 0x4, 1, 0, 0,   0,   1,   1,   1 )
+#define MPP28_AU_EXTCLK		MPP( 28, 0x4, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP28_LCD_D8		MPP( 28, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP29_GPIO		MPP( 29, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP29_TSMP9		MPP( 29, 0x1, 1, 1, 0,   0,   1,   1,   1 )
+#define MPP29_TSMP9		MPP( 29, 0x1, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP29_TDM_CODEC_RSTn	MPP( 29, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP29_GE1_TCLK		MPP( 29, 0x3, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP29_LCD_D9		MPP( 29, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP30_GPIO		MPP( 30, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP30_TSMP10		MPP( 30, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP30_TDM_PCLK		MPP( 30, 0x2, 1, 1, 0,   0,   1,   1,   1 )
+#define MPP30_TSMP10		MPP( 30, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP30_TDM_PCLK		MPP( 30, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP30_GE1_RXCTL		MPP( 30, 0x3, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP30_LCD_D10		MPP( 30, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP31_GPIO		MPP( 31, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP31_TSMP11		MPP( 31, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP31_TDM_FS		MPP( 31, 0x2, 1, 1, 0,   0,   1,   1,   1 )
+#define MPP31_TSMP11		MPP( 31, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP31_TDM_FS		MPP( 31, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP31_GE1_RXCLK		MPP( 31, 0x3, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP31_LCD_D11		MPP( 31, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP32_GPIO		MPP( 32, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP32_TSMP12		MPP( 32, 0x1, 1, 1, 0,   0,   1,   1,   1 )
-#define MPP32_TDM_DRX		MPP( 32, 0x2, 1, 0, 0,   0,   1,   1,   1 )
+#define MPP32_TSMP12		MPP( 32, 0x1, 0, 0, 0,   0,   1,   1,   1 )
+#define MPP32_TDM_DRX		MPP( 32, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP32_GE1_TCLKOUT	MPP( 32, 0x3, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP32_LCD_D12		MPP( 32, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP33_GPO		MPP( 33, 0x0, 0, 1, 0,   1,   1,   1,   1 )
-#define MPP33_TDM_DTX		MPP( 33, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP33_TDM_DTX		MPP( 33, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP33_GE1_TXCTL		MPP( 33, 0x3, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP33_LCD_D13		MPP( 33, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP34_GPIO		MPP( 34, 0x0, 1, 1, 0,   1,   1,   1,   1 )
-#define MPP34_TDM_SPI_CS1	MPP( 34, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP34_TDM_SPI_CS1	MPP( 34, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP34_GE1_TXEN		MPP( 34, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP34_SATA1_ACTn	MPP( 34, 0x5, 0, 1, 0,   0,   0,   1,   1 )
+#define MPP34_SATA1_ACTn	MPP( 34, 0x5, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP34_LCD_D14		MPP( 34, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP35_GPIO		MPP( 35, 0x0, 1, 1, 1,   1,   1,   1,   1 )
-#define MPP35_TDM_CH0_TX_QL	MPP( 35, 0x2, 0, 1, 0,   0,   1,   1,   1 )
+#define MPP35_TDM_CH0_TX_QL	MPP( 35, 0x2, 0, 0, 0,   0,   1,   1,   1 )
 #define MPP35_GE1_RXERR		MPP( 35, 0x3, 0, 0, 0,   1,   1,   1,   1 )
-#define MPP35_SATA0_ACTn	MPP( 35, 0x5, 0, 1, 0,   1,   1,   1,   1 )
+#define MPP35_SATA0_ACTn	MPP( 35, 0x5, 0, 0, 0,   1,   1,   1,   1 )
 #define MPP35_LCD_D15		MPP( 22, 0xb, 0, 0, 0,   0,   0,   0,   1 )
-#define MPP35_MII0_RXERR	MPP( 35, 0xc, 1, 0, 1,   1,   1,   1,   1 )
+#define MPP35_MII0_RXERR	MPP( 35, 0xc, 0, 0, 1,   1,   1,   1,   1 )
 
 #define MPP36_GPIO		MPP( 36, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP36_TSMP0		MPP( 36, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP36_TDM_SPI_CS1	MPP( 36, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP36_AU_SPDIFI		MPP( 36, 0x4, 1, 0, 1,   0,   0,   1,   1 )
-#define MPP36_TW1_SDA		MPP( 36, 0xb, 1, 1, 0,   0,   0,   0,   1 )
+#define MPP36_TSMP0		MPP( 36, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP36_TDM_SPI_CS1	MPP( 36, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP36_AU_SPDIFI		MPP( 36, 0x4, 0, 0, 1,   0,   0,   1,   1 )
+#define MPP36_TW1_SDA		MPP( 36, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP37_GPIO		MPP( 37, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP37_TSMP1		MPP( 37, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP37_TDM_CH2_TX_QL	MPP( 37, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP37_AU_SPDIFO		MPP( 37, 0x4, 0, 1, 1,   0,   0,   1,   1 )
-#define MPP37_TW1_SCK		MPP( 37, 0xb, 1, 1, 0,   0,   0,   0,   1 )
+#define MPP37_TSMP1		MPP( 37, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP37_TDM_CH2_TX_QL	MPP( 37, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP37_AU_SPDIFO		MPP( 37, 0x4, 0, 0, 1,   0,   0,   1,   1 )
+#define MPP37_TW1_SCK		MPP( 37, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP38_GPIO		MPP( 38, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP38_TSMP2		MPP( 38, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP38_TDM_CH2_RX_QL	MPP( 38, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP38_AU_SPDIFRMLCLK	MPP( 38, 0x4, 0, 1, 1,   0,   0,   1,   1 )
+#define MPP38_TSMP2		MPP( 38, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP38_TDM_CH2_RX_QL	MPP( 38, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP38_AU_SPDIFRMLCLK	MPP( 38, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP38_LCD_D18		MPP( 38, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP39_GPIO		MPP( 39, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP39_TSMP3		MPP( 39, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP39_TDM_SPI_CS0	MPP( 39, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP39_AU_I2SBCLK	MPP( 39, 0x4, 0, 1, 1,   0,   0,   1,   1 )
+#define MPP39_TSMP3		MPP( 39, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP39_TDM_SPI_CS0	MPP( 39, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP39_AU_I2SBCLK	MPP( 39, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP39_LCD_D19		MPP( 39, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP40_GPIO		MPP( 40, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP40_TSMP4		MPP( 40, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP40_TDM_SPI_SCK	MPP( 40, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP40_AU_I2SDO		MPP( 40, 0x4, 0, 1, 1,   0,   0,   1,   1 )
+#define MPP40_TSMP4		MPP( 40, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP40_TDM_SPI_SCK	MPP( 40, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP40_AU_I2SDO		MPP( 40, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP40_LCD_D20		MPP( 40, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP41_GPIO		MPP( 41, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP41_TSMP5		MPP( 41, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP41_TDM_SPI_MISO	MPP( 41, 0x2, 1, 0, 0,   0,   0,   1,   1 )
-#define MPP41_AU_I2SLRCLK	MPP( 41, 0x4, 0, 1, 1,   0,   0,   1,   1 )
+#define MPP41_TSMP5		MPP( 41, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP41_TDM_SPI_MISO	MPP( 41, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP41_AU_I2SLRCLK	MPP( 41, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP41_LCD_D21		MPP( 41, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP42_GPIO		MPP( 42, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP42_TSMP6		MPP( 42, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP42_TDM_SPI_MOSI	MPP( 42, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP42_AU_I2SMCLK	MPP( 42, 0x4, 0, 1, 1,   0,   0,   1,   1 )
+#define MPP42_TSMP6		MPP( 42, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP42_TDM_SPI_MOSI	MPP( 42, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP42_AU_I2SMCLK	MPP( 42, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP42_LCD_D22		MPP( 42, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP43_GPIO		MPP( 43, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP43_TSMP7		MPP( 43, 0x1, 1, 1, 0,   0,   0,   1,   1 )
+#define MPP43_TSMP7		MPP( 43, 0x1, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP43_TDM_CODEC_INTn	MPP( 43, 0x2, 0, 0, 0,   0,   0,   1,   1 )
-#define MPP43_AU_I2SDI		MPP( 43, 0x4, 1, 0, 1,   0,   0,   1,   1 )
+#define MPP43_AU_I2SDI		MPP( 43, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP43_LCD_D23		MPP( 22, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP44_GPIO		MPP( 44, 0x0, 1, 1, 1,   0,   0,   1,   1 )
-#define MPP44_TSMP8		MPP( 44, 0x1, 1, 1, 0,   0,   0,   1,   1 )
+#define MPP44_TSMP8		MPP( 44, 0x1, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP44_TDM_CODEC_RSTn	MPP( 44, 0x2, 0, 0, 0,   0,   0,   1,   1 )
-#define MPP44_AU_EXTCLK		MPP( 44, 0x4, 1, 0, 1,   0,   0,   1,   1 )
+#define MPP44_AU_EXTCLK		MPP( 44, 0x4, 0, 0, 1,   0,   0,   1,   1 )
 #define MPP44_LCD_CLK		MPP( 44, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP45_GPIO		MPP( 45, 0x0, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP45_TSMP9		MPP( 45, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP45_TDM_PCLK		MPP( 45, 0x2, 1, 1, 0,   0,   0,   1,   1 )
+#define MPP45_TSMP9		MPP( 45, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP45_TDM_PCLK		MPP( 45, 0x2, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP245_LCD_E		MPP( 45, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP46_GPIO		MPP( 46, 0x0, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP46_TSMP10		MPP( 46, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP46_TDM_FS		MPP( 46, 0x2, 1, 1, 0,   0,   0,   1,   1 )
+#define MPP46_TSMP10		MPP( 46, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP46_TDM_FS		MPP( 46, 0x2, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP46_LCD_HSYNC		MPP( 46, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP47_GPIO		MPP( 47, 0x0, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP47_TSMP11		MPP( 47, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP47_TDM_DRX		MPP( 47, 0x2, 1, 0, 0,   0,   0,   1,   1 )
+#define MPP47_TSMP11		MPP( 47, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP47_TDM_DRX		MPP( 47, 0x2, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP47_LCD_VSYNC		MPP( 47, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP48_GPIO		MPP( 48, 0x0, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP48_TSMP12		MPP( 48, 0x1, 1, 1, 0,   0,   0,   1,   1 )
-#define MPP48_TDM_DTX		MPP( 48, 0x2, 0, 1, 0,   0,   0,   1,   1 )
+#define MPP48_TSMP12		MPP( 48, 0x1, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP48_TDM_DTX		MPP( 48, 0x2, 0, 0, 0,   0,   0,   1,   1 )
 #define MPP48_LCD_D16		MPP( 22, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP49_GPIO		MPP( 49, 0x0, 1, 1, 0,   0,   0,   1,   0 )
 #define MPP49_GPO		MPP( 49, 0x0, 0, 1, 0,   0,   0,   0,   1 )
-#define MPP49_TSMP9		MPP( 49, 0x1, 1, 1, 0,   0,   0,   1,   0 )
-#define MPP49_TDM_CH0_RX_QL	MPP( 49, 0x2, 0, 1, 0,   0,   0,   1,   1 )
-#define MPP49_PTP_CLK		MPP( 49, 0x5, 1, 0, 0,   0,   0,   1,   0 )
-#define MPP49_PEX0_CLKREQ	MPP( 49, 0xa, 0, 1, 0,   0,   0,   0,   1 )
+#define MPP49_TSMP9		MPP( 49, 0x1, 0, 0, 0,   0,   0,   1,   0 )
+#define MPP49_TDM_CH0_RX_QL	MPP( 49, 0x2, 0, 0, 0,   0,   0,   1,   1 )
+#define MPP49_PTP_CLK		MPP( 49, 0x5, 0, 0, 0,   0,   0,   1,   0 )
+#define MPP49_PEX0_CLKREQ	MPP( 49, 0xa, 0, 0, 0,   0,   0,   0,   1 )
 #define MPP49_LCD_D17		MPP( 49, 0xb, 0, 0, 0,   0,   0,   0,   1 )
 
 #define MPP_MAX			49
diff --git a/arch/arm/mach-mv78xx0/mpp.h b/arch/arm/mach-mv78xx0/mpp.h
index b61b50927123..3752302ae2ee 100644
--- a/arch/arm/mach-mv78xx0/mpp.h
+++ b/arch/arm/mach-mv78xx0/mpp.h
@@ -24,296 +24,296 @@
 #define MPP_78100_A0_MASK    MPP(0, 0x0, 0, 0, 1)
 
 #define MPP0_GPIO        MPP(0, 0x0, 1, 1, 1)
-#define MPP0_GE0_COL        MPP(0, 0x1, 1, 0, 1)
-#define MPP0_GE1_TXCLK        MPP(0, 0x2, 0, 1, 1)
+#define MPP0_GE0_COL        MPP(0, 0x1, 0, 0, 1)
+#define MPP0_GE1_TXCLK        MPP(0, 0x2, 0, 0, 1)
 #define MPP0_UNUSED        MPP(0, 0x3, 0, 0, 1)
 
 #define MPP1_GPIO        MPP(1, 0x0, 1, 1, 1)
-#define MPP1_GE0_RXERR        MPP(1, 0x1, 1, 0, 1)
-#define MPP1_GE1_TXCTL        MPP(1, 0x2, 0, 1, 1)
+#define MPP1_GE0_RXERR        MPP(1, 0x1, 0, 0, 1)
+#define MPP1_GE1_TXCTL        MPP(1, 0x2, 0, 0, 1)
 #define MPP1_UNUSED        MPP(1, 0x3, 0, 0, 1)
 
 #define MPP2_GPIO        MPP(2, 0x0, 1, 1, 1)
-#define MPP2_GE0_CRS        MPP(2, 0x1, 1, 0, 1)
-#define MPP2_GE1_RXCTL        MPP(2, 0x2, 1, 0, 1)
+#define MPP2_GE0_CRS        MPP(2, 0x1, 0, 0, 1)
+#define MPP2_GE1_RXCTL        MPP(2, 0x2, 0, 0, 1)
 #define MPP2_UNUSED        MPP(2, 0x3, 0, 0, 1)
 
 #define MPP3_GPIO        MPP(3, 0x0, 1, 1, 1)
-#define MPP3_GE0_TXERR        MPP(3, 0x1, 0, 1, 1)
-#define MPP3_GE1_RXCLK        MPP(3, 0x2, 1, 0, 1)
+#define MPP3_GE0_TXERR        MPP(3, 0x1, 0, 0, 1)
+#define MPP3_GE1_RXCLK        MPP(3, 0x2, 0, 0, 1)
 #define MPP3_UNUSED        MPP(3, 0x3, 0, 0, 1)
 
 #define MPP4_GPIO        MPP(4, 0x0, 1, 1, 1)
-#define MPP4_GE0_TXD4        MPP(4, 0x1, 0, 1, 1)
-#define MPP4_GE1_TXD0        MPP(4, 0x2, 0, 1, 1)
+#define MPP4_GE0_TXD4        MPP(4, 0x1, 0, 0, 1)
+#define MPP4_GE1_TXD0        MPP(4, 0x2, 0, 0, 1)
 #define MPP4_UNUSED        MPP(4, 0x3, 0, 0, 1)
 
 #define MPP5_GPIO        MPP(5, 0x0, 1, 1, 1)
-#define MPP5_GE0_TXD5        MPP(5, 0x1, 0, 1, 1)
-#define MPP5_GE1_TXD1        MPP(5, 0x2, 0, 1, 1)
+#define MPP5_GE0_TXD5        MPP(5, 0x1, 0, 0, 1)
+#define MPP5_GE1_TXD1        MPP(5, 0x2, 0, 0, 1)
 #define MPP5_UNUSED        MPP(5, 0x3, 0, 0, 1)
 
 #define MPP6_GPIO        MPP(6, 0x0, 1, 1, 1)
-#define MPP6_GE0_TXD6        MPP(6, 0x1, 0, 1, 1)
-#define MPP6_GE1_TXD2        MPP(6, 0x2, 0, 1, 1)
+#define MPP6_GE0_TXD6        MPP(6, 0x1, 0, 0, 1)
+#define MPP6_GE1_TXD2        MPP(6, 0x2, 0, 0, 1)
 #define MPP6_UNUSED        MPP(6, 0x3, 0, 0, 1)
 
 #define MPP7_GPIO        MPP(7, 0x0, 1, 1, 1)
-#define MPP7_GE0_TXD7        MPP(7, 0x1, 0, 1, 1)
-#define MPP7_GE1_TXD3        MPP(7, 0x2, 0, 1, 1)
+#define MPP7_GE0_TXD7        MPP(7, 0x1, 0, 0, 1)
+#define MPP7_GE1_TXD3        MPP(7, 0x2, 0, 0, 1)
 #define MPP7_UNUSED        MPP(7, 0x3, 0, 0, 1)
 
 #define MPP8_GPIO        MPP(8, 0x0, 1, 1, 1)
-#define MPP8_GE0_RXD4        MPP(8, 0x1, 1, 0, 1)
-#define MPP8_GE1_RXD0        MPP(8, 0x2, 1, 0, 1)
+#define MPP8_GE0_RXD4        MPP(8, 0x1, 0, 0, 1)
+#define MPP8_GE1_RXD0        MPP(8, 0x2, 0, 0, 1)
 #define MPP8_UNUSED        MPP(8, 0x3, 0, 0, 1)
 
 #define MPP9_GPIO        MPP(9, 0x0, 1, 1, 1)
-#define MPP9_GE0_RXD5        MPP(9, 0x1, 1, 0, 1)
-#define MPP9_GE1_RXD1        MPP(9, 0x2, 1, 0, 1)
+#define MPP9_GE0_RXD5        MPP(9, 0x1, 0, 0, 1)
+#define MPP9_GE1_RXD1        MPP(9, 0x2, 0, 0, 1)
 #define MPP9_UNUSED        MPP(9, 0x3, 0, 0, 1)
 
 #define MPP10_GPIO        MPP(10, 0x0, 1, 1, 1)
-#define MPP10_GE0_RXD6        MPP(10, 0x1, 1, 0, 1)
-#define MPP10_GE1_RXD2        MPP(10, 0x2, 1, 0, 1)
+#define MPP10_GE0_RXD6        MPP(10, 0x1, 0, 0, 1)
+#define MPP10_GE1_RXD2        MPP(10, 0x2, 0, 0, 1)
 #define MPP10_UNUSED        MPP(10, 0x3, 0, 0, 1)
 
 #define MPP11_GPIO        MPP(11, 0x0, 1, 1, 1)
-#define MPP11_GE0_RXD7        MPP(11, 0x1, 1, 0, 1)
-#define MPP11_GE1_RXD3        MPP(11, 0x2, 1, 0, 1)
+#define MPP11_GE0_RXD7        MPP(11, 0x1, 0, 0, 1)
+#define MPP11_GE1_RXD3        MPP(11, 0x2, 0, 0, 1)
 #define MPP11_UNUSED        MPP(11, 0x3, 0, 0, 1)
 
 #define MPP12_GPIO        MPP(12, 0x0, 1, 1, 1)
-#define MPP12_M_BB        MPP(12, 0x3, 1, 0, 1)
-#define MPP12_UA0_CTSn        MPP(12, 0x4, 1, 0, 1)
-#define MPP12_NAND_FLASH_REn0    MPP(12, 0x5, 0, 1, 1)
-#define MPP12_TDM0_SCSn        MPP(12, 0X6, 0, 1, 1)
+#define MPP12_M_BB        MPP(12, 0x3, 0, 0, 1)
+#define MPP12_UA0_CTSn        MPP(12, 0x4, 0, 0, 1)
+#define MPP12_NAND_FLASH_REn0    MPP(12, 0x5, 0, 0, 1)
+#define MPP12_TDM0_SCSn        MPP(12, 0X6, 0, 0, 1)
 #define MPP12_UNUSED        MPP(12, 0x1, 0, 0, 1)
 
 #define MPP13_GPIO        MPP(13, 0x0, 1, 1, 1)
-#define MPP13_SYSRST_OUTn    MPP(13, 0x3, 0, 1, 1)
-#define MPP13_UA0_RTSn        MPP(13, 0x4, 0, 1, 1)
-#define MPP13_NAN_FLASH_WEn0    MPP(13, 0x5, 0, 1, 1)
-#define MPP13_TDM_SCLK        MPP(13, 0x6, 0, 1, 1)
+#define MPP13_SYSRST_OUTn    MPP(13, 0x3, 0, 0, 1)
+#define MPP13_UA0_RTSn        MPP(13, 0x4, 0, 0, 1)
+#define MPP13_NAN_FLASH_WEn0    MPP(13, 0x5, 0, 0, 1)
+#define MPP13_TDM_SCLK        MPP(13, 0x6, 0, 0, 1)
 #define MPP13_UNUSED        MPP(13, 0x1, 0, 0, 1)
 
 #define MPP14_GPIO        MPP(14, 0x0, 1, 1, 1)
-#define MPP14_SATA1_ACTn    MPP(14, 0x3, 0, 1, 1)
-#define MPP14_UA1_CTSn        MPP(14, 0x4, 1, 0, 1)
-#define MPP14_NAND_FLASH_REn1    MPP(14, 0x5, 0, 1, 1)
-#define MPP14_TDM_SMOSI        MPP(14, 0x6, 0, 1, 1)
+#define MPP14_SATA1_ACTn    MPP(14, 0x3, 0, 0, 1)
+#define MPP14_UA1_CTSn        MPP(14, 0x4, 0, 0, 1)
+#define MPP14_NAND_FLASH_REn1    MPP(14, 0x5, 0, 0, 1)
+#define MPP14_TDM_SMOSI        MPP(14, 0x6, 0, 0, 1)
 #define MPP14_UNUSED        MPP(14, 0x1, 0, 0, 1)
 
 #define MPP15_GPIO        MPP(15, 0x0, 1, 1, 1)
-#define MPP15_SATA0_ACTn    MPP(15, 0x3, 0, 1, 1)
-#define MPP15_UA1_RTSn        MPP(15, 0x4, 0, 1, 1)
-#define MPP15_NAND_FLASH_WEn1    MPP(15, 0x5, 0, 1, 1)
-#define MPP15_TDM_SMISO        MPP(15, 0x6, 1, 0, 1)
+#define MPP15_SATA0_ACTn    MPP(15, 0x3, 0, 0, 1)
+#define MPP15_UA1_RTSn        MPP(15, 0x4, 0, 0, 1)
+#define MPP15_NAND_FLASH_WEn1    MPP(15, 0x5, 0, 0, 1)
+#define MPP15_TDM_SMISO        MPP(15, 0x6, 0, 0, 1)
 #define MPP15_UNUSED        MPP(15, 0x1, 0, 0, 1)
 
 #define MPP16_GPIO        MPP(16, 0x0, 1, 1, 1)
-#define MPP16_SATA1_PRESENTn    MPP(16, 0x3, 0, 1, 1)
-#define MPP16_UA2_TXD        MPP(16, 0x4, 0, 1, 1)
-#define MPP16_NAND_FLASH_REn3    MPP(16, 0x5, 0, 1, 1)
-#define MPP16_TDM_INTn        MPP(16, 0x6, 1, 0, 1)
+#define MPP16_SATA1_PRESENTn    MPP(16, 0x3, 0, 0, 1)
+#define MPP16_UA2_TXD        MPP(16, 0x4, 0, 0, 1)
+#define MPP16_NAND_FLASH_REn3    MPP(16, 0x5, 0, 0, 1)
+#define MPP16_TDM_INTn        MPP(16, 0x6, 0, 0, 1)
 #define MPP16_UNUSED        MPP(16, 0x1, 0, 0, 1)
 
 
 #define MPP17_GPIO        MPP(17, 0x0, 1, 1, 1)
-#define MPP17_SATA0_PRESENTn    MPP(17, 0x3, 0, 1, 1)
-#define MPP17_UA2_RXD        MPP(17, 0x4, 1, 0, 1)
-#define MPP17_NAND_FLASH_WEn3    MPP(17, 0x5, 0, 1, 1)
-#define MPP17_TDM_RSTn        MPP(17, 0x6, 0, 1, 1)
+#define MPP17_SATA0_PRESENTn    MPP(17, 0x3, 0, 0, 1)
+#define MPP17_UA2_RXD        MPP(17, 0x4, 0, 0, 1)
+#define MPP17_NAND_FLASH_WEn3    MPP(17, 0x5, 0, 0, 1)
+#define MPP17_TDM_RSTn        MPP(17, 0x6, 0, 0, 1)
 #define MPP17_UNUSED        MPP(17, 0x1, 0, 0, 1)
 
 
 #define MPP18_GPIO        MPP(18, 0x0, 1, 1, 1)
-#define MPP18_UA0_CTSn        MPP(18, 0x4, 1, 0, 1)
-#define MPP18_BOOT_FLASH_REn    MPP(18, 0x5, 0, 1, 1)
+#define MPP18_UA0_CTSn        MPP(18, 0x4, 0, 0, 1)
+#define MPP18_BOOT_FLASH_REn    MPP(18, 0x5, 0, 0, 1)
 #define MPP18_UNUSED        MPP(18, 0x1, 0, 0, 1)
 
 
 #define MPP19_GPIO        MPP(19, 0x0, 1, 1, 1)
-#define MPP19_UA0_CTSn        MPP(19, 0x4, 0, 1, 1)
-#define MPP19_BOOT_FLASH_WEn    MPP(19, 0x5, 0, 1, 1)
+#define MPP19_UA0_CTSn        MPP(19, 0x4, 0, 0, 1)
+#define MPP19_BOOT_FLASH_WEn    MPP(19, 0x5, 0, 0, 1)
 #define MPP19_UNUSED        MPP(19, 0x1, 0, 0, 1)
 
 
 #define MPP20_GPIO        MPP(20, 0x0, 1, 1, 1)
-#define MPP20_UA1_CTSs        MPP(20, 0x4, 1, 0, 1)
-#define MPP20_TDM_PCLK        MPP(20, 0x6, 1, 1, 0)
+#define MPP20_UA1_CTSs        MPP(20, 0x4, 0, 0, 1)
+#define MPP20_TDM_PCLK        MPP(20, 0x6, 0, 0, 0)
 #define MPP20_UNUSED        MPP(20, 0x1, 0, 0, 1)
 
 
 #define MPP21_GPIO        MPP(21, 0x0, 1, 1, 1)
-#define MPP21_UA1_CTSs        MPP(21, 0x4, 0, 1, 1)
-#define MPP21_TDM_FSYNC        MPP(21, 0x6, 1, 1, 0)
+#define MPP21_UA1_CTSs        MPP(21, 0x4, 0, 0, 1)
+#define MPP21_TDM_FSYNC        MPP(21, 0x6, 0, 0, 0)
 #define MPP21_UNUSED        MPP(21, 0x1, 0, 0, 1)
 
 
 #define MPP22_GPIO        MPP(22, 0x0, 1, 1, 1)
-#define MPP22_UA3_TDX        MPP(22, 0x4, 0, 1, 1)
-#define MPP22_NAND_FLASH_REn2    MPP(22, 0x5, 0, 1, 1)
-#define MPP22_TDM_DRX        MPP(22, 0x6, 1, 0, 1)
+#define MPP22_UA3_TDX        MPP(22, 0x4, 0, 0, 1)
+#define MPP22_NAND_FLASH_REn2    MPP(22, 0x5, 0, 0, 1)
+#define MPP22_TDM_DRX        MPP(22, 0x6, 0, 0, 1)
 #define MPP22_UNUSED        MPP(22, 0x1, 0, 0, 1)
 
 
 #define MPP23_GPIO        MPP(23, 0x0, 1, 1, 1)
-#define MPP23_UA3_RDX        MPP(23, 0x4, 1, 0, 1)
-#define MPP23_NAND_FLASH_WEn2    MPP(23, 0x5, 0, 1, 1)
-#define MPP23_TDM_DTX        MPP(23, 0x6, 0, 1, 1)
+#define MPP23_UA3_RDX        MPP(23, 0x4, 0, 0, 1)
+#define MPP23_NAND_FLASH_WEn2    MPP(23, 0x5, 0, 0, 1)
+#define MPP23_TDM_DTX        MPP(23, 0x6, 0, 0, 1)
 #define MPP23_UNUSED        MPP(23, 0x1, 0, 0, 1)
 
 
 #define MPP24_GPIO        MPP(24, 0x0, 1, 1, 1)
-#define MPP24_UA2_TXD        MPP(24, 0x4, 0, 1, 1)
-#define MPP24_TDM_INTn        MPP(24, 0x6, 1, 0, 1)
+#define MPP24_UA2_TXD        MPP(24, 0x4, 0, 0, 1)
+#define MPP24_TDM_INTn        MPP(24, 0x6, 0, 0, 1)
 #define MPP24_UNUSED        MPP(24, 0x1, 0, 0, 1)
 
 
 #define MPP25_GPIO        MPP(25, 0x0, 1, 1, 1)
-#define MPP25_UA2_RXD        MPP(25, 0x4, 1, 0, 1)
-#define MPP25_TDM_RSTn        MPP(25, 0x6, 0, 1, 1)
+#define MPP25_UA2_RXD        MPP(25, 0x4, 0, 0, 1)
+#define MPP25_TDM_RSTn        MPP(25, 0x6, 0, 0, 1)
 #define MPP25_UNUSED        MPP(25, 0x1, 0, 0, 1)
 
 
 #define MPP26_GPIO        MPP(26, 0x0, 1, 1, 1)
-#define MPP26_UA2_CTSn        MPP(26, 0x4, 1, 0, 1)
-#define MPP26_TDM_PCLK        MPP(26, 0x6, 1, 1, 1)
+#define MPP26_UA2_CTSn        MPP(26, 0x4, 0, 0, 1)
+#define MPP26_TDM_PCLK        MPP(26, 0x6, 0, 0, 1)
 #define MPP26_UNUSED        MPP(26, 0x1, 0, 0, 1)
 
 
 #define MPP27_GPIO        MPP(27, 0x0, 1, 1, 1)
-#define MPP27_UA2_RTSn        MPP(27, 0x4, 0, 1, 1)
-#define MPP27_TDM_FSYNC        MPP(27, 0x6, 1, 1, 1)
+#define MPP27_UA2_RTSn        MPP(27, 0x4, 0, 0, 1)
+#define MPP27_TDM_FSYNC        MPP(27, 0x6, 0, 0, 1)
 #define MPP27_UNUSED        MPP(27, 0x1, 0, 0, 1)
 
 
 #define MPP28_GPIO        MPP(28, 0x0, 1, 1, 1)
-#define MPP28_UA3_TXD        MPP(28, 0x4, 0, 1, 1)
-#define MPP28_TDM_DRX        MPP(28, 0x6, 1, 0, 1)
+#define MPP28_UA3_TXD        MPP(28, 0x4, 0, 0, 1)
+#define MPP28_TDM_DRX        MPP(28, 0x6, 0, 0, 1)
 #define MPP28_UNUSED        MPP(28, 0x1, 0, 0, 1)
 
 #define MPP29_GPIO        MPP(29, 0x0, 1, 1, 1)
-#define MPP29_UA3_RXD        MPP(29, 0x4, 1, 0, 1)
-#define MPP29_SYSRST_OUTn    MPP(29, 0x5, 0, 1, 1)
-#define MPP29_TDM_DTX        MPP(29, 0x6, 0, 1, 1)
+#define MPP29_UA3_RXD        MPP(29, 0x4, 0, 0, 1)
+#define MPP29_SYSRST_OUTn    MPP(29, 0x5, 0, 0, 1)
+#define MPP29_TDM_DTX        MPP(29, 0x6, 0, 0, 1)
 #define MPP29_UNUSED        MPP(29, 0x1, 0, 0, 1)
 
 #define MPP30_GPIO        MPP(30, 0x0, 1, 1, 1)
-#define MPP30_UA3_CTSn        MPP(30, 0x4, 1, 0, 1)
+#define MPP30_UA3_CTSn        MPP(30, 0x4, 0, 0, 1)
 #define MPP30_UNUSED        MPP(30, 0x1, 0, 0, 1)
 
 #define MPP31_GPIO        MPP(31, 0x0, 1, 1, 1)
-#define MPP31_UA3_RTSn        MPP(31, 0x4, 0, 1, 1)
-#define MPP31_TDM1_SCSn        MPP(31, 0x6, 0, 1, 1)
+#define MPP31_UA3_RTSn        MPP(31, 0x4, 0, 0, 1)
+#define MPP31_TDM1_SCSn        MPP(31, 0x6, 0, 0, 1)
 #define MPP31_UNUSED        MPP(31, 0x1, 0, 0, 1)
 
 
 #define MPP32_GPIO        MPP(32, 0x1, 1, 1, 1)
-#define MPP32_UA3_TDX        MPP(32, 0x4, 0, 1, 1)
-#define MPP32_SYSRST_OUTn    MPP(32, 0x5, 0, 1, 1)
-#define MPP32_TDM0_RXQ        MPP(32, 0x6, 0, 1, 1)
+#define MPP32_UA3_TDX        MPP(32, 0x4, 0, 0, 1)
+#define MPP32_SYSRST_OUTn    MPP(32, 0x5, 0, 0, 1)
+#define MPP32_TDM0_RXQ        MPP(32, 0x6, 0, 0, 1)
 #define MPP32_UNUSED        MPP(32, 0x3, 0, 0, 1)
 
 
 #define MPP33_GPIO        MPP(33, 0x1, 1, 1, 1)
-#define MPP33_UA3_RDX        MPP(33, 0x4, 1, 0, 1)
-#define MPP33_TDM0_TXQ        MPP(33, 0x6, 0, 1, 1)
+#define MPP33_UA3_RDX        MPP(33, 0x4, 0, 0, 1)
+#define MPP33_TDM0_TXQ        MPP(33, 0x6, 0, 0, 1)
 #define MPP33_UNUSED        MPP(33, 0x3, 0, 0, 1)
 
 
 #define MPP34_GPIO        MPP(34, 0x1, 1, 1, 1)
-#define MPP34_UA2_TDX        MPP(34, 0x4, 0, 1, 1)
-#define MPP34_TDM1_RXQ        MPP(34, 0x6, 0, 1, 1)
+#define MPP34_UA2_TDX        MPP(34, 0x4, 0, 0, 1)
+#define MPP34_TDM1_RXQ        MPP(34, 0x6, 0, 0, 1)
 #define MPP34_UNUSED        MPP(34, 0x3, 0, 0, 1)
 
 
 #define MPP35_GPIO        MPP(35, 0x1, 1, 1, 1)
-#define MPP35_UA2_RDX        MPP(35, 0x4, 1, 0, 1)
-#define MPP35_TDM1_TXQ        MPP(35, 0x6, 0, 1, 1)
+#define MPP35_UA2_RDX        MPP(35, 0x4, 0, 0, 1)
+#define MPP35_TDM1_TXQ        MPP(35, 0x6, 0, 0, 1)
 #define MPP35_UNUSED        MPP(35, 0x3, 0, 0, 1)
 
 #define MPP36_GPIO        MPP(36, 0x1, 1, 1, 1)
-#define MPP36_UA0_CTSn        MPP(36, 0x2, 1, 0, 1)
-#define MPP36_UA2_TDX        MPP(36, 0x4, 0, 1, 1)
-#define MPP36_TDM0_SCSn        MPP(36, 0x6, 0, 1, 1)
+#define MPP36_UA0_CTSn        MPP(36, 0x2, 0, 0, 1)
+#define MPP36_UA2_TDX        MPP(36, 0x4, 0, 0, 1)
+#define MPP36_TDM0_SCSn        MPP(36, 0x6, 0, 0, 1)
 #define MPP36_UNUSED        MPP(36, 0x3, 0, 0, 1)
 
 
 #define MPP37_GPIO        MPP(37, 0x1, 1, 1, 1)
-#define MPP37_UA0_RTSn        MPP(37, 0x2, 0, 1, 1)
-#define MPP37_UA2_RXD        MPP(37, 0x4, 1, 0, 1)
-#define MPP37_SYSRST_OUTn    MPP(37, 0x5, 0, 1, 1)
-#define MPP37_TDM_SCLK        MPP(37, 0x6, 0, 1, 1)
+#define MPP37_UA0_RTSn        MPP(37, 0x2, 0, 0, 1)
+#define MPP37_UA2_RXD        MPP(37, 0x4, 0, 0, 1)
+#define MPP37_SYSRST_OUTn    MPP(37, 0x5, 0, 0, 1)
+#define MPP37_TDM_SCLK        MPP(37, 0x6, 0, 0, 1)
 #define MPP37_UNUSED        MPP(37, 0x3, 0, 0, 1)
 
 
 #define MPP38_GPIO        MPP(38, 0x1, 1, 1, 1)
-#define MPP38_UA1_CTSn        MPP(38, 0x2, 1, 0, 1)
-#define MPP38_UA3_TXD        MPP(38, 0x4, 0, 1, 1)
-#define MPP38_SYSRST_OUTn    MPP(38, 0x5, 0, 1, 1)
-#define MPP38_TDM_SMOSI        MPP(38, 0x6, 0, 1, 1)
+#define MPP38_UA1_CTSn        MPP(38, 0x2, 0, 0, 1)
+#define MPP38_UA3_TXD        MPP(38, 0x4, 0, 0, 1)
+#define MPP38_SYSRST_OUTn    MPP(38, 0x5, 0, 0, 1)
+#define MPP38_TDM_SMOSI        MPP(38, 0x6, 0, 0, 1)
 #define MPP38_UNUSED        MPP(38, 0x3, 0, 0, 1)
 
 
 #define MPP39_GPIO        MPP(39, 0x1, 1, 1, 1)
-#define MPP39_UA1_RTSn        MPP(39, 0x2, 0, 1, 1)
-#define MPP39_UA3_RXD        MPP(39, 0x4, 1, 0, 1)
-#define MPP39_SYSRST_OUTn    MPP(39, 0x5, 0, 1, 1)
-#define MPP39_TDM_SMISO        MPP(39, 0x6, 1, 0, 1)
+#define MPP39_UA1_RTSn        MPP(39, 0x2, 0, 0, 1)
+#define MPP39_UA3_RXD        MPP(39, 0x4, 0, 0, 1)
+#define MPP39_SYSRST_OUTn    MPP(39, 0x5, 0, 0, 1)
+#define MPP39_TDM_SMISO        MPP(39, 0x6, 0, 0, 1)
 #define MPP39_UNUSED        MPP(39, 0x3, 0, 0, 1)
 
 
 #define MPP40_GPIO        MPP(40, 0x1, 1, 1, 1)
-#define MPP40_TDM_INTn        MPP(40, 0x6, 1, 0, 1)
+#define MPP40_TDM_INTn        MPP(40, 0x6, 0, 0, 1)
 #define MPP40_UNUSED        MPP(40, 0x0, 0, 0, 1)
 
 
 #define MPP41_GPIO        MPP(41, 0x1, 1, 1, 1)
-#define MPP41_TDM_RSTn        MPP(41, 0x6, 0, 1, 1)
+#define MPP41_TDM_RSTn        MPP(41, 0x6, 0, 0, 1)
 #define MPP41_UNUSED        MPP(41, 0x0, 0, 0, 1)
 
 
 #define MPP42_GPIO        MPP(42, 0x1, 1, 1, 1)
-#define MPP42_TDM_PCLK        MPP(42, 0x6, 1, 1, 1)
+#define MPP42_TDM_PCLK        MPP(42, 0x6, 0, 0, 1)
 #define MPP42_UNUSED        MPP(42, 0x0, 0, 0, 1)
 
 
 #define MPP43_GPIO        MPP(43, 0x1, 1, 1, 1)
-#define MPP43_TDM_FSYNC        MPP(43, 0x6, 1, 1, 1)
+#define MPP43_TDM_FSYNC        MPP(43, 0x6, 0, 0, 1)
 #define MPP43_UNUSED        MPP(43, 0x0, 0, 0, 1)
 
 
 #define MPP44_GPIO        MPP(44, 0x1, 1, 1, 1)
-#define MPP44_TDM_DRX        MPP(44, 0x6, 1, 0, 1)
+#define MPP44_TDM_DRX        MPP(44, 0x6, 0, 0, 1)
 #define MPP44_UNUSED        MPP(44, 0x0, 0, 0, 1)
 
 
 #define MPP45_GPIO        MPP(45, 0x1, 1, 1, 1)
-#define MPP45_SATA0_ACTn    MPP(45, 0x3, 0, 1, 1)
-#define MPP45_TDM_DRX        MPP(45, 0x6, 0, 1, 1)
+#define MPP45_SATA0_ACTn    MPP(45, 0x3, 0, 0, 1)
+#define MPP45_TDM_DRX        MPP(45, 0x6, 0, 0, 1)
 #define MPP45_UNUSED        MPP(45, 0x0, 0, 0, 1)
 
 
 #define MPP46_GPIO        MPP(46, 0x1, 1, 1, 1)
-#define MPP46_TDM_SCSn        MPP(46, 0x6, 0, 1, 1)
+#define MPP46_TDM_SCSn        MPP(46, 0x6, 0, 0, 1)
 #define MPP46_UNUSED        MPP(46, 0x0, 0, 0, 1)
 
 
@@ -323,14 +323,14 @@
 
 
 #define MPP48_GPIO        MPP(48, 0x1, 1, 1, 1)
-#define MPP48_SATA1_ACTn    MPP(48, 0x3, 0, 1, 1)
+#define MPP48_SATA1_ACTn    MPP(48, 0x3, 0, 0, 1)
 #define MPP48_UNUSED        MPP(48, 0x2, 0, 0, 1)
 
 
 #define MPP49_GPIO        MPP(49, 0x1, 1, 1, 1)
-#define MPP49_SATA0_ACTn    MPP(49, 0x3, 0, 1, 1)
-#define MPP49_M_BB        MPP(49, 0x4, 1, 0, 1)
+#define MPP49_SATA0_ACTn    MPP(49, 0x3, 0, 0, 1)
+#define MPP49_M_BB        MPP(49, 0x4, 0, 0, 1)
 #define MPP49_UNUSED        MPP(49, 0x2, 0, 0, 1)
 
 
diff --git a/arch/arm/plat-orion/mpp.c b/arch/arm/plat-orion/mpp.c
index 91553432711d..3b1e17bd3d17 100644
--- a/arch/arm/plat-orion/mpp.c
+++ b/arch/arm/plat-orion/mpp.c
@@ -64,8 +64,7 @@ void __init orion_mpp_conf(unsigned int *mpp_list, unsigned int variant_mask,
 			gpio_mode |= GPIO_INPUT_OK;
 		if (*mpp_list & MPP_OUTPUT_MASK)
 			gpio_mode |= GPIO_OUTPUT_OK;
-		if (sel != 0)
-			gpio_mode = 0;
+
 		orion_gpio_set_valid(num, gpio_mode);
 	}
 
-- 
cgit v1.2.3


From 1825e65dbb5370d5cebd13dbdfe4bce86caf351c Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Mon, 22 Aug 2011 13:02:52 +0300
Subject: OMAP: DSS2: HDMI: use default dividers

commit 8d88767a4377171752c22ac39bcb2b505eb751da upstream.

Use default regn and regm2 dividers in the hdmi driver if the board file
does not define them.

Cc: Mythri P K <mythripk@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 63de2d396e2d..f515fa29a105 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -617,15 +617,6 @@ static struct omap_dss_device sdp4430_hdmi_device = {
 	.name = "hdmi",
 	.driver_name = "hdmi_panel",
 	.type = OMAP_DISPLAY_TYPE_HDMI,
-	.clocks	= {
-		.dispc	= {
-			.dispc_fclk_src	= OMAP_DSS_CLK_SRC_FCK,
-		},
-		.hdmi	= {
-			.regn	= 15,
-			.regm2	= 1,
-		},
-	},
 	.platform_enable = sdp4430_panel_enable_hdmi,
 	.platform_disable = sdp4430_panel_disable_hdmi,
 	.channel = OMAP_DSS_CHANNEL_DIGIT,
-- 
cgit v1.2.3


From 850e968cce74f2e242e06d09dd072e51bd85aee3 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 11:04:53 +0200
Subject: OMAP: 4430SDP/Panda: use gpio_free_array to free HDMI gpios

commit 575753e3bea3b67eef8e454fb87f719e3f7da599 upstream.

Instead of freeing the GPIOs individually, use gpio_free_array().

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 3 +--
 arch/arm/mach-omap2/board-omap4panda.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index f515fa29a105..7feefb888a81 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -609,8 +609,7 @@ static int sdp4430_panel_enable_hdmi(struct omap_dss_device *dssdev)
 
 static void sdp4430_panel_disable_hdmi(struct omap_dss_device *dssdev)
 {
-	gpio_free(HDMI_GPIO_LS_OE);
-	gpio_free(HDMI_GPIO_HPD);
+	gpio_free_array(sdp4430_hdmi_gpios, ARRAY_SIZE(sdp4430_hdmi_gpios));
 }
 
 static struct omap_dss_device sdp4430_hdmi_device = {
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 0cfe2005cb50..2e9a6f361286 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -645,8 +645,7 @@ static int omap4_panda_panel_enable_hdmi(struct omap_dss_device *dssdev)
 
 static void omap4_panda_panel_disable_hdmi(struct omap_dss_device *dssdev)
 {
-	gpio_free(HDMI_GPIO_LS_OE);
-	gpio_free(HDMI_GPIO_HPD);
+	gpio_free_array(panda_hdmi_gpios, ARRAY_SIZE(panda_hdmi_gpios));
 }
 
 static struct omap_dss_device  omap4_panda_hdmi_device = {
-- 
cgit v1.2.3


From 61826fb9866fc86c9832263d1f5bb427f4329277 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 10:49:38 +0200
Subject: OMAP: 4430SDP/Panda: rename HPD GPIO to CT_CP_HPD

commit 3932a32fcf5393f8be70ac99dc718ad7ad0a415b upstream.

The GPIO 60 on 4430sdp and Panda is not HPD GPIO, as currently marked in
the board files, but CT_CP_HPD, which is used to enable/disable HPD
functionality.

This patch renames the GPIO.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 4 ++--
 arch/arm/mach-omap2/board-omap4panda.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 7feefb888a81..d0a49c3792b3 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -49,7 +49,7 @@
 #define ETH_KS8851_QUART		138
 #define OMAP4_SFH7741_SENSOR_OUTPUT_GPIO	184
 #define OMAP4_SFH7741_ENABLE_GPIO		188
-#define HDMI_GPIO_HPD 60 /* Hot plug pin for HDMI */
+#define HDMI_GPIO_CT_CP_HPD 60 /* HPD mode enable/disable */
 #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */
 
 static const int sdp4430_keymap[] = {
@@ -591,7 +591,7 @@ static void sdp4430_hdmi_mux_init(void)
 }
 
 static struct gpio sdp4430_hdmi_gpios[] = {
-	{ HDMI_GPIO_HPD,	GPIOF_OUT_INIT_HIGH,	"hdmi_gpio_hpd"   },
+	{ HDMI_GPIO_CT_CP_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ct_cp_hpd" },
 	{ HDMI_GPIO_LS_OE,	GPIOF_OUT_INIT_HIGH,	"hdmi_gpio_ls_oe" },
 };
 
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 2e9a6f361286..bc9874930b94 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -52,7 +52,7 @@
 #define GPIO_HUB_NRESET		62
 #define GPIO_WIFI_PMENA		43
 #define GPIO_WIFI_IRQ		53
-#define HDMI_GPIO_HPD 60 /* Hot plug pin for HDMI */
+#define HDMI_GPIO_CT_CP_HPD 60 /* HPD mode enable/disable */
 #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */
 
 /* wl127x BT, FM, GPS connectivity chip */
@@ -627,7 +627,7 @@ static void omap4_panda_hdmi_mux_init(void)
 }
 
 static struct gpio panda_hdmi_gpios[] = {
-	{ HDMI_GPIO_HPD,	GPIOF_OUT_INIT_HIGH, "hdmi_gpio_hpd"   },
+	{ HDMI_GPIO_CT_CP_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ct_cp_hpd" },
 	{ HDMI_GPIO_LS_OE,	GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ls_oe" },
 };
 
-- 
cgit v1.2.3


From ed3984fec88b0a39f6e66ee40f124e1568f75c12 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 10:59:00 +0200
Subject: OMAPDSS: remove wrong HDMI HPD muxing

commit 7bb122d155f742fe2d79849090c825be7b4a247e upstream.

"hdmi_hpd" pin is muxed to INPUT and PULLUP, but the pin is not
currently used, and in the future when it is used, the pin is used as a
GPIO and is board specific, not an OMAP4 wide thing.

So remove the muxing for now.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 4 ----
 arch/arm/mach-omap2/board-omap4panda.c | 4 ----
 2 files changed, 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index d0a49c3792b3..9bc418e138dc 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -578,12 +578,8 @@ static void __init omap_sfh7741prox_init(void)
 
 static void sdp4430_hdmi_mux_init(void)
 {
-	/* PAD0_HDMI_HPD_PAD1_HDMI_CEC */
-	omap_mux_init_signal("hdmi_hpd",
-			OMAP_PIN_INPUT_PULLUP);
 	omap_mux_init_signal("hdmi_cec",
 			OMAP_PIN_INPUT_PULLUP);
-	/* PAD0_HDMI_DDC_SCL_PAD1_HDMI_DDC_SDA */
 	omap_mux_init_signal("hdmi_ddc_scl",
 			OMAP_PIN_INPUT_PULLUP);
 	omap_mux_init_signal("hdmi_ddc_sda",
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index bc9874930b94..86a2d30a9fc2 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -614,12 +614,8 @@ int __init omap4_panda_dvi_init(void)
 
 static void omap4_panda_hdmi_mux_init(void)
 {
-	/* PAD0_HDMI_HPD_PAD1_HDMI_CEC */
-	omap_mux_init_signal("hdmi_hpd",
-			OMAP_PIN_INPUT_PULLUP);
 	omap_mux_init_signal("hdmi_cec",
 			OMAP_PIN_INPUT_PULLUP);
-	/* PAD0_HDMI_DDC_SCL_PAD1_HDMI_DDC_SDA */
 	omap_mux_init_signal("hdmi_ddc_scl",
 			OMAP_PIN_INPUT_PULLUP);
 	omap_mux_init_signal("hdmi_ddc_sda",
-- 
cgit v1.2.3


From bdd91fa44646ae9e80e7a716320e9d1e7287766b Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 11:02:36 +0200
Subject: OMAP: 4430SDP/Panda: setup HDMI GPIO muxes

commit 78a1ad8f12db70b8b0a4548b90704de08ee216ce upstream.

The HDMI GPIO pins LS_OE and CT_CP_HPD are not currently configured.
This patch configures them as output pins.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 3 +++
 arch/arm/mach-omap2/board-omap4panda.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 9bc418e138dc..669e3293780a 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -631,6 +631,9 @@ void omap_4430sdp_display_init(void)
 {
 	sdp4430_hdmi_mux_init();
 	omap_display_init(&sdp4430_dss_data);
+
+	omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT);
+	omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT);
 }
 
 #ifdef CONFIG_OMAP_MUX
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 86a2d30a9fc2..5d519fb01a89 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -674,6 +674,9 @@ void omap4_panda_display_init(void)
 
 	omap4_panda_hdmi_mux_init();
 	omap_display_init(&omap4_panda_dss_data);
+
+	omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT);
+	omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT);
 }
 
 static void __init omap4_panda_init(void)
-- 
cgit v1.2.3


From 8d1b18d569d59c8aa95964e2e5e4c5d544928258 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 11:05:32 +0200
Subject: OMAP: 4430SDP/Panda: add HDMI HPD gpio

commit aa74274b464d4aa24703963ac89a0ee942d5d267 upstream.

Both Panda and 4430SDP use GPIO 63 as HDMI hot-plug-detect. Configure
this GPIO in the board files.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 3 +++
 arch/arm/mach-omap2/board-omap4panda.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 669e3293780a..0ee578a2251a 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -51,6 +51,7 @@
 #define OMAP4_SFH7741_ENABLE_GPIO		188
 #define HDMI_GPIO_CT_CP_HPD 60 /* HPD mode enable/disable */
 #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */
+#define HDMI_GPIO_HPD  63 /* Hotplug detect */
 
 static const int sdp4430_keymap[] = {
 	KEY(0, 0, KEY_E),
@@ -589,6 +590,7 @@ static void sdp4430_hdmi_mux_init(void)
 static struct gpio sdp4430_hdmi_gpios[] = {
 	{ HDMI_GPIO_CT_CP_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ct_cp_hpd" },
 	{ HDMI_GPIO_LS_OE,	GPIOF_OUT_INIT_HIGH,	"hdmi_gpio_ls_oe" },
+	{ HDMI_GPIO_HPD, GPIOF_DIR_IN, "hdmi_gpio_hpd" },
 };
 
 static int sdp4430_panel_enable_hdmi(struct omap_dss_device *dssdev)
@@ -634,6 +636,7 @@ void omap_4430sdp_display_init(void)
 
 	omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT);
 	omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT);
+	omap_mux_init_gpio(HDMI_GPIO_HPD, OMAP_PIN_INPUT_PULLDOWN);
 }
 
 #ifdef CONFIG_OMAP_MUX
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 5d519fb01a89..2b3f7e0f5d34 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -54,6 +54,7 @@
 #define GPIO_WIFI_IRQ		53
 #define HDMI_GPIO_CT_CP_HPD 60 /* HPD mode enable/disable */
 #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */
+#define HDMI_GPIO_HPD  63 /* Hotplug detect */
 
 /* wl127x BT, FM, GPS connectivity chip */
 static int wl1271_gpios[] = {46, -1, -1};
@@ -625,6 +626,7 @@ static void omap4_panda_hdmi_mux_init(void)
 static struct gpio panda_hdmi_gpios[] = {
 	{ HDMI_GPIO_CT_CP_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ct_cp_hpd" },
 	{ HDMI_GPIO_LS_OE,	GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ls_oe" },
+	{ HDMI_GPIO_HPD, GPIOF_DIR_IN, "hdmi_gpio_hpd" },
 };
 
 static int omap4_panda_panel_enable_hdmi(struct omap_dss_device *dssdev)
@@ -677,6 +679,7 @@ void omap4_panda_display_init(void)
 
 	omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT);
 	omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT);
+	omap_mux_init_gpio(HDMI_GPIO_HPD, OMAP_PIN_INPUT_PULLDOWN);
 }
 
 static void __init omap4_panda_init(void)
-- 
cgit v1.2.3


From 21189f03d3ec3a74d9949907c828410d7a9a86d5 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 17 Jan 2012 11:09:57 +0200
Subject: OMAPDSS: HDMI: PHY burnout fix

commit c49d005b6cc8491fad5b24f82805be2d6bcbd3dd upstream.

A hardware bug in the OMAP4 HDMI PHY causes physical damage to the board
if the HDMI PHY is kept powered on when the cable is not connected.

This patch solves the problem by adding hot-plug-detection into the HDMI
IP driver. This is not a real HPD support in the sense that nobody else
than the IP driver gets to know about the HPD events, but is only meant
to fix the HW bug.

The strategy is simple: If the display device is turned off by the user,
the PHY power is set to OFF. When the display device is turned on by the
user, the PHY power is set either to LDOON or TXON, depending on whether
the HDMI cable is connected.

The reason to avoid PHY OFF when the display device is on, but the cable
is disconnected, is that when the PHY is turned OFF, the HDMI IP is not
"ticking" and thus the DISPC does not receive pixel clock from the HDMI
IP. This would, for example, prevent any VSYNCs from happening, and
would thus affect the users of omapdss. By using LDOON when the cable is
disconnected we'll avoid the HW bug, but keep the HDMI working as usual
from the user's point of view.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 5 +++++
 arch/arm/mach-omap2/board-omap4panda.c | 5 +++++
 2 files changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 0ee578a2251a..14a5971d0d48 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -610,6 +610,10 @@ static void sdp4430_panel_disable_hdmi(struct omap_dss_device *dssdev)
 	gpio_free_array(sdp4430_hdmi_gpios, ARRAY_SIZE(sdp4430_hdmi_gpios));
 }
 
+static struct omap_dss_hdmi_data sdp4430_hdmi_data = {
+	.hpd_gpio = HDMI_GPIO_HPD,
+};
+
 static struct omap_dss_device sdp4430_hdmi_device = {
 	.name = "hdmi",
 	.driver_name = "hdmi_panel",
@@ -617,6 +621,7 @@ static struct omap_dss_device sdp4430_hdmi_device = {
 	.platform_enable = sdp4430_panel_enable_hdmi,
 	.platform_disable = sdp4430_panel_disable_hdmi,
 	.channel = OMAP_DSS_CHANNEL_DIGIT,
+	.data = &sdp4430_hdmi_data,
 };
 
 static struct omap_dss_device *sdp4430_dss_devices[] = {
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 2b3f7e0f5d34..107dfc377a8a 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -646,6 +646,10 @@ static void omap4_panda_panel_disable_hdmi(struct omap_dss_device *dssdev)
 	gpio_free_array(panda_hdmi_gpios, ARRAY_SIZE(panda_hdmi_gpios));
 }
 
+static struct omap_dss_hdmi_data omap4_panda_hdmi_data = {
+	.hpd_gpio = HDMI_GPIO_HPD,
+};
+
 static struct omap_dss_device  omap4_panda_hdmi_device = {
 	.name = "hdmi",
 	.driver_name = "hdmi_panel",
@@ -653,6 +657,7 @@ static struct omap_dss_device  omap4_panda_hdmi_device = {
 	.platform_enable = omap4_panda_panel_enable_hdmi,
 	.platform_disable = omap4_panda_panel_disable_hdmi,
 	.channel = OMAP_DSS_CHANNEL_DIGIT,
+	.data = &omap4_panda_hdmi_data,
 };
 
 static struct omap_dss_device *omap4_panda_dss_devices[] = {
-- 
cgit v1.2.3


From a55848dfa041f2a2d1e96243fcb242ebadabeb0a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 24 Feb 2012 12:12:38 +0100
Subject: ARM: 7345/1: errata: update workaround for A9 erratum #743622

commit efbc74ace95338484f8d732037b99c7c77098fce upstream.

Erratum #743622 affects all r2 variants of the Cortex-A9 processor, so
ensure that the workaround is applied regardless of the revision.

Reported-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/Kconfig      | 2 +-
 arch/arm/mm/proc-v7.S | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2456badc8634..f9b212e0bc4d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1179,7 +1179,7 @@ config ARM_ERRATA_743622
 	depends on CPU_V7
 	help
 	  This option enables the workaround for the 743622 Cortex-A9
-	  (r2p0..r2p2) erratum. Under very rare conditions, a faulty
+	  (r2p*) erratum. Under very rare conditions, a faulty
 	  optimisation in the Cortex-A9 Store Buffer may lead to data
 	  corruption. This workaround sets a specific bit in the diagnostic
 	  register of the Cortex-A9 which disables the Store Buffer
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b6ba1032a988..7e47888101a1 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -344,9 +344,7 @@ __v7_setup:
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_743622
-	teq	r6, #0x20			@ present in r2p0
-	teqne	r6, #0x21			@ present in r2p1
-	teqne	r6, #0x22			@ present in r2p2
+	teq	r5, #0x00200000			@ only present in r2p*
 	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
 	orreq	r10, r10, #1 << 6		@ set bit #6
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
-- 
cgit v1.2.3


From 5bcb3f882ab97f3ebb4c4abd435704ad24c565cb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 9 Mar 2012 20:55:10 +0100
Subject: x86: Derandom delay_tsc for 64 bit

commit a7f4255f906f60f72e00aad2fb000939449ff32e upstream.

Commit f0fbf0abc093 ("x86: integrate delay functions") converted
delay_tsc() into a random delay generator for 64 bit.  The reason is
that it merged the mostly identical versions of delay_32.c and
delay_64.c.  Though the subtle difference of the result was:

 static void delay_tsc(unsigned long loops)
 {
-	unsigned bclock, now;
+	unsigned long bclock, now;

Now the function uses rdtscl() which returns the lower 32bit of the
TSC. On 32bit that's not problematic as unsigned long is 32bit. On 64
bit this fails when the lower 32bit are close to wrap around when
bclock is read, because the following check

       if ((now - bclock) >= loops)
       	  	break;

evaluated to true on 64bit for e.g. bclock = 0xffffffff and now = 0
because the unsigned long (now - bclock) of these values results in
0xffffffff00000001 which is definitely larger than the loops
value. That explains Tvortkos observation:

"Because I am seeing udelay(500) (_occasionally_) being short, and
 that by delaying for some duration between 0us (yep) and 491us."

Make those variables explicitely u32 again, so this works for both 32
and 64 bit.

Reported-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/lib/delay.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba887d05..e395693abdb1 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
 }
 
 /* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
 {
-	unsigned long bclock, now;
+	u32 bclock, now, loops = __loops;
 	int cpu;
 
 	preempt_disable();
-- 
cgit v1.2.3


From 508e8376b63e20e9191d3b80a3b32a31d0566aee Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 5 Mar 2012 14:06:38 +0100
Subject: =?UTF-8?q?compat:=20Re-add=20missing=20asm/compat.h=20include=20t?=
 =?UTF-8?q?o=20fix=20compile=C2=A0breakage=20on=20s390?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For kernels <= 3.0 the backport of 048cd4e51d24ebf7f3552226d03c769d6ad91658
"compat: fix compile breakage on s390" will break compilation...

Re-add a single #include <asm/compat.h> in order to fix this.

This patch is _not_ necessary for upstream, only for stable kernels
which include the "build fix" mentioned above.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/s390/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7547f57e2913..0260051c08f2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -56,6 +56,7 @@
 #include <asm/ptrace.h>
 #include <asm/sections.h>
 #include <asm/ebcdic.h>
+#include <asm/compat.h>
 #include <asm/kvm_virtio.h>
 
 long psw_kernel_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
-- 
cgit v1.2.3


From d17a17faf0334f2c95e922fb21a612cfea5b99ce Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 13 Mar 2012 18:19:51 -0700
Subject: sparc32: Add -Av8 to assembler command line.

commit e0adb9902fb338a9fe634c3c2a3e474075c733ba upstream.

Newer version of binutils are more strict about specifying the
correct options to enable certain classes of instructions.

The sparc32 build is done for v7 in order to support sun4c systems
which lack hardware integer multiply and divide instructions.

So we have to pass -Av8 when building the assembler routines that
use these instructions and get patched into the kernel when we find
out that we have a v8 capable cpu.

Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index ad1fb5d969f3..eddcfb36aafb 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -31,7 +31,7 @@ UTS_MACHINE    := sparc
 
 #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
-KBUILD_AFLAGS += -m32
+KBUILD_AFLAGS += -m32 -Wa,-Av8
 
 #LDFLAGS_vmlinux = -N -Ttext 0xf0004000
 #  Since 2.5.40, the first stage is left not btfix-ed.
-- 
cgit v1.2.3


From 029a9375e40af22c2f75d7a15615d2c87731f366 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 9 Dec 2011 15:06:18 +1100
Subject: powerpc/pmac: Fix SMP kernels on pre-core99 UP machines

commit 78c5c68a4cf4329d17abfa469345ddf323d4fd62 upstream.

The code for "powersurge" SMP would kick in and cause a crash
at boot due to the lack of a NULL test.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Reported-by: Adam Conrad <adconrad@ubuntu.com>
Tested-by: Adam Conrad <adconrad@ubuntu.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powermac/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index db092d7c4c5b..53a6be7ebe3c 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -414,7 +414,7 @@ static struct irqaction psurge_irqaction = {
 
 static void __init smp_psurge_setup_cpu(int cpu_nr)
 {
-	if (cpu_nr != 0)
+	if (cpu_nr != 0 || !psurge_start)
 		return;
 
 	/* reset the entry point so if we get another intr we won't
-- 
cgit v1.2.3


From c2ec63edaf48c90c3495eeb0b75bb05102fbf71a Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 12 Mar 2012 11:36:33 -0700
Subject: x86/ioapic: Add register level checks to detect bogus io-apic entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 73d63d038ee9f769f5e5b46792d227fe20e442c5 upstream.

With the recent changes to clear_IO_APIC_pin() which tries to
clear remoteIRR bit explicitly, some of the users started to see
"Unable to reset IRR for apic .." messages.

Close look shows that these are related to bogus IO-APIC entries
which return's all 1's for their io-apic registers. And the
above mentioned error messages are benign. But kernel should
have ignored such io-apic's in the first place.

Check if register 0, 1, 2 of the listed io-apic are all 1's and
ignore such io-apic.

Reported-by: Álvaro Castillo <midgoon@gmail.com>
Tested-by: Jon Dufresne <jon@jondufresne.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: yinghai@kernel.org
Cc: kernel-team@fedoraproject.org
Cc: Josh Boyer <jwboyer@redhat.com>
Link: http://lkml.kernel.org/r/1331577393.31585.94.camel@sbsiddha-desk.sc.intel.com
[ Performed minor cleanup of affected code. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/io_apic.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e5293394b548..16e9d94bfdd3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3927,18 +3927,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
 static __init int bad_ioapic(unsigned long address)
 {
 	if (nr_ioapics >= MAX_IO_APICS) {
-		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
-		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, nr_ioapics);
 		return 1;
 	}
 	if (!address) {
-		printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
-		       " found in table, skipping!\n");
+		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
 		return 1;
 	}
 	return 0;
 }
 
+static __init int bad_ioapic_register(int idx)
+{
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+
+	reg_00.raw = io_apic_read(idx, 0);
+	reg_01.raw = io_apic_read(idx, 1);
+	reg_02.raw = io_apic_read(idx, 2);
+
+	if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
+		pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
+			mpc_ioapic_addr(idx));
+		return 1;
+	}
+
+	return 0;
+}
+
 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 {
 	int idx = 0;
@@ -3955,6 +3973,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	ioapics[idx].mp_config.apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+
+	if (bad_ioapic_register(idx)) {
+		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+		return;
+	}
+
 	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
@@ -3975,10 +3999,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
 
-	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
-	       "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
-	       mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
-	       gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
+		idx, mpc_ioapic_id(idx),
+		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
 	nr_ioapics++;
 }
-- 
cgit v1.2.3


From 5a3e1f550cfc86a68729770bcfa28f36b238b34d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Wed, 21 Mar 2012 16:33:42 -0700
Subject: mm: thp: fix pmd_bad() triggering in code paths holding mmap_sem read
 mode

commit 1a5a9906d4e8d1976b701f889d8f35d54b928f25 upstream.

In some cases it may happen that pmd_none_or_clear_bad() is called with
the mmap_sem hold in read mode.  In those cases the huge page faults can
allocate hugepmds under pmd_none_or_clear_bad() and that can trigger a
false positive from pmd_bad() that will not like to see a pmd
materializing as trans huge.

It's not khugepaged causing the problem, khugepaged holds the mmap_sem
in write mode (and all those sites must hold the mmap_sem in read mode
to prevent pagetables to go away from under them, during code review it
seems vm86 mode on 32bit kernels requires that too unless it's
restricted to 1 thread per process or UP builds).  The race is only with
the huge pagefaults that can convert a pmd_none() into a
pmd_trans_huge().

Effectively all these pmd_none_or_clear_bad() sites running with
mmap_sem in read mode are somewhat speculative with the page faults, and
the result is always undefined when they run simultaneously.  This is
probably why it wasn't common to run into this.  For example if the
madvise(MADV_DONTNEED) runs zap_page_range() shortly before the page
fault, the hugepage will not be zapped, if the page fault runs first it
will be zapped.

Altering pmd_bad() not to error out if it finds hugepmds won't be enough
to fix this, because zap_pmd_range would then proceed to call
zap_pte_range (which would be incorrect if the pmd become a
pmd_trans_huge()).

The simplest way to fix this is to read the pmd in the local stack
(regardless of what we read, no need of actual CPU barriers, only
compiler barrier needed), and be sure it is not changing under the code
that computes its value.  Even if the real pmd is changing under the
value we hold on the stack, we don't care.  If we actually end up in
zap_pte_range it means the pmd was not none already and it was not huge,
and it can't become huge from under us (khugepaged locking explained
above).

All we need is to enforce that there is no way anymore that in a code
path like below, pmd_trans_huge can be false, but pmd_none_or_clear_bad
can run into a hugepmd.  The overhead of a barrier() is just a compiler
tweak and should not be measurable (I only added it for THP builds).  I
don't exclude different compiler versions may have prevented the race
too by caching the value of *pmd on the stack (that hasn't been
verified, but it wouldn't be impossible considering
pmd_none_or_clear_bad, pmd_bad, pmd_trans_huge, pmd_none are all inlines
and there's no external function called in between pmd_trans_huge and
pmd_none_or_clear_bad).

		if (pmd_trans_huge(*pmd)) {
			if (next-addr != HPAGE_PMD_SIZE) {
				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
				split_huge_page_pmd(vma->vm_mm, pmd);
			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
				continue;
			/* fall through */
		}
		if (pmd_none_or_clear_bad(pmd))

Because this race condition could be exercised without special
privileges this was reported in CVE-2012-1179.

The race was identified and fully explained by Ulrich who debugged it.
I'm quoting his accurate explanation below, for reference.

====== start quote =======
      mapcount 0 page_mapcount 1
      kernel BUG at mm/huge_memory.c:1384!

    At some point prior to the panic, a "bad pmd ..." message similar to the
    following is logged on the console:

      mm/memory.c:145: bad pmd ffff8800376e1f98(80000000314000e7).

    The "bad pmd ..." message is logged by pmd_clear_bad() before it clears
    the page's PMD table entry.

        143 void pmd_clear_bad(pmd_t *pmd)
        144 {
    ->  145         pmd_ERROR(*pmd);
        146         pmd_clear(pmd);
        147 }

    After the PMD table entry has been cleared, there is an inconsistency
    between the actual number of PMD table entries that are mapping the page
    and the page's map count (_mapcount field in struct page). When the page
    is subsequently reclaimed, __split_huge_page() detects this inconsistency.

       1381         if (mapcount != page_mapcount(page))
       1382                 printk(KERN_ERR "mapcount %d page_mapcount %d\n",
       1383                        mapcount, page_mapcount(page));
    -> 1384         BUG_ON(mapcount != page_mapcount(page));

    The root cause of the problem is a race of two threads in a multithreaded
    process. Thread B incurs a page fault on a virtual address that has never
    been accessed (PMD entry is zero) while Thread A is executing an madvise()
    system call on a virtual address within the same 2 MB (huge page) range.

               virtual address space
              .---------------------.
              |                     |
              |                     |
            .-|---------------------|
            | |                     |
            | |                     |<-- B(fault)
            | |                     |
      2 MB  | |/////////////////////|-.
      huge <  |/////////////////////|  > A(range)
      page  | |/////////////////////|-'
            | |                     |
            | |                     |
            '-|---------------------|
              |                     |
              |                     |
              '---------------------'

    - Thread A is executing an madvise(..., MADV_DONTNEED) system call
      on the virtual address range "A(range)" shown in the picture.

    sys_madvise
      // Acquire the semaphore in shared mode.
      down_read(&current->mm->mmap_sem)
      ...
      madvise_vma
        switch (behavior)
        case MADV_DONTNEED:
             madvise_dontneed
               zap_page_range
                 unmap_vmas
                   unmap_page_range
                     zap_pud_range
                       zap_pmd_range
                         //
                         // Assume that this huge page has never been accessed.
                         // I.e. content of the PMD entry is zero (not mapped).
                         //
                         if (pmd_trans_huge(*pmd)) {
                             // We don't get here due to the above assumption.
                         }
                         //
                         // Assume that Thread B incurred a page fault and
             .---------> // sneaks in here as shown below.
             |           //
             |           if (pmd_none_or_clear_bad(pmd))
             |               {
             |                 if (unlikely(pmd_bad(*pmd)))
             |                     pmd_clear_bad
             |                     {
             |                       pmd_ERROR
             |                         // Log "bad pmd ..." message here.
             |                       pmd_clear
             |                         // Clear the page's PMD entry.
             |                         // Thread B incremented the map count
             |                         // in page_add_new_anon_rmap(), but
             |                         // now the page is no longer mapped
             |                         // by a PMD entry (-> inconsistency).
             |                     }
             |               }
             |
             v
    - Thread B is handling a page fault on virtual address "B(fault)" shown
      in the picture.

    ...
    do_page_fault
      __do_page_fault
        // Acquire the semaphore in shared mode.
        down_read_trylock(&mm->mmap_sem)
        ...
        handle_mm_fault
          if (pmd_none(*pmd) && transparent_hugepage_enabled(vma))
              // We get here due to the above assumption (PMD entry is zero).
              do_huge_pmd_anonymous_page
                alloc_hugepage_vma
                  // Allocate a new transparent huge page here.
                ...
                __do_huge_pmd_anonymous_page
                  ...
                  spin_lock(&mm->page_table_lock)
                  ...
                  page_add_new_anon_rmap
                    // Here we increment the page's map count (starts at -1).
                    atomic_set(&page->_mapcount, 0)
                  set_pmd_at
                    // Here we set the page's PMD entry which will be cleared
                    // when Thread A calls pmd_clear_bad().
                  ...
                  spin_unlock(&mm->page_table_lock)

    The mmap_sem does not prevent the race because both threads are acquiring
    it in shared mode (down_read).  Thread B holds the page_table_lock while
    the page's map count and PMD table entry are updated.  However, Thread A
    does not synchronize on that lock.

====== end quote =======

[akpm@linux-foundation.org: checkpatch fixes]
Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Jones <davej@redhat.com>
Acked-by: Larry Woodman <lwoodman@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Mark Salter <msalter@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/vm86_32.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..04b87269edfb 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 	spinlock_t *ptl;
 	int i;
 
+	down_write(&mm->mmap_sem);
 	pgd = pgd_offset(mm, 0xA0000);
 	if (pgd_none_or_clear_bad(pgd))
 		goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 	}
 	pte_unmap_unlock(pte, ptl);
 out:
+	up_write(&mm->mmap_sem);
 	flush_tlb();
 }
 
-- 
cgit v1.2.3


From 0f06e7442a9d2035742189d05c3cdfd2b9dc47fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20B=C3=A4rwolf?= <stephan.baerwolf@tu-ilmenau.de>
Date: Thu, 12 Jan 2012 16:43:03 +0100
Subject: KVM: x86: extend "struct x86_emulate_ops" with "get_cpuid"

commit bdb42f5afebe208eae90406959383856ae2caf2b upstream.

In order to be able to proceed checks on CPU-specific properties
within the emulator, function "get_cpuid" is introduced.
With "get_cpuid" it is possible to virtually call the guests
"cpuid"-opcode without changing the VM's context.

[mtosatti: cleanup/beautify code]

Signed-off-by: Stephan Baerwolf <stephan.baerwolf@tu-ilmenau.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_emulate.h |  3 +++
 arch/x86/kvm/x86.c                 | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0049211959c0..18e54f1ec4ed 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -189,6 +189,9 @@ struct x86_emulate_ops {
 	int (*intercept)(struct x86_emulate_ctxt *ctxt,
 			 struct x86_instruction_info *info,
 			 enum x86_intercept_stage stage);
+
+	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 };
 
 typedef u32 __attribute__((vector_size(16))) sse128_t;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77c9d8673dc4..fbb093601b5a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4407,6 +4407,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
 	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+			       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	struct kvm_cpuid_entry2 *cpuid = NULL;
+
+	if (eax && ecx)
+		cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt),
+					    *eax, *ecx);
+
+	if (cpuid) {
+		*eax = cpuid->eax;
+		*ecx = cpuid->ecx;
+		if (ebx)
+			*ebx = cpuid->ebx;
+		if (edx)
+			*edx = cpuid->edx;
+		return true;
+	}
+
+	return false;
+}
+
 static struct x86_emulate_ops emulate_ops = {
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
@@ -4437,6 +4459,7 @@ static struct x86_emulate_ops emulate_ops = {
 	.get_fpu             = emulator_get_fpu,
 	.put_fpu             = emulator_put_fpu,
 	.intercept           = emulator_intercept,
+	.get_cpuid           = emulator_get_cpuid,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 35447aeacbe88ebbb30913b41f8e81f3135c79d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20B=C3=A4rwolf?= <stephan.baerwolf@tu-ilmenau.de>
Date: Thu, 12 Jan 2012 16:43:04 +0100
Subject: KVM: x86: fix missing checks in syscall emulation

commit c2226fc9e87ba3da060e47333657cd6616652b84 upstream.

On hosts without this patch, 32bit guests will crash (and 64bit guests
may behave in a wrong way) for example by simply executing following
nasm-demo-application:

    [bits 32]
    global _start
    SECTION .text
    _start: syscall

(I tested it with winxp and linux - both always crashed)

    Disassembly of section .text:

    00000000 <_start>:
       0:   0f 05                   syscall

The reason seems a missing "invalid opcode"-trap (int6) for the
syscall opcode "0f05", which is not available on Intel CPUs
within non-longmodes, as also on some AMD CPUs within legacy-mode.
(depending on CPU vendor, MSR_EFER and cpuid)

Because previous mentioned OSs may not engage corresponding
syscall target-registers (STAR, LSTAR, CSTAR), they remain
NULL and (non trapping) syscalls are leading to multiple
faults and finally crashs.

Depending on the architecture (AMD or Intel) pretended by
guests, various checks according to vendor's documentation
are implemented to overcome the current issue and behave
like the CPUs physical counterparts.

[mtosatti: cleanup/beautify code]

Signed-off-by: Stephan Baerwolf <stephan.baerwolf@tu-ilmenau.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_emulate.h | 13 ++++++++++
 arch/x86/kvm/emulate.c             | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 18e54f1ec4ed..0ab6a4dcb911 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -301,6 +301,19 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
 			       X86EMUL_MODE_PROT64)
 
+/* CPUID vendors */
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
+
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+
 enum x86_intercept_stage {
 	X86_ICTP_NONE = 0,   /* Allow zero-init to not match anything */
 	X86_ICPT_PRE_EXCEPT,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index adc98675cda0..3e7d9138dd2e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1901,6 +1901,51 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 	ss->p = 1;
 }
 
+static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
+{
+	struct x86_emulate_ops *ops = ctxt->ops;
+	u32 eax, ebx, ecx, edx;
+
+	/*
+	 * syscall should always be enabled in longmode - so only become
+	 * vendor specific (cpuid) if other modes are active...
+	 */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		return true;
+
+	eax = 0x00000000;
+	ecx = 0x00000000;
+	if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
+		/*
+		 * Intel ("GenuineIntel")
+		 * remark: Intel CPUs only support "syscall" in 64bit
+		 * longmode. Also an 64bit guest with a
+		 * 32bit compat-app running will #UD !! While this
+		 * behaviour can be fixed (by emulating) into AMD
+		 * response - CPUs of AMD can't behave like Intel.
+		 */
+		if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+		    ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+		    edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+			return false;
+
+		/* AMD ("AuthenticAMD") */
+		if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+		    ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+		    edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+			return true;
+
+		/* AMD ("AMDisbetter!") */
+		if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+		    ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+		    edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
+			return true;
+	}
+
+	/* default: (not Intel, not AMD), apply Intel's stricter rules... */
+	return false;
+}
+
 static int
 emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1915,9 +1960,15 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	    ctxt->mode == X86EMUL_MODE_VM86)
 		return emulate_ud(ctxt);
 
+	if (!(em_syscall_is_enabled(ctxt)))
+		return emulate_ud(ctxt);
+
 	ops->get_msr(ctxt, MSR_EFER, &efer);
 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
 
+	if (!(efer & EFER_SCE))
+		return emulate_ud(ctxt);
+
 	ops->get_msr(ctxt, MSR_STAR, &msr_data);
 	msr_data >>= 32;
 	cs_sel = (u16)(msr_data & 0xfffc);
-- 
cgit v1.2.3


From 6986f38042842b6c43dd10c36311db66c2865367 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Thu, 22 Mar 2012 21:39:25 +0100
Subject: x86-32: Fix endless loop when processing signals for kernel tasks

commit 29a2e2836ff9ea65a603c89df217f4198973a74f upstream.

The problem occurs on !CONFIG_VM86 kernels [1] when a kernel-mode task
returns from a system call with a pending signal.

A real-life scenario is a child of 'khelper' returning from a failed
kernel_execve() in ____call_usermodehelper() [ kernel/kmod.c ].
kernel_execve() fails due to a pending SIGKILL, which is the result of
"kill -9 -1" (at least, busybox's init does it upon reboot).

The loop is as follows:

* syscall_exit_work:
 - work_pending:            // start_of_the_loop
 - work_notify_sig:
   - do_notify_resume()
     - do_signal()
       - if (!user_mode(regs)) return;
 - resume_userspace         // TIF_SIGPENDING is still set
 - work_pending             // so we call work_pending => goto
                            // start_of_the_loop

More information can be found in another LKML thread:
http://www.serverphorums.com/read.php?12,457826

[1] the problem was also seen on MIPS.

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Link: http://lkml.kernel.org/r/1332448765.2299.68.camel@dimm
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@hack.frob.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/entry_32.S | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a91974918..edb3d46c16db 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -98,12 +98,6 @@
 #endif
 .endm
 
-#ifdef CONFIG_VM86
-#define resume_userspace_sig	check_userspace
-#else
-#define resume_userspace_sig	resume_userspace
-#endif
-
 /*
  * User gs save/restore
  *
@@ -327,10 +321,19 @@ ret_from_exception:
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
-check_userspace:
+resume_userspace_sig:
+#ifdef CONFIG_VM86
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
 	movb PT_CS(%esp), %al
 	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+#else
+	/*
+	 * We can be coming here from a syscall done in the kernel space,
+	 * e.g. a failed kernel_execve().
+	 */
+	movl PT_CS(%esp), %eax
+	andl $SEGMENT_RPL_MASK, %eax
+#endif
 	cmpl $USER_RPL, %eax
 	jb resume_kernel		# not returning to v8086 or userspace
 
-- 
cgit v1.2.3


From 02241f8a5f7c225b4feea6572bbfcc3cf0bae6be Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 18 Mar 2012 02:40:48 +0000
Subject: net: bpf_jit: fix BPF_S_LDX_B_MSH compilation

[ Upstream commit dc72d99dabb870ca5bd6d9fff674be853bb4a88d ]

Matt Evans spotted that x86 bpf_jit was incorrectly handling negative
constant offsets in BPF_S_LDX_B_MSH instruction.

We need to abort JIT compilation like we do in common_load so that
filter uses the interpreter code and can call __load_pointer()

Reference: http://lists.openwall.net/netdev/2011/07/19/11

Thanks to Indan Zupancic to bring back this issue.

Reported-by: Matt Evans <matt@ozlabs.org>
Reported-by: Indan Zupancic <indan@nul.nu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/net/bpf_jit_comp.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7c1b765ecc59..5671752f8d9c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -475,8 +475,10 @@ void bpf_jit_compile(struct sk_filter *fp)
 			case BPF_S_LD_W_ABS:
 				func = sk_load_word;
 common_load:			seen |= SEEN_DATAREF;
-				if ((int)K < 0)
+				if ((int)K < 0) {
+					/* Abort the JIT because __load_pointer() is needed. */
 					goto out;
+				}
 				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call */
@@ -489,14 +491,8 @@ common_load:			seen |= SEEN_DATAREF;
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
 				if ((int)K < 0) {
-					if (pc_ret0 > 0) {
-						/* addrs[pc_ret0 - 1] is the start address */
-						EMIT_JMP(addrs[pc_ret0 - 1] - addrs[i]);
-						break;
-					}
-					CLEAR_A();
-					EMIT_JMP(cleanup_addr - addrs[i]);
-					break;
+					/* Abort the JIT because __load_pointer() is needed. */
+					goto out;
 				}
 				seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = sk_load_byte_msh - (image + addrs[i]);
-- 
cgit v1.2.3


From 585bb618b8fb7ebbc1dc0855d94f936fd1ee2d42 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Tue, 21 Feb 2012 18:19:55 -0800
Subject: x86, tsc: Skip refined tsc calibration on systems with reliable TSC

commit 57779dc2b3b75bee05ef5d1ada47f615f7a13932 upstream.

While running the latest Linux as guest under VMware in highly
over-committed situations, we have seen cases when the refined TSC
algorithm fails to get a valid tsc_start value in
tsc_refine_calibration_work from multiple attempts. As a result the
kernel keeps on scheduling the tsc_irqwork task for later. Subsequently
after several attempts when it gets a valid start value it goes through
the refined calibration and either bails out or uses the new results.
Given that the kernel originally read the TSC frequency from the
platform, which is the best it can get, I don't think there is much
value in refining it.

So  for systems which get the TSC frequency from the platform we
should skip the refined tsc algorithm.

We can use the TSC_RELIABLE cpu cap flag to detect this, right now it is
set only on VMware and for Moorestown Penwell both of which have there
own TSC calibration methods.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Dirk Brandewie <dirk.brandewie@gmail.com>
Cc: Alan Cox <alan@linux.intel.com>
[jstultz: Reworked to simply not schedule the refining work,
rather then scheduling the work and bombing out later]
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/tsc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6cc6922262af..63fed529edc9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -956,6 +956,16 @@ static int __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
+
+	/*
+	 * Trust the results of the earlier calibration on systems
+	 * exporting a reliable TSC.
+	 */
+	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+		clocksource_register_khz(&clocksource_tsc, tsc_khz);
+		return 0;
+	}
+
 	schedule_delayed_work(&tsc_irqwork, 0);
 	return 0;
 }
-- 
cgit v1.2.3


From 57ad23326e53fec741be83138643b768e7d2e194 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 24 Mar 2012 10:52:50 +0300
Subject: x86, tls: Off by one limit check

commit 8f0750f19789cf352d7e24a6cc50f2ab1b4f1372 upstream.

These are used as offsets into an array of GDT_ENTRY_TLS_ENTRIES members
so GDT_ENTRY_TLS_ENTRIES is one past the end of the array.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: http://lkml.kernel.org/r/20120324075250.GA28258@elgon.mountain
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/tls.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b8579e70..bcfec2d23769 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
 {
 	const struct desc_struct *tls;
 
-	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
 	    (pos % sizeof(struct user_desc)) != 0 ||
 	    (count % sizeof(struct user_desc)) != 0)
 		return -EINVAL;
@@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
 	const struct user_desc *info;
 
-	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
 	    (pos % sizeof(struct user_desc)) != 0 ||
 	    (count % sizeof(struct user_desc)) != 0)
 		return -EINVAL;
-- 
cgit v1.2.3


From 62a48bcddfa2b4cabef5b58564cb7d843658fe19 Mon Sep 17 00:00:00 2001
From: "zhuangfeiran@ict.ac.cn" <zhuangfeiran@ict.ac.cn>
Date: Wed, 28 Mar 2012 23:27:00 +0000
Subject: x86 bpf_jit: fix a bug in emitting the 16-bit immediate operand of
 AND

[ Upstream commit 1d24fb3684f347226747c6b11ea426b7b992694e ]

When K >= 0xFFFF0000, AND needs the two least significant bytes of K as
its operand, but EMIT2() gives it the least significant byte of K and
0x2. EMIT() should be used here to replace EMIT2().

Signed-off-by: Feiran Zhuang  <zhuangfeiran@ict.ac.cn>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/net/bpf_jit_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752f8d9c..5a5b6e4dd738 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -289,7 +289,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
 				} else if (K >= 0xFFFF0000) {
 					EMIT2(0x66, 0x25);	/* and imm16,%ax */
-					EMIT2(K, 2);
+					EMIT(K, 2);
 				} else {
 					EMIT1_off32(0x25, K);	/* and imm32,%eax */
 				}
-- 
cgit v1.2.3


From 7c11d1dd6e0c348d7ff0915842720acf373aa24c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 18 Mar 2012 13:21:38 +0100
Subject: m68k/mac: Add missing platform check before registering platform
 devices

commit 6cfeba53911d6d2f17ebbd1246893557d5ff5aeb upstream.

On multi-platform kernels, the Mac platform devices should be registered
when running on Mac only. Else it may crash later.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/mac/config.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index c247de02bc7e..1918d76aa06b 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -950,6 +950,9 @@ int __init mac_platform_init(void)
 {
 	u8 *swim_base;
 
+	if (!MACH_IS_MAC)
+		return -ENODEV;
+
 	/*
 	 * Serial devices
 	 */
-- 
cgit v1.2.3


From e5acefebad4863015271b91f865c62def9bfed82 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 23 Mar 2012 09:35:05 -0500
Subject: x86,kgdb: Fix DEBUG_RODATA limitation using text_poke()

commit 3751d3e85cf693e10e2c47c03c8caa65e171099b upstream.

There has long been a limitation using software breakpoints with a
kernel compiled with CONFIG_DEBUG_RODATA going back to 2.6.26. For
this particular patch, it will apply cleanly and has been tested all
the way back to 2.6.36.

The kprobes code uses the text_poke() function which accommodates
writing a breakpoint into a read-only page.  The x86 kgdb code can
solve the problem similarly by overriding the default breakpoint
set/remove routines and using text_poke() directly.

The x86 kgdb code will first attempt to use the traditional
probe_kernel_write(), and next try using a the text_poke() function.
The break point install method is tracked such that the correct break
point removal routine will get called later on.

Cc: x86@kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Inspried-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/kgdb.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5f9ecff328b5..fc1f48dc9989 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,8 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
 
 #include <asm/debugreg.h>
 #include <asm/apicdef.h>
@@ -710,6 +712,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	regs->ip = ip;
 }
 
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+	int err;
+	char opc[BREAK_INSTR_SIZE];
+
+	bpt->type = BP_BREAKPOINT;
+	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				BREAK_INSTR_SIZE);
+	if (err)
+		return err;
+	err = probe_kernel_write((char *)bpt->bpt_addr,
+				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+#ifdef CONFIG_DEBUG_RODATA
+	if (!err)
+		return err;
+	/*
+	 * It is safe to call text_poke() because normal kernel execution
+	 * is stopped on all cores, so long as the text_mutex is not locked.
+	 */
+	if (mutex_is_locked(&text_mutex))
+		return -EBUSY;
+	text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+		  BREAK_INSTR_SIZE);
+	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	if (err)
+		return err;
+	if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
+		return -EINVAL;
+	bpt->type = BP_POKE_BREAKPOINT;
+#endif /* CONFIG_DEBUG_RODATA */
+	return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+#ifdef CONFIG_DEBUG_RODATA
+	int err;
+	char opc[BREAK_INSTR_SIZE];
+
+	if (bpt->type != BP_POKE_BREAKPOINT)
+		goto knl_write;
+	/*
+	 * It is safe to call text_poke() because normal kernel execution
+	 * is stopped on all cores, so long as the text_mutex is not locked.
+	 */
+	if (mutex_is_locked(&text_mutex))
+		goto knl_write;
+	text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
+	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
+		goto knl_write;
+	return err;
+knl_write:
+#endif /* CONFIG_DEBUG_RODATA */
+	return probe_kernel_write((char *)bpt->bpt_addr,
+				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+}
+
 struct kgdb_arch arch_kgdb_ops = {
 	/* Breakpoint instruction: */
 	.gdb_bpt_instr		= { 0xcc },
-- 
cgit v1.2.3


From 06bcd9526cab45751fe74466064d41211b3b45d6 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Tue, 28 Feb 2012 11:51:10 -0700
Subject: x86/PCI: use host bridge _CRS info on MSI MS-7253

commit 8411371709610c826bf65684f886bfdfb5780ca1 upstream.

In the spirit of commit 29cf7a30f8a0 ("x86/PCI: use host bridge _CRS
info on ASUS M2V-MX SE"), this DMI quirk turns on "pci_use_crs" by
default on a board that needs it.

This fixes boot failures and oopses introduced in 3e3da00c01d0
("x86/pci: AMD one chain system to use pci read out res").  The quirk
is quite targetted (to a specific board and BIOS version) for two
reasons:

 (1) to emphasize that this method of tackling the problem one quirk
     at a time is a little insane

 (2) to give BIOS vendors an opportunity to use simpler tables and
     allow us to return to generic behavior (whatever that happens to
     be) with a later BIOS update

In other words, I am not at all happy with having quirks like this.
But it is even worse for the kernel not to work out of the box on
these machines, so...

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42619
Reported-by: Svante Signell <svante.signell@telia.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/acpi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 53f9e684c819..76438801fffb 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -54,6 +54,17 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
 		},
 	},
+	/* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
+	{
+		.callback = set_use_crs,
+		.ident = "MSI MS-7253",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
+			DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "V1.6"),
+		},
+	},
 	{}
 };
 
-- 
cgit v1.2.3


From 37e008cfe9c8790d24d35b00f659a754209ab9a7 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Tue, 28 Feb 2012 15:31:35 -0600
Subject: x86/PCI: do not tie MSI MS-7253 use_crs quirk to BIOS version

commit a97f4f5e524bcd09a85ef0b8821a14d35e69335f upstream.

Carlos was getting

	WARNING: at drivers/pci/pci.c:118 pci_ioremap_bar+0x24/0x52()

when probing his sound card, and sound did not work.  After adding
pci=use_crs to the kernel command line, no more trouble.

Ok, we can add a quirk.  dmidecode output reveals that this is an MSI
MS-7253, for which we already have a quirk, but the short-sighted
author tied the quirk to a single BIOS version, making it not kick in
on Carlos's machine with BIOS V1.2.  If a later BIOS update makes it
no longer necessary to look at the _CRS info it will still be
harmless, so let's stop trying to guess which versions have and don't
have accurate _CRS tables.

Addresses https://bugtrack.alsa-project.org/alsa-bug/view.php?id=5533
Also see <https://bugzilla.kernel.org/show_bug.cgi?id=42619>.

Reported-by: Carlos Luna <caralu74@gmail.com>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/acpi.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 76438801fffb..0473a8f93501 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -62,7 +62,6 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
 			DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
-			DMI_MATCH(DMI_BIOS_VERSION, "V1.6"),
 		},
 	},
 	{}
-- 
cgit v1.2.3


From fb83b374f23848545068c9258500b92a36d18fe0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 10 Apr 2012 16:04:49 -0700
Subject: Revert "x86/ioapic: Add register level checks to detect bogus io-apic
 entries"

This reverts commit c2ec63edaf48c90c3495eeb0b75bb05102fbf71a
[73d63d038ee9f769f5e5b46792d227fe20e442c5 upstream]

It causes problems, so needs to be reverted from 3.2-stable for now.

Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jon Dufresne <jon@jondufresne.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: <yinghai@kernel.org>
Cc: Josh Boyer <jwboyer@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Teck Choon Giam <giamteckchoon@gmail.com>
Cc: Ben Guthro <ben@guthro.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/io_apic.c | 40 ++++++++--------------------------------
 1 file changed, 8 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 16e9d94bfdd3..e5293394b548 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3927,36 +3927,18 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
 static __init int bad_ioapic(unsigned long address)
 {
 	if (nr_ioapics >= MAX_IO_APICS) {
-		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
-			MAX_IO_APICS, nr_ioapics);
+		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
+		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
 		return 1;
 	}
 	if (!address) {
-		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
+		printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+		       " found in table, skipping!\n");
 		return 1;
 	}
 	return 0;
 }
 
-static __init int bad_ioapic_register(int idx)
-{
-	union IO_APIC_reg_00 reg_00;
-	union IO_APIC_reg_01 reg_01;
-	union IO_APIC_reg_02 reg_02;
-
-	reg_00.raw = io_apic_read(idx, 0);
-	reg_01.raw = io_apic_read(idx, 1);
-	reg_02.raw = io_apic_read(idx, 2);
-
-	if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
-		pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
-			mpc_ioapic_addr(idx));
-		return 1;
-	}
-
-	return 0;
-}
-
 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 {
 	int idx = 0;
@@ -3973,12 +3955,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	ioapics[idx].mp_config.apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
-	if (bad_ioapic_register(idx)) {
-		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
-		return;
-	}
-
 	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
@@ -3999,10 +3975,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
 
-	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
-		idx, mpc_ioapic_id(idx),
-		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
-		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
+	       mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+	       gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
 	nr_ioapics++;
 }
-- 
cgit v1.2.3


From ee9c2e08d37dae695a4f4cfb4dc003c5c2576f79 Mon Sep 17 00:00:00 2001
From: Salman Qazi <sqazi@google.com>
Date: Fri, 9 Mar 2012 16:41:01 -0800
Subject: sched/x86: Fix overflow in cyc2ns_offset

commit 9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 upstream.

When a machine boots up, the TSC generally gets reset.  However,
when kexec is used to boot into a kernel, the TSC value would be
carried over from the previous kernel.  The computation of
cycns_offset in set_cyc2ns_scale is prone to an overflow, if the
machine has been up more than 208 days prior to the kexec.  The
overflow happens when we multiply *scale, even though there is
enough room to store the final answer.

We fix this issue by decomposing tsc_now into the quotient and
remainder of division by CYC2NS_SCALE_FACTOR and then performing
the multiplication separately on the two components.

Refactor code to share the calculation with the previous
fix in __cycles_2_ns().

Signed-off-by: Salman Qazi <sqazi@google.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: john stultz <johnstul@us.ibm.com>
Link: http://lkml.kernel.org/r/20120310004027.19291.88460.stgit@dungbeetle.mtv.corp.google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/timer.h | 8 ++------
 arch/x86/kernel/tsc.c        | 3 ++-
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e5d484..34baa0eb5d0c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	unsigned long long quot;
-	unsigned long long rem;
 	int cpu = smp_processor_id();
 	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	quot = (cyc >> CYC2NS_SCALE_FACTOR);
-	rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
-	ns += quot * per_cpu(cyc2ns, cpu) +
-		((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+	ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+			(1UL << CYC2NS_SCALE_FACTOR));
 	return ns;
 }
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 63fed529edc9..4406c038a0a8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 
 	if (cpu_khz) {
 		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-		*offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+		*offset = ns_now - mult_frac(tsc_now, *scale,
+					     (1UL << CYC2NS_SCALE_FACTOR));
 	}
 
 	sched_clock_idle_wakeup_event(0);
-- 
cgit v1.2.3


From 1a92416a1d37fd96427bd8d8a5fb52b54429c26f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 11 Apr 2012 18:45:52 +0200
Subject: iommu/amd: Make sure IOMMU interrupts are re-enabled on resume

commit 9ddd592a191b32f2ee6c4b6ed2bd52665c3a49f5 upstream.

Unfortunatly the interrupts for the event log and the
peripheral page-faults are only enabled at boot but not
re-enabled at resume. Fix that.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
[bwh: Backport to 3.0:
 - Drop change to PPR log which was added in 3.3
  - Source is under arch/x86/kernel]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/amd_iommu_init.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index bfc8453bd98d..33df6e82f653 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1031,8 +1031,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 {
 	int r;
 
-	if (pci_enable_msi(iommu->dev))
-		return 1;
+	r = pci_enable_msi(iommu->dev);
+	if (r)
+		return r;
 
 	r = request_threaded_irq(iommu->dev->irq,
 				 amd_iommu_int_handler,
@@ -1042,24 +1043,33 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 
 	if (r) {
 		pci_disable_msi(iommu->dev);
-		return 1;
+		return r;
 	}
 
 	iommu->int_enabled = true;
-	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 
 	return 0;
 }
 
 static int iommu_init_msi(struct amd_iommu *iommu)
 {
+	int ret;
+
 	if (iommu->int_enabled)
-		return 0;
+		goto enable_faults;
 
 	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
-		return iommu_setup_msi(iommu);
+		ret = iommu_setup_msi(iommu);
+	else
+		ret = -ENODEV;
 
-	return 1;
+	if (ret)
+		return ret;
+
+enable_faults:
+	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
+	return 0;
 }
 
 /****************************************************************************
-- 
cgit v1.2.3


From c30d6d63c156d6842a6f895493766077773bbf87 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <Jonathan.Austin@arm.com>
Date: Thu, 12 Apr 2012 17:45:25 +0100
Subject: ARM: 7384/1: ThumbEE: Disable userspace TEEHBR access for
 !CONFIG_ARM_THUMBEE

commit 078c04545ba56da21567728a909a496df5ff730d upstream.

Currently when ThumbEE is not enabled (!CONFIG_ARM_THUMBEE) the ThumbEE
register states are not saved/restored at context switch. The default state
of the ThumbEE Ctrl register (TEECR) allows userspace accesses to the
ThumbEE Base Handler register (TEEHBR). This can cause unexpected behaviour
when people use ThumbEE on !CONFIG_ARM_THUMBEE kernels, as well as allowing
covert communication - eg between userspace tasks running inside chroot
jails.

This patch sets up TEECR in order to prevent user-space access to TEEHBR
when !CONFIG_ARM_THUMBEE. In this case, tasks are sent SIGILL if they try to
access TEEHBR.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/proc-v7.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7e47888101a1..21cd29834076 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -371,6 +371,18 @@ __v7_setup:
 	ldr	r6, =NMRR			@ NMRR
 	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
+#endif
+#ifndef CONFIG_ARM_THUMBEE
+	mrc	p15, 0, r0, c0, c1, 0		@ read ID_PFR0 for ThumbEE
+	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
+	teq	r0, #(1 << 12)			@ check if ThumbEE is present
+	bne	1f
+	mov	r5, #0
+	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
+	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
+	orr	r0, r0, #1			@ set the 1st bit in order to
+	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
+1:
 #endif
 	adr	r5, v7_crval
 	ldmia	r5, {r5, r6}
-- 
cgit v1.2.3


From 9a8bf5fd4a27ee78730355f732f5bd8eaa5349f9 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Mon, 16 Apr 2012 16:28:01 -0700
Subject: ia64: fix futex_atomic_cmpxchg_inatomic()

commit c76f39bddb84f93f70a5520d9253ec0317bec216 upstream.

Michel Lespinasse cleaned up the futex calling conventions in commit
37a9d912b24f ("futex: Sanitize cmpxchg_futex_value_locked API").

But the ia64 implementation was subtly broken.  Gcc does not know that
register "r8" will be updated by the fault handler if the cmpxchg
instruction takes an exception.  So it feels safe in letting the
initialization of r8 slide to after the cmpxchg.  Result: we always
return 0 whether the user address faulted or not.

Fix by moving the initialization of r8 into the __asm__ code so gcc
won't move it.

Reported-by: <emeric.maschino@gmail.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42757
Tested-by: <emeric.maschino@gmail.com>
Acked-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/ia64/include/asm/futex.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index 8428525ddb22..21ab376465d5 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -107,15 +107,16 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	{
-		register unsigned long r8 __asm ("r8") = 0;
+		register unsigned long r8 __asm ("r8");
 		unsigned long prev;
 		__asm__ __volatile__(
 			"	mf;;					\n"
-			"	mov ar.ccv=%3;;				\n"
-			"[1:]	cmpxchg4.acq %0=[%1],%2,ar.ccv		\n"
+			"	mov %0=r0				\n"
+			"	mov ar.ccv=%4;;				\n"
+			"[1:]	cmpxchg4.acq %1=[%2],%3,ar.ccv		\n"
 			"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"
 			"[2:]"
-			: "=r" (prev)
+			: "=r" (r8), "=r" (prev)
 			: "r" (uaddr), "r" (newval),
 			  "rO" ((long) (unsigned) oldval)
 			: "memory");
-- 
cgit v1.2.3


From 81cc5e7c4ed5bf9172b26bb3a7d019362dd3d204 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 13 Apr 2012 03:35:13 +0000
Subject: sparc64: Eliminate obsolete __handle_softirq() function

commit 3d3eeb2ef26112a200785e5fca58ec58dd33bf1e upstream.

The invocation of softirq is now handled by irq_exit(), so there is no
need for sparc64 to invoke it on the trap-return path.  In fact, doing so
is a bug because if the trap occurred in the idle loop, this invocation
can result in lockdep-RCU failures.  The problem is that RCU ignores idle
CPUs, and the sparc64 trap-return path to the softirq handlers fails to
tell RCU that the CPU must be considered non-idle while those handlers
are executing.  This means that RCU is ignoring any RCU read-side critical
sections in those handlers, which in turn means that RCU-protected data
can be yanked out from under those read-side critical sections.

The shiny new lockdep-RCU ability to detect RCU read-side critical sections
that RCU is ignoring located this problem.

The fix is straightforward: Make sparc64 stop manually invoking the
softirq handlers.

Reported-by: Meelis Roos <mroos@linux.ee>
Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/rtrap_64.S | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 77f1b95e0806..9171fc238def 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -20,11 +20,6 @@
 
 		.text
 		.align			32
-__handle_softirq:
-		call			do_softirq
-		 nop
-		ba,a,pt			%xcc, __handle_softirq_continue
-		 nop
 __handle_preemption:
 		call			schedule
 		 wrpr			%g0, RTRAP_PSTATE, %pstate
@@ -89,9 +84,7 @@ rtrap:
 		cmp			%l1, 0
 
 		/* mm/ultra.S:xcall_report_regs KNOWS about this load. */
-		bne,pn			%icc, __handle_softirq
 		 ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
-__handle_softirq_continue:
 rtrap_xcall:
 		sethi			%hi(0xf << 20), %l4
 		and			%l1, %l4, %l4
-- 
cgit v1.2.3


From 16f61cb9110047912a6feb40676b10af1a973a1f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 13 Apr 2012 11:56:22 -0700
Subject: sparc64: Fix bootup crash on sun4v.

commit 9e0daff30fd7ecf698e5d20b0fa7f851e427cca5 upstream.

The DS driver registers as a subsys_initcall() but this can be too
early, in particular this risks registering before we've had a chance
to allocate and setup module_kset in kernel/params.c which is
performed also as a subsyts_initcall().

Register DS using device_initcall() insteal.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 7429b47c3aca..dcae702fc1f3 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -1269,4 +1269,4 @@ static int __init ds_init(void)
 	return vio_register_driver(&ds_driver);
 }
 
-subsys_initcall(ds_init);
+fs_initcall(ds_init);
-- 
cgit v1.2.3


From a741ac8bc6f3940013cfd204a8457caa1fcc202f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 11 Apr 2012 14:28:07 +0200
Subject: S390: fix tlb flushing for page table pages

commit cd94154cc6a28dd9dc271042c1a59c08d26da886 upstream.

Git commit 36409f6353fc2d7b6516e631415f938eadd92ffa "use generic RCU
page-table freeing code" introduced a tlb flushing bug. Partially revert
the above git commit and go back to s390 specific page table flush code.

For s390 the TLB can contain three types of entries, "normal" TLB
page-table entries, TLB combined region-and-segment-table (CRST) entries
and real-space entries. Linux does not use real-space entries which
leaves normal TLB entries and CRST entries. The CRST entries are
intermediate steps in the page-table translation called translation paths.
For example a 4K page access in a three-level page table setup will
create two CRST TLB entries and one page-table TLB entry. The advantage
of that approach is that a page access next to the previous one can reuse
the CRST entries and needs just a single read from memory to create the
page-table TLB entry. The disadvantage is that the TLB flushing rules are
more complicated, before any page-table may be freed the TLB needs to be
flushed.

In short: the generic RCU page-table freeing code is incorrect for the
CRST entries, in particular the check for mm_users < 2 is troublesome.

This is applicable to 3.0+ kernels.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/Kconfig               |  1 -
 arch/s390/include/asm/pgalloc.h |  3 --
 arch/s390/include/asm/tlb.h     | 22 +-------------
 arch/s390/mm/pgtable.c          | 63 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 61 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9b922b12e9f0..c395f713ce31 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -89,7 +89,6 @@ config S390
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
-	select HAVE_RCU_TABLE_FREE if SMP
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 38e71ebcd3c2..e4b6609fe92a 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,10 +22,7 @@ void crst_table_free(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
-void __tlb_remove_table(void *_table);
-#endif
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index c687a2c83462..775a5eea8f9e 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -30,14 +30,10 @@
 
 struct mmu_gather {
 	struct mm_struct *mm;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	struct mmu_table_batch *batch;
-#endif
 	unsigned int fullmm;
-	unsigned int need_flush;
 };
 
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 struct mmu_table_batch {
 	struct rcu_head		rcu;
 	unsigned int		nr;
@@ -49,7 +45,6 @@ struct mmu_table_batch {
 
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-#endif
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
@@ -57,29 +52,20 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 {
 	tlb->mm = mm;
 	tlb->fullmm = full_mm_flush;
-	tlb->need_flush = 0;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
-#endif
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->need_flush)
-		return;
-	tlb->need_flush = 0;
-	__tlb_flush_mm(tlb->mm);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
-#endif
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb);
+	tlb_table_flush(tlb);
 }
 
 /*
@@ -105,10 +91,8 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				unsigned long address)
 {
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return page_table_free_rcu(tlb, (unsigned long *) pte);
-#endif
 	page_table_free(tlb->mm, (unsigned long *) pte);
 }
 
@@ -125,10 +109,8 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return tlb_remove_table(tlb, pmd);
-#endif
 	crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
 }
@@ -146,10 +128,8 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
 		return;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	if (!tlb->fullmm)
 		return tlb_remove_table(tlb, pud);
-#endif
 	crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
 }
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 458893f5f6b8..51b80b9d1f6a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -243,8 +243,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	}
 }
 
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-
 static void __page_table_free_rcu(void *table, unsigned bit)
 {
 	struct page *page;
@@ -301,7 +299,66 @@ void __tlb_remove_table(void *_table)
 		free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
-#endif
+static void tlb_remove_table_smp_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+	/*
+	 * This isn't an RCU grace period and hence the page-tables cannot be
+	 * assumed to be actually RCU-freed.
+	 *
+	 * It is however sufficient for software page-table walkers that rely
+	 * on IRQ disabling. See the comment near struct mmu_table_batch.
+	 */
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	__tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+	struct mmu_table_batch *batch;
+	int i;
+
+	batch = container_of(head, struct mmu_table_batch, rcu);
+
+	for (i = 0; i < batch->nr; i++)
+		__tlb_remove_table(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	if (*batch) {
+		__tlb_flush_mm(tlb->mm);
+		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+		*batch = NULL;
+	}
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	if (*batch == NULL) {
+		*batch = (struct mmu_table_batch *)
+			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+		if (*batch == NULL) {
+			__tlb_flush_mm(tlb->mm);
+			tlb_remove_table_one(table);
+			return;
+		}
+		(*batch)->nr = 0;
+	}
+	(*batch)->tables[(*batch)->nr++] = table;
+	if ((*batch)->nr == MAX_TABLE_BATCH)
+		tlb_table_flush(tlb);
+}
 
 /*
  * switch on pgstes for its userspace process (for kvm)
-- 
cgit v1.2.3


From 322fd620a858fab9c1ea85a7cfebe3fc041d7126 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linux.intel.com>
Date: Wed, 18 Apr 2012 17:37:39 +0100
Subject: x86, apic: APIC code touches invalid MSR on P5 class machines

commit cbf2829b61c136edcba302a5e1b6b40e97d32c00 upstream.

Current APIC code assumes MSR_IA32_APICBASE is present for all systems.
Pentium Classic P5 and friends didn't have this MSR. MSR_IA32_APICBASE
was introduced as an architectural MSR by Intel @ P6.

Code paths that can touch this MSR invalidly are when vendor == Intel &&
cpu-family == 5 and APIC bit is set in CPUID - or when you simply pass
lapic on the kernel command line, on a P5.

The below patch stops Linux incorrectly interfering with the
MSR_IA32_APICBASE for P5 class machines. Other code paths exist that
touch the MSR - however those paths are not currently reachable for a
conformant P5.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linux.intel.com>
Link: http://lkml.kernel.org/r/4F8EEDD3.1080404@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/apic.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b9338b8cf420..147169569274 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1558,9 +1558,11 @@ static int __init apic_verify(void)
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
 
 	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+	if (boot_cpu_data.x86 >= 6) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (l & MSR_IA32_APICBASE_ENABLE)
+			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+	}
 
 	pr_info("Found and enabled local APIC!\n");
 	return 0;
@@ -1578,13 +1580,15 @@ int __init apic_force_enable(unsigned long addr)
 	 * MSR. This can only be done in software for Intel P6 or later
 	 * and AMD K7 (Model > 1) or later.
 	 */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-		pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-		enabled_via_apicbase = 1;
+	if (boot_cpu_data.x86 >= 6) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | addr;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
 	}
 	return apic_verify();
 }
@@ -2112,10 +2116,12 @@ static void lapic_resume(void)
 		 * FIXME! This will be wrong if we ever support suspend on
 		 * SMP! We'll need to do this as part of the CPU restore!
 		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
+		if (boot_cpu_data.x86 >= 6) {
+			rdmsr(MSR_IA32_APICBASE, l, h);
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+		}
 	}
 
 	maxlvt = lapic_get_maxlvt();
-- 
cgit v1.2.3


From aa4a6ac6d1eec0fb5c88cbc352a056ae826bc985 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 26 Apr 2012 19:44:06 +0100
Subject: xen: correctly check for pending events when restoring irq flags

commit 7eb7ce4d2e8991aff4ecb71a81949a907ca755ac upstream.

In xen_restore_fl_direct(), xen_force_evtchn_callback() was being
called even if no events were pending.  This resulted in (depending on
workload) about a 100 times as many xen_version hypercalls as
necessary.

Fix this by correcting the sense of the conditional jump.

This seems to give a significant performance benefit for some
workloads.

There is some subtle tricksy "..since the check here is trying to
check both pending and masked in a single cmpw, but I think this is
correct. It will call check_events now only when the combined
mask+pending word is 0x0001 (aka unmasked, pending)." (Ian)

Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/xen-asm.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 79d7362ad6d1..3e45aa000718 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct)
 
 	/* check for unmasked and pending */
 	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
-	jz 1f
+	jnz 1f
 2:	call check_events
 1:
 ENDPATCH(xen_restore_fl_direct)
-- 
cgit v1.2.3


From 5b09471039a7e08329eb1f48f2153bd979188351 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 26 Apr 2012 13:50:03 -0400
Subject: xen/smp: Fix crash when booting with ACPI hotplug CPUs.

commit cf405ae612b0f7e2358db7ff594c0e94846137aa upstream.

When we boot on a machine that can hotplug CPUs and we
are using 'dom0_max_vcpus=X' on the Xen hypervisor line
to clip the amount of CPUs available to the initial domain,
we get this:

(XEN) Command line: com1=115200,8n1 dom0_mem=8G noreboot dom0_max_vcpus=8 sync_console mce_verbosity=verbose console=com1,vga loglvl=all guest_loglvl=all
.. snip..
DMI: Intel Corporation S2600CP/S2600CP, BIOS SE5C600.86B.99.99.x032.072520111118 07/25/2011
.. snip.
SMP: Allowing 64 CPUs, 32 hotplug CPUs
installing Xen timer for CPU 7
cpu 7 spinlock event irq 361
NMI watchdog: disabled (cpu7): hardware events not enabled
Brought up 8 CPUs
.. snip..
	[acpi processor finds the CPUs are not initialized and starts calling
	arch_register_cpu, which creates /sys/devices/system/cpu/cpu8/online]
CPU 8 got hotplugged
CPU 9 got hotplugged
CPU 10 got hotplugged
.. snip..
initcall 1_acpi_battery_init_async+0x0/0x1b returned 0 after 406 usecs
calling  erst_init+0x0/0x2bb @ 1

	[and the scheduler sticks newly started tasks on the new CPUs, but
	said CPUs cannot be initialized b/c the hypervisor has limited the
	amount of vCPUS to 8 - as per the dom0_max_vcpus=8 flag.
	The spinlock tries to kick the other CPU, but the structure for that
	is not initialized and we crash.]
BUG: unable to handle kernel paging request at fffffffffffffed8
IP: [<ffffffff81035289>] xen_spin_lock+0x29/0x60
PGD 180d067 PUD 180e067 PMD 0
Oops: 0002 [#1] SMP
CPU 7
Modules linked in:

Pid: 1, comm: swapper/0 Not tainted 3.4.0-rc2upstream-00001-gf5154e8 #1 Intel Corporation S2600CP/S2600CP
RIP: e030:[<ffffffff81035289>]  [<ffffffff81035289>] xen_spin_lock+0x29/0x60
RSP: e02b:ffff8801fb9b3a70  EFLAGS: 00010282

With this patch, we cap the amount of vCPUS that the initial domain
can run, to exactly what dom0_max_vcpus=X has specified.

In the future, if there is a hypercall that will allow a running
domain to expand past its initial set of vCPUS, this patch should
be re-evaluated.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/smp.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d4fc6d454f8d..2843b5e7cf07 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -172,6 +172,7 @@ static void __init xen_fill_possible_map(void)
 static void __init xen_filter_cpu_maps(void)
 {
 	int i, rc;
+	unsigned int subtract = 0;
 
 	if (!xen_initial_domain())
 		return;
@@ -186,8 +187,22 @@ static void __init xen_filter_cpu_maps(void)
 		} else {
 			set_cpu_possible(i, false);
 			set_cpu_present(i, false);
+			subtract++;
 		}
 	}
+#ifdef CONFIG_HOTPLUG_CPU
+	/* This is akin to using 'nr_cpus' on the Linux command line.
+	 * Which is OK as when we use 'dom0_max_vcpus=X' we can only
+	 * have up to X, while nr_cpu_ids is greater than X. This
+	 * normally is not a problem, except when CPU hotplugging
+	 * is involved and then there might be more than X CPUs
+	 * in the guest - which will not work as there is no
+	 * hypercall to expand the max number of VCPUs an already
+	 * running guest has. So cap it up to X. */
+	if (subtract)
+		nr_cpu_ids = nr_cpu_ids - subtract;
+#endif
+
 }
 
 static void __init xen_smp_prepare_boot_cpu(void)
-- 
cgit v1.2.3


From 62a17c9c34a40907e250b5ac110a5c64325f0aef Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 27 Apr 2012 12:45:07 +0100
Subject: ARM: 7403/1: tls: remove covert channel via TPIDRURW

commit 6a1c53124aa161eb624ce7b1e40ade728186d34c upstream.

TPIDRURW is a user read/write register forming part of the group of
thread registers in more recent versions of the ARM architecture (~v6+).

Currently, the kernel does not touch this register, which allows tasks
to communicate covertly by reading and writing to the register without
context-switching affecting its contents.

This patch clears TPIDRURW when TPIDRURO is updated via the set_tls
macro, which is called directly from __switch_to. Since the current
behaviour makes the register useless to userspace as far as thread
pointers are concerned, simply clearing the register (rather than saving
and restoring it) will not cause any problems to userspace.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/tls.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 60843eb0f61c..73409e6c0251 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -7,6 +7,8 @@
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
+	mov	\tmp1, #0
+	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -15,6 +17,8 @@
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
 	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	movne	\tmp1, #0
+	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
-- 
cgit v1.2.3