aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--arch/arm/mm/Kconfig11
-rw-r--r--arch/arm/mm/Makefile2
-rw-r--r--arch/arm/mm/cache-l2x0.c104
-rw-r--r--arch/arm/mm/consistent.c17
-rw-r--r--arch/arm/mm/context.c12
-rw-r--r--arch/arm/mm/proc-v6.S22
-rw-r--r--arch/arm/mm/proc-xsc3.S151
-rw-r--r--arch/arm/mm/tlb-v6.S4
8 files changed, 228 insertions, 95 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index aade2f72c92..da8f043dc2c 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -171,8 +171,8 @@ config CPU_ARM925T
# ARM926T
config CPU_ARM926T
bool "Support ARM926T processor"
- depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261
- default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261
+ depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263
+ default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263
select CPU_32v5
select CPU_ABRT_EV5TJ
select CPU_CACHE_VIVT
@@ -609,3 +609,10 @@ config NEEDS_SYSCALL_FOR_CMPXCHG
Forget about fast user space cmpxchg support.
It is just not possible.
+config OUTER_CACHE
+ bool
+ default n
+
+config CACHE_L2X0
+ bool
+ select OUTER_CACHE
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d2f5672ecf6..2f8b9594777 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -66,3 +66,5 @@ obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o
obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o
obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o
obj-$(CONFIG_CPU_V6) += proc-v6.o
+
+obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
new file mode 100644
index 00000000000..08a36f1b35d
--- /dev/null
+++ b/arch/arm/mm/cache-l2x0.c
@@ -0,0 +1,104 @@
+/*
+ * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/init.h>
+
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+#include <asm/hardware/cache-l2x0.h>
+
+#define CACHE_LINE_SIZE 32
+
+static void __iomem *l2x0_base;
+
+static inline void sync_writel(unsigned long val, unsigned long reg,
+ unsigned long complete_mask)
+{
+ writel(val, l2x0_base + reg);
+ /* wait for the operation to complete */
+ while (readl(l2x0_base + reg) & complete_mask)
+ ;
+}
+
+static inline void cache_sync(void)
+{
+ sync_writel(0, L2X0_CACHE_SYNC, 1);
+}
+
+static inline void l2x0_inv_all(void)
+{
+ /* invalidate all ways */
+ sync_writel(0xff, L2X0_INV_WAY, 0xff);
+ cache_sync();
+}
+
+static void l2x0_inv_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
+ sync_writel(addr, L2X0_INV_LINE_PA, 1);
+ cache_sync();
+}
+
+static void l2x0_clean_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
+ sync_writel(addr, L2X0_CLEAN_LINE_PA, 1);
+ cache_sync();
+}
+
+static void l2x0_flush_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
+ sync_writel(addr, L2X0_CLEAN_INV_LINE_PA, 1);
+ cache_sync();
+}
+
+void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
+{
+ __u32 aux;
+
+ l2x0_base = base;
+
+ /* disable L2X0 */
+ writel(0, l2x0_base + L2X0_CTRL);
+
+ aux = readl(l2x0_base + L2X0_AUX_CTRL);
+ aux &= aux_mask;
+ aux |= aux_val;
+ writel(aux, l2x0_base + L2X0_AUX_CTRL);
+
+ l2x0_inv_all();
+
+ /* enable L2X0 */
+ writel(1, l2x0_base + L2X0_CTRL);
+
+ outer_cache.inv_range = l2x0_inv_range;
+ outer_cache.clean_range = l2x0_clean_range;
+ outer_cache.flush_range = l2x0_flush_range;
+
+ printk(KERN_INFO "L2X0 cache controller enabled\n");
+}
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 6a9c362fef5..1f9f94f9af4 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -205,9 +205,10 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
* kernel direct-mapped region for device DMA.
*/
{
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- dmac_flush_range(kaddr, kaddr + size);
+ void *ptr = page_address(page);
+ memset(ptr, 0, size);
+ dmac_flush_range(ptr, ptr + size);
+ outer_flush_range(__pa(ptr), __pa(ptr) + size);
}
/*
@@ -480,20 +481,24 @@ core_initcall(consistent_init);
* platforms with CONFIG_DMABOUNCE.
* Use the driver DMA support - see dma-mapping.h (dma_sync_*)
*/
-void consistent_sync(void *vaddr, size_t size, int direction)
+void consistent_sync(const void *start, size_t size, int direction)
{
- unsigned long start = (unsigned long)vaddr;
- unsigned long end = start + size;
+ const void *end = start + size;
+
+ BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1));
switch (direction) {
case DMA_FROM_DEVICE: /* invalidate only */
dmac_inv_range(start, end);
+ outer_inv_range(__pa(start), __pa(end));
break;
case DMA_TO_DEVICE: /* writeback only */
dmac_clean_range(start, end);
+ outer_clean_range(__pa(start), __pa(end));
break;
case DMA_BIDIRECTIONAL: /* writeback and invalidate */
dmac_flush_range(start, end);
+ outer_flush_range(__pa(start), __pa(end));
break;
default:
BUG();
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 79e80020242..9da43a0fdcd 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -19,7 +19,8 @@ unsigned int cpu_last_asid = { 1 << ASID_BITS };
/*
* We fork()ed a process, and we need a new context for the child
* to run in. We reserve version 0 for initial tasks so we will
- * always allocate an ASID.
+ * always allocate an ASID. The ASID 0 is reserved for the TTBR
+ * register changing sequence.
*/
void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
@@ -38,8 +39,15 @@ void __new_context(struct mm_struct *mm)
* If we've used up all our ASIDs, we need
* to start a new version and flush the TLB.
*/
- if ((asid & ~ASID_MASK) == 0)
+ if ((asid & ~ASID_MASK) == 0) {
+ asid = ++cpu_last_asid;
+ /* set the reserved ASID before flushing the TLB */
+ asm("mcr p15, 0, %0, c13, c0, 1 @ set reserved context ID\n"
+ :
+ : "r" (0));
+ isb();
flush_tlb_all();
+ }
mm->context.id = asid;
}
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 7b1843befb9..eb42e5b9486 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -14,10 +14,13 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/elf.h>
-#include <asm/hardware/arm_scu.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
+#ifdef CONFIG_SMP
+#include <asm/hardware/arm_scu.h>
+#endif
+
#include "proc-macros.S"
#define D_CACHE_LINE_SIZE 32
@@ -30,6 +33,12 @@
#define TTB_RGN_WT (2 << 3)
#define TTB_RGN_WB (3 << 3)
+#ifndef CONFIG_SMP
+#define TTB_FLAGS TTB_RGN_WBWA
+#else
+#define TTB_FLAGS TTB_RGN_WBWA|TTB_S
+#endif
+
ENTRY(cpu_v6_proc_init)
mov pc, lr
@@ -92,9 +101,7 @@ ENTRY(cpu_v6_switch_mm)
#ifdef CONFIG_MMU
mov r2, #0
ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id
-#ifdef CONFIG_SMP
- orr r0, r0, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable
-#endif
+ orr r0, r0, #TTB_FLAGS
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
@@ -183,8 +190,7 @@ __v6_setup:
/* Set up the SCU on core 0 only */
mrc p15, 0, r0, c0, c0, 5 @ CPU core number
ands r0, r0, #15
- moveq r0, #0x10000000 @ SCU_BASE
- orreq r0, r0, #0x00100000
+ ldreq r0, =SCU_BASE
ldreq r5, [r0, #SCU_CTRL]
orreq r5, r5, #1
streq r5, [r0, #SCU_CTRL]
@@ -204,9 +210,7 @@ __v6_setup:
#ifdef CONFIG_MMU
mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs
mcr p15, 0, r0, c2, c0, 2 @ TTB control register
-#ifdef CONFIG_SMP
- orr r4, r4, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable
-#endif
+ orr r4, r4, #TTB_FLAGS
mcr p15, 0, r4, c2, c0, 1 @ load TTB1
#endif /* CONFIG_MMU */
adr r5, v6_crval
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 94a58455f34..d95921a2ab9 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -5,23 +5,23 @@
* Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
*
* Copyright 2004 (C) Intel Corp.
- * Copyright 2005 (c) MontaVista Software, Inc.
+ * Copyright 2005 (C) MontaVista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is an
- * extension to Intel's original XScale core that adds the following
+ * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is
+ * an extension to Intel's original XScale core that adds the following
* features:
*
* - ARMv6 Supersections
* - Low Locality Reference pages (replaces mini-cache)
* - 36-bit addressing
* - L2 cache
- * - Cache-coherency if chipset supports it
+ * - Cache coherency if chipset supports it
*
- * Based on orignal XScale code by Nicolas Pitre
+ * Based on original XScale code by Nicolas Pitre.
*/
#include <linux/linkage.h>
@@ -42,12 +42,12 @@
#define MAX_AREA_SIZE 32768
/*
- * The cache line size of the I and D cache.
+ * The cache line size of the L1 I, L1 D and unified L2 cache.
*/
#define CACHELINESIZE 32
/*
- * The size of the data cache.
+ * The size of the L1 D cache.
*/
#define CACHESIZE 32768
@@ -57,9 +57,9 @@
#define L2_CACHE_ENABLE 1
/*
- * This macro is used to wait for a CP15 write and is needed
- * when we have to ensure that the last operation to the co-pro
- * was completed before continuing with operation.
+ * This macro is used to wait for a CP15 write and is needed when we
+ * have to ensure that the last operation to the coprocessor was
+ * completed before continuing with operation.
*/
.macro cpwait_ret, lr, rd
mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15
@@ -68,13 +68,13 @@
.endm
/*
- * This macro cleans & invalidates the entire xsc3 dcache by set & way.
+ * This macro cleans and invalidates the entire L1 D cache.
*/
.macro clean_d_cache rd, rs
mov \rd, #0x1f00
orr \rd, \rd, #0x00e0
-1: mcr p15, 0, \rd, c7, c14, 2 @ clean/inv set/way
+1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line
adds \rd, \rd, #0x40000000
bcc 1b
subs \rd, \rd, #0x20
@@ -119,15 +119,15 @@ ENTRY(cpu_xsc3_reset)
mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r1 @ reset CPSR
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
- bic r1, r1, #0x0086 @ ........B....CA.
bic r1, r1, #0x3900 @ ..VIZ..S........
+ bic r1, r1, #0x0086 @ ........B....CA.
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
- mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB
bic r1, r1, #0x0001 @ ...............M
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
- mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
mov pc, r0
/*
@@ -139,14 +139,12 @@ ENTRY(cpu_xsc3_reset)
*
* XScale supports clock switching, but using idle mode support
* allows external hardware to react to system state changes.
-
- MMG: Come back to this one.
*/
.align 5
ENTRY(cpu_xsc3_do_idle)
mov r0, #1
- mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
+ mcr p14, 0, r0, c7, c0, 0 @ go to idle
mov pc, lr
/* ================================= CACHE ================================ */
@@ -171,9 +169,9 @@ ENTRY(xsc3_flush_kern_cache_all)
__flush_whole_cache:
clean_d_cache r0, r1
tst r2, #VM_EXEC
- mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
- mcrne p15, 0, ip, c7, c10, 4 @ Drain Write Buffer
- mcrne p15, 0, ip, c7, c5, 4 @ Prefetch Flush
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
mov pc, lr
/*
@@ -194,21 +192,21 @@ ENTRY(xsc3_flush_user_cache_range)
bhs __flush_whole_cache
1: tst r2, #VM_EXEC
- mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line
- mcr p15, 0, r0, c7, c14, 1 @ Clean/invalidate D cache line
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line
+ mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
- mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
- mcrne p15, 0, ip, c7, c10, 4 @ Drain Write Buffer
- mcrne p15, 0, ip, c7, c5, 4 @ Prefetch Flush
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
mov pc, lr
/*
* coherent_kern_range(start, end)
*
- * Ensure coherency between the Icache and the Dcache in the
+ * Ensure coherency between the I cache and the D cache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
@@ -222,34 +220,34 @@ ENTRY(xsc3_coherent_kern_range)
/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
bic r0, r0, #CACHELINESIZE - 1
-1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
- mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
- mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer
- mcr p15, 0, r0, c7, c5, 4 @ Prefetch Flush
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
mov pc, lr
/*
* flush_kern_dcache_page(void *page)
*
* Ensure no D cache aliasing occurs, either with itself or
- * the I cache
+ * the I cache.
*
* - addr - page aligned address
*/
ENTRY(xsc3_flush_kern_dcache_page)
add r1, r0, #PAGE_SZ
-1: mcr p15, 0, r0, c7, c14, 1 @ Clean/Invalidate D Cache line
+1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
- mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
- mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer
- mcr p15, 0, r0, c7, c5, 4 @ Prefetch Flush
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
mov pc, lr
/*
@@ -266,17 +264,17 @@ ENTRY(xsc3_flush_kern_dcache_page)
ENTRY(xsc3_dma_inv_range)
tst r0, #CACHELINESIZE - 1
bic r0, r0, #CACHELINESIZE - 1
- mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D entry
- mcrne p15, 1, r0, c7, c11, 1 @ clean L2 D entry
+ mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line
+ mcrne p15, 1, r0, c7, c11, 1 @ clean L2 line
tst r1, #CACHELINESIZE - 1
- mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D entry
- mcrne p15, 1, r1, c7, c11, 1 @ clean L2 D entry
-1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D entry
- mcr p15, 1, r0, c7, c7, 1 @ Invalidate L2 D cache line
+ mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line
+ mcrne p15, 1, r1, c7, c11, 1 @ clean L2 line
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line
+ mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
- mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mov pc, lr
/*
@@ -289,12 +287,12 @@ ENTRY(xsc3_dma_inv_range)
*/
ENTRY(xsc3_dma_clean_range)
bic r0, r0, #CACHELINESIZE - 1
-1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D entry
- mcr p15, 1, r0, c7, c11, 1 @ clean L2 D entry
+1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
+ mcr p15, 1, r0, c7, c11, 1 @ clean L2 line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
- mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mov pc, lr
/*
@@ -307,13 +305,13 @@ ENTRY(xsc3_dma_clean_range)
*/
ENTRY(xsc3_dma_flush_range)
bic r0, r0, #CACHELINESIZE - 1
-1: mcr p15, 0, r0, c7, c14, 1 @ Clean/invalidate L1 D cache line
- mcr p15, 1, r0, c7, c11, 1 @ Clean L2 D cache line
- mcr p15, 1, r0, c7, c7, 1 @ Invalidate L2 D cache line
+1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line
+ mcr p15, 1, r0, c7, c11, 1 @ clean L2 line
+ mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
- mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mov pc, lr
ENTRY(xsc3_cache_fns)
@@ -328,7 +326,7 @@ ENTRY(xsc3_cache_fns)
.long xsc3_dma_flush_range
ENTRY(cpu_xsc3_dcache_clean_area)
-1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
@@ -346,14 +344,14 @@ ENTRY(cpu_xsc3_dcache_clean_area)
.align 5
ENTRY(cpu_xsc3_switch_mm)
clean_d_cache r1, r2
- mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
- mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer
- mcr p15, 0, ip, c7, c5, 4 @ Prefetch Flush
+ mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcr p15, 0, ip, c7, c5, 4 @ prefetch flush
#ifdef L2_CACHE_ENABLE
orr r0, r0, #0x18 @ cache the page table in L2
#endif
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
- mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
cpwait_ret lr, ip
/*
@@ -366,34 +364,34 @@ ENTRY(cpu_xsc3_switch_mm)
ENTRY(cpu_xsc3_set_pte_ext)
str r1, [r0], #-2048 @ linux version
- bic r2, r1, #0xff0 @ Keep C, B bits
+ bic r2, r1, #0xff0 @ keep C, B bits
orr r2, r2, #PTE_TYPE_EXT @ extended page
- tst r1, #L_PTE_SHARED @ Shared?
+ tst r1, #L_PTE_SHARED @ shared?
orrne r2, r2, #0x200
eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
- tst r3, #L_PTE_USER @ User?
+ tst r3, #L_PTE_USER @ user?
orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w
- tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty?
+ tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty?
orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w
@ combined with user -> user r/w
#if L2_CACHE_ENABLE
- @ If its cacheable it needs to be in L2 also.
+ @ If it's cacheable, it needs to be in L2 also.
eor ip, r1, #L_PTE_CACHEABLE
tst ip, #L_PTE_CACHEABLE
orreq r2, r2, #PTE_EXT_TEX(0x5)
#endif
- tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young?
+ tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young?
movne r2, #0 @ no -> fault
str r2, [r0] @ hardware version
mov ip, #0
- mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line mcr
- mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer
+ mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
mov pc, lr
.ltorg
@@ -406,17 +404,18 @@ ENTRY(cpu_xsc3_set_pte_ext)
__xsc3_setup:
mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r0
- mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB
- mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer
- mcr p15, 0, ip, c7, c5, 4 @ Prefetch Flush
- mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcr p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
#if L2_CACHE_ENABLE
orr r4, r4, #0x18 @ cache the page table in L2
#endif
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
- mov r0, #1 @ Allow access to CP0 and CP13
- orr r0, r0, #1 << 13 @ Its undefined whether this
- mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes
+
+ mov r0, #0 @ don't allow CP access
+ mcr p15, 0, r0, c15, c1, 0 @ write CP access register
+
mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg
and r0, r0, #2 @ preserve bit P bit setting
#if L2_CACHE_ENABLE
@@ -427,9 +426,9 @@ __xsc3_setup:
adr r5, xsc3_crval
ldmia r5, {r5, r6}
mrc p15, 0, r0, c1, c0, 0 @ get control register
- bic r0, r0, r5 @ .... .... .... ..A.
- orr r0, r0, r6 @ .... .... .... .C.M
- orr r0, r0, #0x00000800 @ ..VI Z..S .... ....
+ bic r0, r0, r5 @ ..V. ..R. .... ..A.
+ orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu)
+ @ ...I Z..S .... .... (uc)
#if L2_CACHE_ENABLE
orr r0, r0, #0x04000000 @ L2 enable
#endif
@@ -439,7 +438,7 @@ __xsc3_setup:
.type xsc3_crval, #object
xsc3_crval:
- crval clear=0x04003b02, mmuset=0x00003105, ucset=0x00001100
+ crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
__INITDATA
@@ -474,7 +473,7 @@ cpu_elf_name:
.type cpu_xsc3_name, #object
cpu_xsc3_name:
- .asciz "XScale-Core3"
+ .asciz "XScale-V3 based processor"
.size cpu_xsc3_name, . - cpu_xsc3_name
.align
@@ -490,7 +489,7 @@ __xsc3_proc_info:
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
- .long PMD_TYPE_SECT | \
+ .long PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xsc3_setup
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index fd6adde3909..20f84bbaa9b 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -53,6 +53,8 @@ ENTRY(v6wbi_flush_user_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
+ mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB
+ mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier
mov pc, lr
/*
@@ -80,7 +82,9 @@ ENTRY(v6wbi_flush_kern_tlb_range)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
+ mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier
+ mcr p15, 0, r2, c7, c5, 4 @ prefetch flush
mov pc, lr
.section ".text.init", #alloc, #execinstr