diff options
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/Kconfig | 11 | ||||
-rw-r--r-- | arch/arm/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 104 | ||||
-rw-r--r-- | arch/arm/mm/consistent.c | 17 | ||||
-rw-r--r-- | arch/arm/mm/context.c | 12 | ||||
-rw-r--r-- | arch/arm/mm/proc-v6.S | 22 | ||||
-rw-r--r-- | arch/arm/mm/proc-xsc3.S | 151 | ||||
-rw-r--r-- | arch/arm/mm/tlb-v6.S | 4 |
8 files changed, 228 insertions, 95 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index aade2f72c92..da8f043dc2c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -171,8 +171,8 @@ config CPU_ARM925T # ARM926T config CPU_ARM926T bool "Support ARM926T processor" - depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 - default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 + depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 + default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 select CPU_32v5 select CPU_ABRT_EV5TJ select CPU_CACHE_VIVT @@ -609,3 +609,10 @@ config NEEDS_SYSCALL_FOR_CMPXCHG Forget about fast user space cmpxchg support. It is just not possible. +config OUTER_CACHE + bool + default n + +config CACHE_L2X0 + bool + select OUTER_CACHE diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index d2f5672ecf6..2f8b9594777 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -66,3 +66,5 @@ obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o obj-$(CONFIG_CPU_V6) += proc-v6.o + +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c new file mode 100644 index 00000000000..08a36f1b35d --- /dev/null +++ b/arch/arm/mm/cache-l2x0.c @@ -0,0 +1,104 @@ +/* + * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> + +#include <asm/cacheflush.h> +#include <asm/io.h> +#include <asm/hardware/cache-l2x0.h> + +#define CACHE_LINE_SIZE 32 + +static void __iomem *l2x0_base; + +static inline void sync_writel(unsigned long val, unsigned long reg, + unsigned long complete_mask) +{ + writel(val, l2x0_base + reg); + /* wait for the operation to complete */ + while (readl(l2x0_base + reg) & complete_mask) + ; +} + +static inline void cache_sync(void) +{ + sync_writel(0, L2X0_CACHE_SYNC, 1); +} + +static inline void l2x0_inv_all(void) +{ + /* invalidate all ways */ + sync_writel(0xff, L2X0_INV_WAY, 0xff); + cache_sync(); +} + +static void l2x0_inv_range(unsigned long start, unsigned long end) +{ + unsigned long addr; + + start &= ~(CACHE_LINE_SIZE - 1); + for (addr = start; addr < end; addr += CACHE_LINE_SIZE) + sync_writel(addr, L2X0_INV_LINE_PA, 1); + cache_sync(); +} + +static void l2x0_clean_range(unsigned long start, unsigned long end) +{ + unsigned long addr; + + start &= ~(CACHE_LINE_SIZE - 1); + for (addr = start; addr < end; addr += CACHE_LINE_SIZE) + sync_writel(addr, L2X0_CLEAN_LINE_PA, 1); + cache_sync(); +} + +static void l2x0_flush_range(unsigned long start, unsigned long end) +{ + unsigned long addr; + + start &= ~(CACHE_LINE_SIZE - 1); + for (addr = start; addr < end; addr += CACHE_LINE_SIZE) + sync_writel(addr, L2X0_CLEAN_INV_LINE_PA, 1); + cache_sync(); +} + +void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) +{ + __u32 aux; + + l2x0_base = base; + + /* disable L2X0 */ + writel(0, l2x0_base + L2X0_CTRL); + + aux = readl(l2x0_base + L2X0_AUX_CTRL); + aux &= aux_mask; + aux |= aux_val; + writel(aux, l2x0_base + L2X0_AUX_CTRL); + + l2x0_inv_all(); + + /* enable L2X0 */ + writel(1, l2x0_base + L2X0_CTRL); + + outer_cache.inv_range = l2x0_inv_range; + outer_cache.clean_range = l2x0_clean_range; + outer_cache.flush_range = l2x0_flush_range; + + printk(KERN_INFO "L2X0 cache controller enabled\n"); +} diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 6a9c362fef5..1f9f94f9af4 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -205,9 +205,10 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, * kernel direct-mapped region for device DMA. */ { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - dmac_flush_range(kaddr, kaddr + size); + void *ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); } /* @@ -480,20 +481,24 @@ core_initcall(consistent_init); * platforms with CONFIG_DMABOUNCE. * Use the driver DMA support - see dma-mapping.h (dma_sync_*) */ -void consistent_sync(void *vaddr, size_t size, int direction) +void consistent_sync(const void *start, size_t size, int direction) { - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; + const void *end = start + size; + + BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1)); switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ dmac_inv_range(start, end); + outer_inv_range(__pa(start), __pa(end)); break; case DMA_TO_DEVICE: /* writeback only */ dmac_clean_range(start, end); + outer_clean_range(__pa(start), __pa(end)); break; case DMA_BIDIRECTIONAL: /* writeback and invalidate */ dmac_flush_range(start, end); + outer_flush_range(__pa(start), __pa(end)); break; default: BUG(); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 79e80020242..9da43a0fdcd 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -19,7 +19,8 @@ unsigned int cpu_last_asid = { 1 << ASID_BITS }; /* * We fork()ed a process, and we need a new context for the child * to run in. We reserve version 0 for initial tasks so we will - * always allocate an ASID. + * always allocate an ASID. The ASID 0 is reserved for the TTBR + * register changing sequence. */ void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -38,8 +39,15 @@ void __new_context(struct mm_struct *mm) * If we've used up all our ASIDs, we need * to start a new version and flush the TLB. */ - if ((asid & ~ASID_MASK) == 0) + if ((asid & ~ASID_MASK) == 0) { + asid = ++cpu_last_asid; + /* set the reserved ASID before flushing the TLB */ + asm("mcr p15, 0, %0, c13, c0, 1 @ set reserved context ID\n" + : + : "r" (0)); + isb(); flush_tlb_all(); + } mm->context.id = asid; } diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 7b1843befb9..eb42e5b9486 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -14,10 +14,13 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/elf.h> -#include <asm/hardware/arm_scu.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#ifdef CONFIG_SMP +#include <asm/hardware/arm_scu.h> +#endif + #include "proc-macros.S" #define D_CACHE_LINE_SIZE 32 @@ -30,6 +33,12 @@ #define TTB_RGN_WT (2 << 3) #define TTB_RGN_WB (3 << 3) +#ifndef CONFIG_SMP +#define TTB_FLAGS TTB_RGN_WBWA +#else +#define TTB_FLAGS TTB_RGN_WBWA|TTB_S +#endif + ENTRY(cpu_v6_proc_init) mov pc, lr @@ -92,9 +101,7 @@ ENTRY(cpu_v6_switch_mm) #ifdef CONFIG_MMU mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id -#ifdef CONFIG_SMP - orr r0, r0, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable -#endif + orr r0, r0, #TTB_FLAGS mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 @@ -183,8 +190,7 @@ __v6_setup: /* Set up the SCU on core 0 only */ mrc p15, 0, r0, c0, c0, 5 @ CPU core number ands r0, r0, #15 - moveq r0, #0x10000000 @ SCU_BASE - orreq r0, r0, #0x00100000 + ldreq r0, =SCU_BASE ldreq r5, [r0, #SCU_CTRL] orreq r5, r5, #1 streq r5, [r0, #SCU_CTRL] @@ -204,9 +210,7 @@ __v6_setup: #ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r0, c2, c0, 2 @ TTB control register -#ifdef CONFIG_SMP - orr r4, r4, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable -#endif + orr r4, r4, #TTB_FLAGS mcr p15, 0, r4, c2, c0, 1 @ load TTB1 #endif /* CONFIG_MMU */ adr r5, v6_crval diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 94a58455f34..d95921a2ab9 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -5,23 +5,23 @@ * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> * * Copyright 2004 (C) Intel Corp. - * Copyright 2005 (c) MontaVista Software, Inc. + * Copyright 2005 (C) MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is an - * extension to Intel's original XScale core that adds the following + * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is + * an extension to Intel's original XScale core that adds the following * features: * * - ARMv6 Supersections * - Low Locality Reference pages (replaces mini-cache) * - 36-bit addressing * - L2 cache - * - Cache-coherency if chipset supports it + * - Cache coherency if chipset supports it * - * Based on orignal XScale code by Nicolas Pitre + * Based on original XScale code by Nicolas Pitre. */ #include <linux/linkage.h> @@ -42,12 +42,12 @@ #define MAX_AREA_SIZE 32768 /* - * The cache line size of the I and D cache. + * The cache line size of the L1 I, L1 D and unified L2 cache. */ #define CACHELINESIZE 32 /* - * The size of the data cache. + * The size of the L1 D cache. */ #define CACHESIZE 32768 @@ -57,9 +57,9 @@ #define L2_CACHE_ENABLE 1 /* - * This macro is used to wait for a CP15 write and is needed - * when we have to ensure that the last operation to the co-pro - * was completed before continuing with operation. + * This macro is used to wait for a CP15 write and is needed when we + * have to ensure that the last operation to the coprocessor was + * completed before continuing with operation. */ .macro cpwait_ret, lr, rd mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 @@ -68,13 +68,13 @@ .endm /* - * This macro cleans & invalidates the entire xsc3 dcache by set & way. + * This macro cleans and invalidates the entire L1 D cache. */ .macro clean_d_cache rd, rs mov \rd, #0x1f00 orr \rd, \rd, #0x00e0 -1: mcr p15, 0, \rd, c7, c14, 2 @ clean/inv set/way +1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line adds \rd, \rd, #0x40000000 bcc 1b subs \rd, \rd, #0x20 @@ -119,15 +119,15 @@ ENTRY(cpu_xsc3_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x0086 @ ........B....CA. bic r1, r1, #0x3900 @ ..VIZ..S........ + bic r1, r1, #0x0086 @ ........B....CA. mcr p15, 0, r1, c1, c0, 0 @ ctrl register - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB bic r1, r1, #0x0001 @ ...............M mcr p15, 0, r1, c1, c0, 0 @ ctrl register @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs mov pc, r0 /* @@ -139,14 +139,12 @@ ENTRY(cpu_xsc3_reset) * * XScale supports clock switching, but using idle mode support * allows external hardware to react to system state changes. - - MMG: Come back to this one. */ .align 5 ENTRY(cpu_xsc3_do_idle) mov r0, #1 - mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE + mcr p14, 0, r0, c7, c0, 0 @ go to idle mov pc, lr /* ================================= CACHE ================================ */ @@ -171,9 +169,9 @@ ENTRY(xsc3_flush_kern_cache_all) __flush_whole_cache: clean_d_cache r0, r1 tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write Buffer - mcrne p15, 0, ip, c7, c5, 4 @ Prefetch Flush + mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush mov pc, lr /* @@ -194,21 +192,21 @@ ENTRY(xsc3_flush_user_cache_range) bhs __flush_whole_cache 1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line - mcr p15, 0, r0, c7, c14, 1 @ Clean/invalidate D cache line + mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line + mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write Buffer - mcrne p15, 0, ip, c7, c5, 4 @ Prefetch Flush + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush mov pc, lr /* * coherent_kern_range(start, end) * - * Ensure coherency between the Icache and the Dcache in the + * Ensure coherency between the I cache and the D cache in the * region described by start. If you have non-snooping * Harvard caches, you need to implement this function. * @@ -222,34 +220,34 @@ ENTRY(xsc3_coherent_kern_range) /* FALLTHROUGH */ ENTRY(xsc3_coherent_user_range) bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer - mcr p15, 0, r0, c7, c5, 4 @ Prefetch Flush + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush mov pc, lr /* * flush_kern_dcache_page(void *page) * * Ensure no D cache aliasing occurs, either with itself or - * the I cache + * the I cache. * * - addr - page aligned address */ ENTRY(xsc3_flush_kern_dcache_page) add r1, r0, #PAGE_SZ -1: mcr p15, 0, r0, c7, c14, 1 @ Clean/Invalidate D Cache line +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer - mcr p15, 0, r0, c7, c5, 4 @ Prefetch Flush + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush mov pc, lr /* @@ -266,17 +264,17 @@ ENTRY(xsc3_flush_kern_dcache_page) ENTRY(xsc3_dma_inv_range) tst r0, #CACHELINESIZE - 1 bic r0, r0, #CACHELINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D entry - mcrne p15, 1, r0, c7, c11, 1 @ clean L2 D entry + mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcrne p15, 1, r0, c7, c11, 1 @ clean L2 line tst r1, #CACHELINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D entry - mcrne p15, 1, r1, c7, c11, 1 @ clean L2 D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D entry - mcr p15, 1, r0, c7, c7, 1 @ Invalidate L2 D cache line + mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line + mcrne p15, 1, r1, c7, c11, 1 @ clean L2 line +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line + mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer + mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr /* @@ -289,12 +287,12 @@ ENTRY(xsc3_dma_inv_range) */ ENTRY(xsc3_dma_clean_range) bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D entry - mcr p15, 1, r0, c7, c11, 1 @ clean L2 D entry +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 1, r0, c7, c11, 1 @ clean L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer + mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr /* @@ -307,13 +305,13 @@ ENTRY(xsc3_dma_clean_range) */ ENTRY(xsc3_dma_flush_range) bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ Clean/invalidate L1 D cache line - mcr p15, 1, r0, c7, c11, 1 @ Clean L2 D cache line - mcr p15, 1, r0, c7, c7, 1 @ Invalidate L2 D cache line +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + mcr p15, 1, r0, c7, c11, 1 @ clean L2 line + mcr p15, 1, r0, c7, c7, 1 @ invalidate L2 line add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write Buffer + mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr ENTRY(xsc3_cache_fns) @@ -328,7 +326,7 @@ ENTRY(xsc3_cache_fns) .long xsc3_dma_flush_range ENTRY(cpu_xsc3_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b @@ -346,14 +344,14 @@ ENTRY(cpu_xsc3_dcache_clean_area) .align 5 ENTRY(cpu_xsc3_switch_mm) clean_d_cache r1, r2 - mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer - mcr p15, 0, ip, c7, c5, 4 @ Prefetch Flush + mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush #ifdef L2_CACHE_ENABLE orr r0, r0, #0x18 @ cache the page table in L2 #endif mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs cpwait_ret lr, ip /* @@ -366,34 +364,34 @@ ENTRY(cpu_xsc3_switch_mm) ENTRY(cpu_xsc3_set_pte_ext) str r1, [r0], #-2048 @ linux version - bic r2, r1, #0xff0 @ Keep C, B bits + bic r2, r1, #0xff0 @ keep C, B bits orr r2, r2, #PTE_TYPE_EXT @ extended page - tst r1, #L_PTE_SHARED @ Shared? + tst r1, #L_PTE_SHARED @ shared? orrne r2, r2, #0x200 eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - tst r3, #L_PTE_USER @ User? + tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? + tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w @ combined with user -> user r/w #if L2_CACHE_ENABLE - @ If its cacheable it needs to be in L2 also. + @ If it's cacheable, it needs to be in L2 also. eor ip, r1, #L_PTE_CACHEABLE tst ip, #L_PTE_CACHEABLE orreq r2, r2, #PTE_EXT_TEX(0x5) #endif - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? movne r2, #0 @ no -> fault str r2, [r0] @ hardware version mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line mcr - mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer + mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 0, ip, c7, c10, 4 @ data write barrier mov pc, lr .ltorg @@ -406,17 +404,18 @@ ENTRY(cpu_xsc3_set_pte_ext) __xsc3_setup: mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write Buffer - mcr p15, 0, ip, c7, c5, 4 @ Prefetch Flush - mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs #if L2_CACHE_ENABLE orr r4, r4, #0x18 @ cache the page table in L2 #endif mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - mov r0, #1 @ Allow access to CP0 and CP13 - orr r0, r0, #1 << 13 @ Its undefined whether this - mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes + + mov r0, #0 @ don't allow CP access + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg and r0, r0, #2 @ preserve bit P bit setting #if L2_CACHE_ENABLE @@ -427,9 +426,9 @@ __xsc3_setup: adr r5, xsc3_crval ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0, 0 @ get control register - bic r0, r0, r5 @ .... .... .... ..A. - orr r0, r0, r6 @ .... .... .... .C.M - orr r0, r0, #0x00000800 @ ..VI Z..S .... .... + bic r0, r0, r5 @ ..V. ..R. .... ..A. + orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) + @ ...I Z..S .... .... (uc) #if L2_CACHE_ENABLE orr r0, r0, #0x04000000 @ L2 enable #endif @@ -439,7 +438,7 @@ __xsc3_setup: .type xsc3_crval, #object xsc3_crval: - crval clear=0x04003b02, mmuset=0x00003105, ucset=0x00001100 + crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 __INITDATA @@ -474,7 +473,7 @@ cpu_elf_name: .type cpu_xsc3_name, #object cpu_xsc3_name: - .asciz "XScale-Core3" + .asciz "XScale-V3 based processor" .size cpu_xsc3_name, . - cpu_xsc3_name .align @@ -490,7 +489,7 @@ __xsc3_proc_info: PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ + .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ b __xsc3_setup diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index fd6adde3909..20f84bbaa9b 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -53,6 +53,8 @@ ENTRY(v6wbi_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b + mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB + mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier mov pc, lr /* @@ -80,7 +82,9 @@ ENTRY(v6wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier + mcr p15, 0, r2, c7, c5, 4 @ prefetch flush mov pc, lr .section ".text.init", #alloc, #execinstr |