diff options
Diffstat (limited to 'arch/x86/include')
27 files changed, 722 insertions, 399 deletions
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7a9df3beb89b..676ee5807d86 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -74,6 +74,9 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +# define _ASM_EXTABLE_REFCOUNT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) + # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ @@ -123,6 +126,9 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +# define _ASM_EXTABLE_REFCOUNT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) + /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 33380b871463..0874ebda3069 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new) return xchg(&v->counter, new); } -#define ATOMIC_OP(op) \ -static inline void atomic_##op(int i, atomic_t *v) \ -{ \ - asm volatile(LOCK_PREFIX #op"l %1,%0" \ - : "+m" (v->counter) \ - : "ir" (i) \ - : "memory"); \ +static inline void atomic_and(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "andl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); +} + +static inline int atomic_fetch_and(int i, atomic_t *v) +{ + int val = atomic_read(v); + + do { } while (!atomic_try_cmpxchg(v, &val, val & i)); + + return val; } -#define ATOMIC_FETCH_OP(op, c_op) \ -static inline int atomic_fetch_##op(int i, atomic_t *v) \ -{ \ - int val = atomic_read(v); \ - do { \ - } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \ - return val; \ +static inline void atomic_or(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "orl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); } -#define ATOMIC_OPS(op, c_op) \ - ATOMIC_OP(op) \ - ATOMIC_FETCH_OP(op, c_op) +static inline int atomic_fetch_or(int i, atomic_t *v) +{ + int val = atomic_read(v); -ATOMIC_OPS(and, &) -ATOMIC_OPS(or , |) -ATOMIC_OPS(xor, ^) + do { } while (!atomic_try_cmpxchg(v, &val, val | i)); -#undef ATOMIC_OPS -#undef ATOMIC_FETCH_OP -#undef ATOMIC_OP + return val; +} + +static inline void atomic_xor(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "xorl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); +} + +static inline int atomic_fetch_xor(int i, atomic_t *v) +{ + int val = atomic_read(v); + + do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); + + return val; +} /** * __atomic_add_unless - add unless the number is already a given value @@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^) static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c = atomic_read(v); + do { if (unlikely(c == u)) break; } while (!atomic_try_cmpxchg(v, &c, c + a)); + return c; } diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 71d7705fb303..9e206f31ce2a 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -#define ATOMIC64_OP(op, c_op) \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ -{ \ - long long old, c = 0; \ - while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ - c = old; \ +static inline void atomic64_and(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + c = old; } -#define ATOMIC64_FETCH_OP(op, c_op) \ -static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ -{ \ - long long old, c = 0; \ - while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ - c = old; \ - return old; \ +static inline long long atomic64_fetch_and(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + c = old; + + return old; } -ATOMIC64_FETCH_OP(add, +) +static inline void atomic64_or(long long i, atomic64_t *v) +{ + long long old, c = 0; -#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + c = old; +} + +static inline long long atomic64_fetch_or(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + c = old; + + return old; +} -#define ATOMIC64_OPS(op, c_op) \ - ATOMIC64_OP(op, c_op) \ - ATOMIC64_FETCH_OP(op, c_op) +static inline void atomic64_xor(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + c = old; +} -ATOMIC64_OPS(and, &) -ATOMIC64_OPS(or, |) -ATOMIC64_OPS(xor, ^) +static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + c = old; + + return old; +} -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + long long old, c = 0; + + while ((old = atomic64_cmpxchg(v, c, c + i)) != c) + c = old; + + return old; +} + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 6189a433c9a9..5d9de36a2f04 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) } #define atomic64_try_cmpxchg atomic64_try_cmpxchg -static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new) +static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) { return try_cmpxchg(&v->counter, old, new); } @@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) */ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { - long c = atomic64_read(v); + s64 c = atomic64_read(v); do { if (unlikely(c == u)) return false; @@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) */ static inline long atomic64_dec_if_positive(atomic64_t *v) { - long dec, c = atomic64_read(v); + s64 dec, c = atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) @@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) return dec; } -#define ATOMIC64_OP(op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ -{ \ - asm volatile(LOCK_PREFIX #op"q %1,%0" \ - : "+m" (v->counter) \ - : "er" (i) \ - : "memory"); \ +static inline void atomic64_and(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "andq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); } -#define ATOMIC64_FETCH_OP(op, c_op) \ -static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ -{ \ - long val = atomic64_read(v); \ - do { \ - } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \ - return val; \ +static inline long atomic64_fetch_and(long i, atomic64_t *v) +{ + s64 val = atomic64_read(v); + + do { + } while (!atomic64_try_cmpxchg(v, &val, val & i)); + return val; } -#define ATOMIC64_OPS(op, c_op) \ - ATOMIC64_OP(op) \ - ATOMIC64_FETCH_OP(op, c_op) +static inline void atomic64_or(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "orq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); +} -ATOMIC64_OPS(and, &) -ATOMIC64_OPS(or, |) -ATOMIC64_OPS(xor, ^) +static inline long atomic64_fetch_or(long i, atomic64_t *v) +{ + s64 val = atomic64_read(v); -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP + do { + } while (!atomic64_try_cmpxchg(v, &val, val | i)); + return val; +} + +static inline void atomic64_xor(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "xorq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); +} + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) +{ + s64 val = atomic64_read(v); + + do { + } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); + return val; +} #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index d90296d061e8..b5069e802d5c 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -157,7 +157,7 @@ extern void __add_wrong_size(void) #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ ({ \ bool success; \ - __typeof__(_ptr) _old = (_pold); \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ switch (size) { \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 66ac08607471..42bbbf0f173d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -177,7 +177,7 @@ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index a3de31ffb722..04330c8d9af9 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -126,15 +126,15 @@ do { \ pr_reg[4] = regs->di; \ pr_reg[5] = regs->bp; \ pr_reg[6] = regs->ax; \ - pr_reg[7] = regs->ds & 0xffff; \ - pr_reg[8] = regs->es & 0xffff; \ - pr_reg[9] = regs->fs & 0xffff; \ + pr_reg[7] = regs->ds; \ + pr_reg[8] = regs->es; \ + pr_reg[9] = regs->fs; \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ - pr_reg[13] = regs->cs & 0xffff; \ + pr_reg[13] = regs->cs; \ pr_reg[14] = regs->flags; \ pr_reg[15] = regs->sp; \ - pr_reg[16] = regs->ss & 0xffff; \ + pr_reg[16] = regs->ss; \ } while (0); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ @@ -204,6 +204,7 @@ void set_personality_ia32(bool); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ + unsigned long base; \ unsigned v; \ (pr_reg)[0] = (regs)->r15; \ (pr_reg)[1] = (regs)->r14; \ @@ -226,8 +227,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - (pr_reg)[21] = current->thread.fsbase; \ - (pr_reg)[22] = current->thread.gsbase; \ + rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \ + rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 255645f60ca2..554cdb205d17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); @@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate); + __copy_kernel_to_fpregs(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index b4c1f5453436..f4dc9b63bdda 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -41,20 +41,11 @@ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) -static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) { - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret, tem; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) - return -EFAULT; - pagefault_disable(); switch (op) { @@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) pagefault_enable(); - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: - ret = (oldval == cmparg); - break; - case FUTEX_OP_CMP_NE: - ret = (oldval != cmparg); - break; - case FUTEX_OP_CMP_LT: - ret = (oldval < cmparg); - break; - case FUTEX_OP_CMP_GE: - ret = (oldval >= cmparg); - break; - case FUTEX_OP_CMP_LE: - ret = (oldval <= cmparg); - break; - case FUTEX_OP_CMP_GT: - ret = (oldval > cmparg); - break; - default: - ret = -ENOSYS; - } - } + if (!ret) + *oval = oldval; + return ret; } diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4bc6f459a8b6..c40a95c33bb8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -69,6 +69,9 @@ build_mmio_write(__writeb, "b", unsigned char, "q", ) build_mmio_write(__writew, "w", unsigned short, "r", ) build_mmio_write(__writel, "l", unsigned int, "r", ) +#define readb readb +#define readw readw +#define readl readl #define readb_relaxed(a) __readb(a) #define readw_relaxed(a) __readw(a) #define readl_relaxed(a) __readl(a) @@ -76,6 +79,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define __raw_readw __readw #define __raw_readl __readl +#define writeb writeb +#define writew writew +#define writel writel #define writeb_relaxed(v, a) __writeb(v, a) #define writew_relaxed(v, a) __writew(v, a) #define writel_relaxed(v, a) __writel(v, a) @@ -88,13 +94,15 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", unsigned long, "=r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "=r", ) build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) -#define readq_relaxed(a) readq(a) -#define writeq_relaxed(v, a) writeq(v, a) +#define readq_relaxed(a) __readq(a) +#define writeq_relaxed(v, a) __writeq(v, a) -#define __raw_readq(a) readq(a) -#define __raw_writeq(val, addr) writeq(val, addr) +#define __raw_readq __readq +#define __raw_writeq __writeq /* Let people know that we have them */ #define readq readq @@ -119,6 +127,7 @@ static inline phys_addr_t virt_to_phys(volatile void *address) { return __pa(address); } +#define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual @@ -137,6 +146,7 @@ static inline void *phys_to_virt(phys_addr_t address) { return __va(address); } +#define phys_to_virt phys_to_virt /* * Change "struct page" to physical address. @@ -169,11 +179,14 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * else, you probably want one of the following. */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +#define ioremap_nocache ioremap_nocache extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +#define ioremap_cache ioremap_cache extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +#define ioremap_prot ioremap_prot /** * ioremap - map bus memory into CPU space @@ -193,8 +206,10 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); } +#define ioremap ioremap extern void iounmap(volatile void __iomem *addr); +#define iounmap iounmap extern void set_iounmap_nonlazy(void); @@ -203,53 +218,6 @@ extern void set_iounmap_nonlazy(void); #include <asm-generic/iomap.h> /* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p - -/** - * memset_io Set a range of I/O memory to a constant value - * @addr: The beginning of the I/O-memory range to set - * @val: The value to set the memory to - * @count: The number of bytes to set - * - * Set a range of I/O memory to a given value. - */ -static inline void -memset_io(volatile void __iomem *addr, unsigned char val, size_t count) -{ - memset((void __force *)addr, val, count); -} - -/** - * memcpy_fromio Copy a block of data from I/O memory - * @dst: The (RAM) destination for the copy - * @src: The (I/O memory) source for the data - * @count: The number of bytes to copy - * - * Copy a block of data from I/O memory. - */ -static inline void -memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) -{ - memcpy(dst, (const void __force *)src, count); -} - -/** - * memcpy_toio Copy a block of data into I/O memory - * @dst: The (I/O memory) destination for the copy - * @src: The (RAM) source for the data - * @count: The number of bytes to copy - * - * Copy a block of data to I/O memory. - */ -static inline void -memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) -{ - memcpy((void __force *)dst, src, count); -} - -/* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. The fact that the ISA IO space is mapped * to PAGE_OFFSET is pure coincidence - it does not mean ISA values @@ -341,13 +309,38 @@ BUILDIO(b, b, char) BUILDIO(w, w, short) BUILDIO(l, , int) +#define inb inb +#define inw inw +#define inl inl +#define inb_p inb_p +#define inw_p inw_p +#define inl_p inl_p +#define insb insb +#define insw insw +#define insl insl + +#define outb outb +#define outw outw +#define outl outl +#define outb_p outb_p +#define outw_p outw_p +#define outl_p outl_p +#define outsb outsb +#define outsw outsw +#define outsl outsl + extern void *xlate_dev_mem_ptr(phys_addr_t phys); extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); +#define xlate_dev_mem_ptr xlate_dev_mem_ptr +#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr + extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); +#define ioremap_wc ioremap_wc extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); +#define ioremap_wt ioremap_wt extern bool is_early_ioremap_ptep(pte_t *ptep); @@ -365,6 +358,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff +#include <asm-generic/io.h> +#undef PCI_IOBASE + #ifdef CONFIG_MTRR extern int __must_check arch_phys_wc_index(int handle); #define arch_phys_wc_index arch_phys_wc_index diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7cbaab523f22..369e41c23f07 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -492,6 +492,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 pkru; u32 hflags; u64 efer; u64 apic_base; @@ -1374,8 +1375,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); -void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h deleted file mode 100644 index 73d0c9b92087..000000000000 --- a/arch/x86/include/asm/lguest.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _ASM_X86_LGUEST_H -#define _ASM_X86_LGUEST_H - -#define GDT_ENTRY_LGUEST_CS 10 -#define GDT_ENTRY_LGUEST_DS 11 -#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) -#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) - -#ifndef __ASSEMBLY__ -#include <asm/desc.h> - -#define GUEST_PL 1 - -/* Page for Switcher text itself, then two pages per cpu */ -#define SWITCHER_TEXT_PAGES (1) -#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids) -#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES) - -/* Where we map the Switcher, in both Host and Guest. */ -extern unsigned long switcher_addr; - -/* Found in switcher.S */ -extern unsigned long default_idt_entries[]; - -/* Declarations for definitions in arch/x86/lguest/head_32.S */ -extern char lguest_noirq_iret[]; -extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_pushf[], lgend_pushf[]; - -extern void lguest_iret(void); -extern void lguest_init(void); - -struct lguest_regs { - /* Manually saved part. */ - unsigned long eax, ebx, ecx, edx; - unsigned long esi, edi, ebp; - unsigned long gs; - unsigned long fs, ds, es; - unsigned long trapnum, errcode; - /* Trap pushed part */ - unsigned long eip; - unsigned long cs; - unsigned long eflags; - unsigned long esp; - unsigned long ss; -}; - -/* This is a guest-specific page (mapped ro) into the guest. */ -struct lguest_ro_state { - /* Host information we need to restore when we switch back. */ - u32 host_cr3; - struct desc_ptr host_idt_desc; - struct desc_ptr host_gdt_desc; - u32 host_sp; - - /* Fields which are used when guest is running. */ - struct desc_ptr guest_idt_desc; - struct desc_ptr guest_gdt_desc; - struct x86_hw_tss guest_tss; - struct desc_struct guest_idt[IDT_ENTRIES]; - struct desc_struct guest_gdt[GDT_ENTRIES]; -}; - -struct lg_cpu_arch { - /* The GDT entries copied into lguest_ro_state when running. */ - struct desc_struct gdt[GDT_ENTRIES]; - - /* The IDT entries: some copied into lguest_ro_state when running. */ - struct desc_struct idt[IDT_ENTRIES]; - - /* The address of the last guest-visible pagefault (ie. cr2). */ - unsigned long last_pagefault; -}; - -static inline void lguest_set_ts(void) -{ - u32 cr0; - - cr0 = read_cr0(); - if (!(cr0 & 8)) - write_cr0(cr0 | 8); -} - -/* Full 4G segment descriptors, suitable for CS and DS. */ -#define FULL_EXEC_SEGMENT \ - ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff)) -#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff)) - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_LGUEST_H */ diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h deleted file mode 100644 index 6c119cfae218..000000000000 --- a/arch/x86/include/asm/lguest_hcall.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Architecture specific portion of the lguest hypercalls */ -#ifndef _ASM_X86_LGUEST_HCALL_H -#define _ASM_X86_LGUEST_HCALL_H - -#define LHCALL_FLUSH_ASYNC 0 -#define LHCALL_LGUEST_INIT 1 -#define LHCALL_SHUTDOWN 2 -#define LHCALL_NEW_PGTABLE 4 -#define LHCALL_FLUSH_TLB 5 -#define LHCALL_LOAD_IDT_ENTRY 6 -#define LHCALL_SET_STACK 7 -#define LHCALL_SET_CLOCKEVENT 9 -#define LHCALL_HALT 10 -#define LHCALL_SET_PMD 13 -#define LHCALL_SET_PTE 14 -#define LHCALL_SET_PGD 15 -#define LHCALL_LOAD_TLS 16 -#define LHCALL_LOAD_GDT_ENTRY 18 -#define LHCALL_SEND_INTERRUPTS 19 - -#define LGUEST_TRAP_ENTRY 0x1F - -/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */ -#define LGUEST_SHUTDOWN_POWEROFF 1 -#define LGUEST_SHUTDOWN_RESTART 2 - -#ifndef __ASSEMBLY__ -#include <asm/hw_irq.h> - -/*G:030 - * But first, how does our Guest contact the Host to ask for privileged - * operations? There are two ways: the direct way is to make a "hypercall", - * to make requests of the Host Itself. - * - * Our hypercall mechanism uses the highest unused trap code (traps 32 and - * above are used by real hardware interrupts). Seventeen hypercalls are - * available: the hypercall number is put in the %eax register, and the - * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. - * If a return value makes sense, it's returned in %eax. - * - * Grossly invalid calls result in Sudden Death at the hands of the vengeful - * Host, rather than returning failure. This reflects Winston Churchill's - * definition of a gentleman: "someone who is only rude intentionally". - */ -static inline unsigned long -hcall(unsigned long call, - unsigned long arg1, unsigned long arg2, unsigned long arg3, - unsigned long arg4) -{ - /* "int" is the Intel instruction to trigger a trap. */ - asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) - /* The call in %eax (aka "a") might be overwritten */ - : "=a"(call) - /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ - : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) - /* "memory" means this might write somewhere in memory. - * This isn't true for all calls, but it's safe to tell - * gcc that it might happen so it doesn't get clever. */ - : "memory"); - return call; -} -/*:*/ - -/* Can't use our min() macro here: needs to be a constant */ -#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) - -#define LHCALL_RING_SIZE 64 -struct hcall_args { - /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */ - unsigned long arg0, arg1, arg2, arg3, arg4; -}; - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_X86_LGUEST_HCALL_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index d25d9f4abb15..7ae318c340d9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -148,9 +148,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif - init_new_context_ldt(tsk, mm); - - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index e3b7819caeef..9eb7c718aaf8 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -2,6 +2,15 @@ #define _ASM_X86_MODULE_H #include <asm-generic/module.h> +#include <asm/orc_types.h> + +struct mod_arch_specific { +#ifdef CONFIG_ORC_UNWINDER + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif +}; #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h new file mode 100644 index 000000000000..91c8d868424d --- /dev/null +++ b/arch/x86/include/asm/orc_lookup.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef _ORC_LOOKUP_H +#define _ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP (unsigned long)_stext +#define LOOKUP_STOP_IP (unsigned long)_etext + +#endif /* LINKER_SCRIPT */ + +#endif /* _ORC_LOOKUP_H */ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h new file mode 100644 index 000000000000..9c9dc579bd7d --- /dev/null +++ b/arch/x86/include/asm/orc_types.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _ORC_TYPES_H +#define _ORC_TYPES_H + +#include <linux/types.h> +#include <linux/compiler.h> + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and BP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is + * usually based on. + * + * The rest of the base registers are needed for special cases like entry code + * and GCC realigned stacks. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_DX 2 +#define ORC_REG_DI 3 +#define ORC_REG_BP 4 +#define ORC_REG_SP 5 +#define ORC_REG_R10 6 +#define ORC_REG_R13 7 +#define ORC_REG_BP_INDIRECT 8 +#define ORC_REG_SP_INDIRECT 9 +#define ORC_REG_MAX 15 + +/* + * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the + * caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points + * to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset + * points to the iret return frame. + * + * The UNWIND_HINT macros are used only for the unwind_hint struct. They + * aren't used in struct orc_entry due to size and complexity constraints. + * Objtool converts them to real types when it converts the hints to orc + * entries. + */ +#define ORC_TYPE_CALL 0 +#define ORC_TYPE_REGS 1 +#define ORC_TYPE_REGS_IRET 2 +#define UNWIND_HINT_TYPE_SAVE 3 +#define UNWIND_HINT_TYPE_RESTORE 4 + +#ifndef __ASSEMBLY__ +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and BP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + s16 sp_offset; + s16 bp_offset; + unsigned sp_reg:4; + unsigned bp_reg:4; + unsigned type:2; +} __packed; + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack for the ORC unwinder. + * + * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; +}; +#endif /* __ASSEMBLY__ */ + +#endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c61bab07a84e..3fa26a61eabc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -22,6 +22,7 @@ struct vm86; #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> +#include <asm/unwind_hints.h> #include <linux/personality.h> #include <linux/cache.h> @@ -667,7 +668,7 @@ static inline void sync_core(void) * In case NMI unmasking or performance ever becomes a problem, * the next best option appears to be MOV-to-CR2 and an * unconditional jump. That sequence also works on all CPUs, - * but it will fault at CPL3 (i.e. Xen PV and lguest). + * but it will fault at CPL3 (i.e. Xen PV). * * CPUID is the conventional way, but it's nasty: it doesn't * exist on some 486-like CPUs, and it usually exits to a @@ -690,6 +691,7 @@ static inline void sync_core(void) unsigned int tmp; asm volatile ( + UNWIND_HINT_SAVE "mov %%ss, %0\n\t" "pushq %q0\n\t" "pushq %%rsp\n\t" @@ -699,6 +701,7 @@ static inline void sync_core(void) "pushq %q0\n\t" "pushq $1f\n\t" "iretq\n\t" + UNWIND_HINT_RESTORE "1:" : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); #endif diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 2b5d686ea9f3..91c04c8e67fa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -9,6 +9,20 @@ #ifdef __i386__ struct pt_regs { + /* + * NB: 32-bit x86 CPUs are inconsistent as what happens in the + * following cases (where %seg represents a segment register): + * + * - pushl %seg: some do a 16-bit write and leave the high + * bits alone + * - movl %seg, [mem]: some do a 16-bit write despite the movl + * - IDT entry: some (e.g. 486) will leave the high bits of CS + * and (if applicable) SS undefined. + * + * Fortunately, x86-32 doesn't read the high bits on POP or IRET, + * so we can just treat all of the segment registers as 16-bit + * values. + */ unsigned long bx; unsigned long cx; unsigned long dx; @@ -16,16 +30,22 @@ struct pt_regs { unsigned long di; unsigned long bp; unsigned long ax; - unsigned long ds; - unsigned long es; - unsigned long fs; - unsigned long gs; + unsigned short ds; + unsigned short __dsh; + unsigned short es; + unsigned short __esh; + unsigned short fs; + unsigned short __fsh; + unsigned short gs; + unsigned short __gsh; unsigned long orig_ax; unsigned long ip; - unsigned long cs; + unsigned short cs; + unsigned short __csh; unsigned long flags; unsigned long sp; - unsigned long ss; + unsigned short ss; + unsigned short __ssh; }; #else /* __i386__ */ @@ -176,6 +196,17 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, if (offset == offsetof(struct pt_regs, sp) && regs->cs == __KERNEL_CS) return kernel_stack_pointer(regs); + + /* The selector fields are 16-bit. */ + if (offset == offsetof(struct pt_regs, cs) || + offset == offsetof(struct pt_regs, ss) || + offset == offsetof(struct pt_regs, ds) || + offset == offsetof(struct pt_regs, es) || + offset == offsetof(struct pt_regs, fs) || + offset == offsetof(struct pt_regs, gs)) { + return *(u16 *)((unsigned long)regs + offset); + + } #endif return *(unsigned long *)((unsigned long)regs + offset); } diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h new file mode 100644 index 000000000000..ff871210b9f2 --- /dev/null +++ b/arch/x86/include/asm/refcount.h @@ -0,0 +1,109 @@ +#ifndef __ASM_X86_REFCOUNT_H +#define __ASM_X86_REFCOUNT_H +/* + * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from + * PaX/grsecurity. + */ +#include <linux/refcount.h> + +/* + * This is the first portion of the refcount error handling, which lives in + * .text.unlikely, and is jumped to from the CPU flag check (in the + * following macros). This saves the refcount value location into CX for + * the exception handler to use (in mm/extable.c), and then triggers the + * central refcount exception. The fixup address for the exception points + * back to the regular execution flow in .text. + */ +#define _REFCOUNT_EXCEPTION \ + ".pushsection .text.unlikely\n" \ + "111:\tlea %[counter], %%" _ASM_CX "\n" \ + "112:\t" ASM_UD0 "\n" \ + ASM_UNREACHABLE \ + ".popsection\n" \ + "113:\n" \ + _ASM_EXTABLE_REFCOUNT(112b, 113b) + +/* Trigger refcount exception if refcount result is negative. */ +#define REFCOUNT_CHECK_LT_ZERO \ + "js 111f\n\t" \ + _REFCOUNT_EXCEPTION + +/* Trigger refcount exception if refcount result is zero or negative. */ +#define REFCOUNT_CHECK_LE_ZERO \ + "jz 111f\n\t" \ + REFCOUNT_CHECK_LT_ZERO + +/* Trigger refcount exception unconditionally. */ +#define REFCOUNT_ERROR \ + "jmp 111f\n\t" \ + _REFCOUNT_EXCEPTION + +static __always_inline void refcount_add(unsigned int i, refcount_t *r) +{ + asm volatile(LOCK_PREFIX "addl %1,%0\n\t" + REFCOUNT_CHECK_LT_ZERO + : [counter] "+m" (r->refs.counter) + : "ir" (i) + : "cc", "cx"); +} + +static __always_inline void refcount_inc(refcount_t *r) +{ + asm volatile(LOCK_PREFIX "incl %0\n\t" + REFCOUNT_CHECK_LT_ZERO + : [counter] "+m" (r->refs.counter) + : : "cc", "cx"); +} + +static __always_inline void refcount_dec(refcount_t *r) +{ + asm volatile(LOCK_PREFIX "decl %0\n\t" + REFCOUNT_CHECK_LE_ZERO + : [counter] "+m" (r->refs.counter) + : : "cc", "cx"); +} + +static __always_inline __must_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, + r->refs.counter, "er", i, "%0", e); +} + +static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) +{ + GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, + r->refs.counter, "%0", e); +} + +static __always_inline __must_check +bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + int c, result; + + c = atomic_read(&(r->refs)); + do { + if (unlikely(c == 0)) + return false; + + result = c + i; + + /* Did we try to increment from/to an undesirable state? */ + if (unlikely(c < 0 || c == INT_MAX || result < c)) { + asm volatile(REFCOUNT_ERROR + : : [counter] "m" (r->refs.counter) + : "cc", "cx"); + break; + } + + } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); + + return c != 0; +} + +static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) +{ + return refcount_add_not_zero(1, r); +} + +#endif diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 661dd305694a..045f99211a99 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,45 +1,56 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc +#define __CLOBBERS_MEM "memory" +#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" + #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) /* Use asm goto */ -#define __GEN_RMWcc(fullop, var, cc, ...) \ +#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ do { \ asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ - : : "m" (var), ## __VA_ARGS__ \ - : "memory" : cc_label); \ + : : [counter] "m" (var), ## __VA_ARGS__ \ + : clobbers : cc_label); \ return 0; \ cc_label: \ return 1; \ } while (0) -#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc) +#define __BINARY_RMWcc_ARG " %1, " -#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ - __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ /* Use flags output or a set instruction */ -#define __GEN_RMWcc(fullop, var, cc, ...) \ +#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ do { \ bool c; \ asm volatile (fullop ";" CC_SET(cc) \ - : "+m" (var), CC_OUT(cc) (c) \ - : __VA_ARGS__ : "memory"); \ + : [counter] "+m" (var), CC_OUT(cc) (c) \ + : __VA_ARGS__ : clobbers); \ return c; \ } while (0) +#define __BINARY_RMWcc_ARG " %2, " + +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc) + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) + +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ + __CLOBBERS_MEM_CC_CX) #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ - __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) + __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ + __CLOBBERS_MEM, vcon (val)) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ + __CLOBBERS_MEM_CC_CX, vcon (val)) #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e00e1bd6e7b3..5161da1a0fa0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -98,6 +98,7 @@ struct thread_info { #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ #define TIF_X32 30 /* 32-bit native x86-64 binary */ +#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -122,6 +123,7 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_X32 (1 << TIF_X32) +#define _TIF_FSCHECK (1 << TIF_FSCHECK) /* * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for @@ -137,7 +139,8 @@ struct thread_info { (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \ _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \ - _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT) + _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_FSCHECK) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 6358a85e2270..c1d2a9892352 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -75,12 +75,6 @@ static inline const struct cpumask *cpumask_of_node(int node) extern void setup_node_to_cpumask_map(void); -/* - * Returns the number of the node containing Node 'node'. This - * architecture is flat, so it is a pretty simple function! - */ -#define parent_node(node) (node) - #define pcibus_to_node(bus) __pcibus_to_node(bus) extern int __node_distance(int, int); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 30269dafec47..184eb9894dae 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -26,7 +26,12 @@ #define get_ds() (KERNEL_DS) #define get_fs() (current->thread.addr_limit) -#define set_fs(x) (current->thread.addr_limit = (x)) +static inline void set_fs(mm_segment_t fs) +{ + current->thread.addr_limit = fs; + /* On user-mode return, check fs is correct */ + set_thread_flag(TIF_FSCHECK); +} #define segment_eq(a, b) ((a).seg == (b).seg) diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index e6676495b125..e9f793e2df7a 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -12,11 +12,14 @@ struct unwind_state { struct task_struct *task; int graph_idx; bool error; -#ifdef CONFIG_FRAME_POINTER +#if defined(CONFIG_ORC_UNWINDER) + bool signal, full_regs; + unsigned long sp, bp, ip; + struct pt_regs *regs; +#elif defined(CONFIG_FRAME_POINTER_UNWINDER) bool got_irq; - unsigned long *bp, *orig_sp; + unsigned long *bp, *orig_sp, ip; struct pt_regs *regs; - unsigned long ip; #else unsigned long *sp; #endif @@ -24,41 +27,30 @@ struct unwind_state { void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); - bool unwind_next_frame(struct unwind_state *state); - unsigned long unwind_get_return_address(struct unwind_state *state); +unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } -static inline -void unwind_start(struct unwind_state *state, struct task_struct *task, - struct pt_regs *regs, unsigned long *first_frame) -{ - first_frame = first_frame ? : get_stack_pointer(task, regs); - - __unwind_start(state, task, regs, first_frame); -} - static inline bool unwind_error(struct unwind_state *state) { return state->error; } -#ifdef CONFIG_FRAME_POINTER - static inline -unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) { - if (unwind_done(state)) - return NULL; + first_frame = first_frame ? : get_stack_pointer(task, regs); - return state->regs ? &state->regs->ip : state->bp + 1; + __unwind_start(state, task, regs, first_frame); } +#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) { if (unwind_done(state)) @@ -66,20 +58,46 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) return state->regs; } - -#else /* !CONFIG_FRAME_POINTER */ - -static inline -unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) +#else +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) { return NULL; } +#endif -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +#ifdef CONFIG_ORC_UNWINDER +void unwind_init(void); +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size); +#else +static inline void unwind_init(void) {} +static inline +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size) {} +#endif + +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + if (task == current) \ + val = READ_ONCE(x); \ + else \ + val = READ_ONCE_NOCHECK(x); \ + val; \ +}) + +static inline bool task_on_another_cpu(struct task_struct *task) { - return NULL; +#ifdef CONFIG_SMP + return task != current && task->on_cpu; +#else + return false; +#endif } -#endif /* CONFIG_FRAME_POINTER */ - #endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h new file mode 100644 index 000000000000..bae46fc6b9de --- /dev/null +++ b/arch/x86/include/asm/unwind_hints.h @@ -0,0 +1,105 @@ +#ifndef _ASM_X86_UNWIND_HINTS_H +#define _ASM_X86_UNWIND_HINTS_H + +#include "orc_types.h" + +#ifdef __ASSEMBLY__ + +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL +#ifdef CONFIG_STACK_VALIDATION +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .popsection +#endif +.endm + +.macro UNWIND_HINT_EMPTY + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED +.endm + +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 + .if \base == %rsp + .if \indirect + .set sp_reg, ORC_REG_SP_INDIRECT + .else + .set sp_reg, ORC_REG_SP + .endif + .elseif \base == %rbp + .set sp_reg, ORC_REG_BP + .elseif \base == %rdi + .set sp_reg, ORC_REG_DI + .elseif \base == %rdx + .set sp_reg, ORC_REG_DX + .elseif \base == %r10 + .set sp_reg, ORC_REG_R10 + .else + .error "UNWIND_HINT_REGS: bad base register" + .endif + + .set sp_offset, \offset + + .if \iret + .set type, ORC_TYPE_REGS_IRET + .elseif \extra == 0 + .set type, ORC_TYPE_REGS_IRET + .set sp_offset, \offset + (16*8) + .else + .set type, ORC_TYPE_REGS + .endif + + UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type +.endm + +.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 + UNWIND_HINT_REGS base=\base offset=\offset iret=1 +.endm + +.macro UNWIND_HINT_FUNC sp_offset=8 + UNWIND_HINT sp_offset=\sp_offset +.endm + +#else /* !__ASSEMBLY__ */ + +#define UNWIND_HINT(sp_reg, sp_offset, type) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".popsection\n\t" + +#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) + +#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index ddef37b16af2..66b8f93333d1 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -201,7 +201,7 @@ struct boot_params { * * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard * PC mechanisms (PCI, ACPI) and doesn't need a special boot flow. - * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest + * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest, deprecated * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path, * which start at asm startup_xen() entry point and later jump to the C * xen_start_kernel() entry point. Both domU and dom0 type of guests are |