38 files changed, 1196 insertions, 354 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6a5de5643e0b..cac0ba127576 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2211,6 +2211,8 @@ KVM_S390_SIGP_STOP (vcpu) - sigp restart
 KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
 KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
 KVM_S390_RESTART (vcpu) - restart
+KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
+KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
 KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
 			   parameters in parm and parm64
 KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 0d45f6fe734f..a27f5007062a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -32,8 +32,10 @@
 #define KVM_NR_IRQCHIPS 1
 #define KVM_IRQCHIP_NUM_PINS 4096
 
+#define SIGP_CTRL_C	0x00800000
+
 struct sca_entry {
-	atomic_t scn;
+	atomic_t ctrl;
 	__u32	reserved;
 	__u64	sda;
 	__u64	reserved2[2];
@@ -72,6 +74,7 @@ struct sca_block {
 #define CPUSTAT_ZARCH      0x00000800
 #define CPUSTAT_MCDS       0x00000100
 #define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
 #define CPUSTAT_G          0x00000008
 #define CPUSTAT_GED        0x00000004
 #define CPUSTAT_J          0x00000002
@@ -79,7 +82,9 @@ struct sca_block {
 
 struct kvm_s390_sie_block {
 	atomic_t cpuflags;		/* 0x0000 */
-	__u32	prefix;			/* 0x0004 */
+	__u32 : 1;			/* 0x0004 */
+	__u32 prefix : 18;
+	__u32 : 13;
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
 	__u32	prog0c;			/* 0x000c */
@@ -131,7 +136,10 @@ struct kvm_s390_sie_block {
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[28];		/* 0x00b0 */
+	__u8	reservedb0[20];		/* 0x00b0 */
+	__u16	extcpuaddr;		/* 0x00c4 */
+	__u16	eic;			/* 0x00c6 */
+	__u32	reservedc8;		/* 0x00c8 */
 	__u16	pgmilc;			/* 0x00cc */
 	__u16	iprcc;			/* 0x00ce */
 	__u32	dxc;			/* 0x00d0 */
@@ -411,6 +419,7 @@ struct kvm_arch{
 	int use_cmma;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
+	spinlock_t start_stop_lock;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 943d43451116..1aba89b53cb9 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -66,5 +66,6 @@ int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 unsigned long sclp_get_hsa_size(void);
 void sclp_early_detect(void);
 int sclp_has_siif(void);
+unsigned int sclp_get_ibc(void);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 000000000000..3d97f610198d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,245 @@
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#include <asm/sigp.h>
+
+#define diagnose_codes						\
+	{ 0x10, "DIAG (0x10) release pages" },			\
+	{ 0x44, "DIAG (0x44) time slice end" },			\
+	{ 0x9c, "DIAG (0x9c) time slice end directed" },	\
+	{ 0x204, "DIAG (0x204) logical-cpu utilization" },	\
+	{ 0x258, "DIAG (0x258) page-reference services" },	\
+	{ 0x308, "DIAG (0x308) ipl functions" },		\
+	{ 0x500, "DIAG (0x500) KVM virtio functions" },		\
+	{ 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes						\
+	{ SIGP_SENSE, "SIGP sense" },					\
+	{ SIGP_EXTERNAL_CALL, "SIGP external call" },			\
+	{ SIGP_EMERGENCY_SIGNAL, "SIGP emergency signal" },		\
+	{ SIGP_STOP, "SIGP stop" },					\
+	{ SIGP_STOP_AND_STORE_STATUS, "SIGP stop and store status" },	\
+	{ SIGP_SET_ARCHITECTURE, "SIGP set architecture" },		\
+	{ SIGP_SET_PREFIX, "SIGP set prefix" },				\
+	{ SIGP_SENSE_RUNNING, "SIGP sense running" },			\
+	{ SIGP_RESTART, "SIGP restart" },				\
+	{ SIGP_INITIAL_CPU_RESET, "SIGP initial cpu reset" },		\
+	{ SIGP_STORE_STATUS_AT_ADDRESS, "SIGP store status at address" }
+
+#define icpt_prog_codes						\
+	{ 0x0001, "Prog Operation" },				\
+	{ 0x0002, "Prog Privileged Operation" },		\
+	{ 0x0003, "Prog Execute" },				\
+	{ 0x0004, "Prog Protection" },				\
+	{ 0x0005, "Prog Addressing" },				\
+	{ 0x0006, "Prog Specification" },			\
+	{ 0x0007, "Prog Data" },				\
+	{ 0x0008, "Prog Fixedpoint overflow" },			\
+	{ 0x0009, "Prog Fixedpoint divide" },			\
+	{ 0x000A, "Prog Decimal overflow" },			\
+	{ 0x000B, "Prog Decimal divide" },			\
+	{ 0x000C, "Prog HFP exponent overflow" },		\
+	{ 0x000D, "Prog HFP exponent underflow" },		\
+	{ 0x000E, "Prog HFP significance" },			\
+	{ 0x000F, "Prog HFP divide" },				\
+	{ 0x0010, "Prog Segment translation" },			\
+	{ 0x0011, "Prog Page translation" },			\
+	{ 0x0012, "Prog Translation specification" },		\
+	{ 0x0013, "Prog Special operation" },			\
+	{ 0x0015, "Prog Operand" },				\
+	{ 0x0016, "Prog Trace table" },				\
+	{ 0x0017, "Prog ASNtranslation specification" },	\
+	{ 0x001C, "Prog Spaceswitch event" },			\
+	{ 0x001D, "Prog HFP square root" },			\
+	{ 0x001F, "Prog PCtranslation specification" },		\
+	{ 0x0020, "Prog AFX translation" },			\
+	{ 0x0021, "Prog ASX translation" },			\
+	{ 0x0022, "Prog LX translation" },			\
+	{ 0x0023, "Prog EX translation" },			\
+	{ 0x0024, "Prog Primary authority" },			\
+	{ 0x0025, "Prog Secondary authority" },			\
+	{ 0x0026, "Prog LFXtranslation exception" },		\
+	{ 0x0027, "Prog LSXtranslation exception" },		\
+	{ 0x0028, "Prog ALET specification" },			\
+	{ 0x0029, "Prog ALEN translation" },			\
+	{ 0x002A, "Prog ALE sequence" },			\
+	{ 0x002B, "Prog ASTE validity" },			\
+	{ 0x002C, "Prog ASTE sequence" },			\
+	{ 0x002D, "Prog Extended authority" },			\
+	{ 0x002E, "Prog LSTE sequence" },			\
+	{ 0x002F, "Prog ASTE instance" },			\
+	{ 0x0030, "Prog Stack full" },				\
+	{ 0x0031, "Prog Stack empty" },				\
+	{ 0x0032, "Prog Stack specification" },			\
+	{ 0x0033, "Prog Stack type" },				\
+	{ 0x0034, "Prog Stack operation" },			\
+	{ 0x0039, "Prog Region first translation" },		\
+	{ 0x003A, "Prog Region second translation" },		\
+	{ 0x003B, "Prog Region third translation" },		\
+	{ 0x0040, "Prog Monitor event" },			\
+	{ 0x0080, "Prog PER event" },				\
+	{ 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic)		\
+	{ (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic)			\
+	{ opcode, mnemonic }
+
+#define icpt_insn_codes				\
+	exit_code_ipa0(0x01, 0x01, "PR"),	\
+	exit_code_ipa0(0x01, 0x04, "PTFF"),	\
+	exit_code_ipa0(0x01, 0x07, "SCKPF"),	\
+	exit_code_ipa0(0xAA, 0x00, "RINEXT"),	\
+	exit_code_ipa0(0xAA, 0x01, "RION"),	\
+	exit_code_ipa0(0xAA, 0x02, "TRIC"),	\
+	exit_code_ipa0(0xAA, 0x03, "RIOFF"),	\
+	exit_code_ipa0(0xAA, 0x04, "RIEMIT"),	\
+	exit_code_ipa0(0xB2, 0x02, "STIDP"),	\
+	exit_code_ipa0(0xB2, 0x04, "SCK"),	\
+	exit_code_ipa0(0xB2, 0x05, "STCK"),	\
+	exit_code_ipa0(0xB2, 0x06, "SCKC"),	\
+	exit_code_ipa0(0xB2, 0x07, "STCKC"),	\
+	exit_code_ipa0(0xB2, 0x08, "SPT"),	\
+	exit_code_ipa0(0xB2, 0x09, "STPT"),	\
+	exit_code_ipa0(0xB2, 0x0d, "PTLB"),	\
+	exit_code_ipa0(0xB2, 0x10, "SPX"),	\
+	exit_code_ipa0(0xB2, 0x11, "STPX"),	\
+	exit_code_ipa0(0xB2, 0x12, "STAP"),	\
+	exit_code_ipa0(0xB2, 0x14, "SIE"),	\
+	exit_code_ipa0(0xB2, 0x16, "SETR"),	\
+	exit_code_ipa0(0xB2, 0x17, "STETR"),	\
+	exit_code_ipa0(0xB2, 0x18, "PC"),	\
+	exit_code_ipa0(0xB2, 0x20, "SERVC"),	\
+	exit_code_ipa0(0xB2, 0x28, "PT"),	\
+	exit_code_ipa0(0xB2, 0x29, "ISKE"),	\
+	exit_code_ipa0(0xB2, 0x2a, "RRBE"),	\
+	exit_code_ipa0(0xB2, 0x2b, "SSKE"),	\
+	exit_code_ipa0(0xB2, 0x2c, "TB"),	\
+	exit_code_ipa0(0xB2, 0x2e, "PGIN"),	\
+	exit_code_ipa0(0xB2, 0x2f, "PGOUT"),	\
+	exit_code_ipa0(0xB2, 0x30, "CSCH"),	\
+	exit_code_ipa0(0xB2, 0x31, "HSCH"),	\
+	exit_code_ipa0(0xB2, 0x32, "MSCH"),	\
+	exit_code_ipa0(0xB2, 0x33, "SSCH"),	\
+	exit_code_ipa0(0xB2, 0x34, "STSCH"),	\
+	exit_code_ipa0(0xB2, 0x35, "TSCH"),	\
+	exit_code_ipa0(0xB2, 0x36, "TPI"),	\
+	exit_code_ipa0(0xB2, 0x37, "SAL"),	\
+	exit_code_ipa0(0xB2, 0x38, "RSCH"),	\
+	exit_code_ipa0(0xB2, 0x39, "STCRW"),	\
+	exit_code_ipa0(0xB2, 0x3a, "STCPS"),	\
+	exit_code_ipa0(0xB2, 0x3b, "RCHP"),	\
+	exit_code_ipa0(0xB2, 0x3c, "SCHM"),	\
+	exit_code_ipa0(0xB2, 0x40, "BAKR"),	\
+	exit_code_ipa0(0xB2, 0x48, "PALB"),	\
+	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
+	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
+	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
+	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
+	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
+	exit_code_ipa0(0xB2, 0x74, "SIGA"),	\
+	exit_code_ipa0(0xB2, 0x76, "XSCH"),	\
+	exit_code_ipa0(0xB2, 0x78, "STCKE"),	\
+	exit_code_ipa0(0xB2, 0x7c, "STCKF"),	\
+	exit_code_ipa0(0xB2, 0x7d, "STSI"),	\
+	exit_code_ipa0(0xB2, 0xb0, "STFLE"),	\
+	exit_code_ipa0(0xB2, 0xb1, "STFL"),	\
+	exit_code_ipa0(0xB2, 0xb2, "LPSWE"),	\
+	exit_code_ipa0(0xB2, 0xf8, "TEND"),	\
+	exit_code_ipa0(0xB2, 0xfc, "TABORT"),	\
+	exit_code_ipa0(0xB9, 0x1e, "KMAC"),	\
+	exit_code_ipa0(0xB9, 0x28, "PCKMO"),	\
+	exit_code_ipa0(0xB9, 0x2a, "KMF"),	\
+	exit_code_ipa0(0xB9, 0x2b, "KMO"),	\
+	exit_code_ipa0(0xB9, 0x2d, "KMCTR"),	\
+	exit_code_ipa0(0xB9, 0x2e, "KM"),	\
+	exit_code_ipa0(0xB9, 0x2f, "KMC"),	\
+	exit_code_ipa0(0xB9, 0x3e, "KIMD"),	\
+	exit_code_ipa0(0xB9, 0x3f, "KLMD"),	\
+	exit_code_ipa0(0xB9, 0x8a, "CSPG"),	\
+	exit_code_ipa0(0xB9, 0x8d, "EPSW"),	\
+	exit_code_ipa0(0xB9, 0x8e, "IDTE"),	\
+	exit_code_ipa0(0xB9, 0x8f, "CRDTE"),	\
+	exit_code_ipa0(0xB9, 0x9c, "EQBS"),	\
+	exit_code_ipa0(0xB9, 0xa2, "PTF"),	\
+	exit_code_ipa0(0xB9, 0xab, "ESSA"),	\
+	exit_code_ipa0(0xB9, 0xae, "RRBM"),	\
+	exit_code_ipa0(0xB9, 0xaf, "PFMF"),	\
+	exit_code_ipa0(0xE3, 0x03, "LRAG"),	\
+	exit_code_ipa0(0xE3, 0x13, "LRAY"),	\
+	exit_code_ipa0(0xE3, 0x25, "NTSTG"),	\
+	exit_code_ipa0(0xE5, 0x00, "LASP"),	\
+	exit_code_ipa0(0xE5, 0x01, "TPROT"),	\
+	exit_code_ipa0(0xE5, 0x60, "TBEGIN"),	\
+	exit_code_ipa0(0xE5, 0x61, "TBEGINC"),	\
+	exit_code_ipa0(0xEB, 0x25, "STCTG"),	\
+	exit_code_ipa0(0xEB, 0x2f, "LCTLG"),	\
+	exit_code_ipa0(0xEB, 0x60, "LRIC"),	\
+	exit_code_ipa0(0xEB, 0x61, "STRIC"),	\
+	exit_code_ipa0(0xEB, 0x62, "MRIC"),	\
+	exit_code_ipa0(0xEB, 0x8a, "SQBS"),	\
+	exit_code_ipa0(0xC8, 0x01, "ECTG"),	\
+	exit_code(0x0a, "SVC"),			\
+	exit_code(0x80, "SSM"),			\
+	exit_code(0x82, "LPSW"),		\
+	exit_code(0x83, "DIAG"),		\
+	exit_code(0xae, "SIGP"),		\
+	exit_code(0xac, "STNSM"),		\
+	exit_code(0xad, "STOSM"),		\
+	exit_code(0xb1, "LRA"),			\
+	exit_code(0xb6, "STCTL"),		\
+	exit_code(0xb7, "LCTL"),		\
+	exit_code(0xee, "PLO")
+
+#define sie_intercept_code					\
+	{ 0x00, "Host interruption" },				\
+	{ 0x04, "Instruction" },				\
+	{ 0x08, "Program interruption" },			\
+	{ 0x0c, "Instruction and program interruption" },	\
+	{ 0x10, "External request" },				\
+	{ 0x14, "External interruption" },			\
+	{ 0x18, "I/O request" },				\
+	{ 0x1c, "Wait state" },					\
+	{ 0x20, "Validity" },					\
+	{ 0x28, "Stop request" },				\
+	{ 0x2c, "Operation exception" },			\
+	{ 0x38, "Partial-execution" },				\
+	{ 0x3c, "I/O interruption" },				\
+	{ 0x40, "I/O instruction" },				\
+	{ 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as userspace interface and it can be used in places
+ * that does not allow to use general decoder functions,
+ * such as trace events declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask)		\
+	(insn >> 56) == (ipa0) ?				\
+		((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn)			\
+	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
+	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xb9, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe3, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
+	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
+	INSN_DECODE(insn)
+
+#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 5521ace8b60d..0161675878a2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -23,7 +23,7 @@
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
 	unsigned long start, end;
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
@@ -176,7 +176,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	}
 
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_stop(vcpu);
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 691fdb776c90..db608c3f9303 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -643,3 +643,31 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 	}
 	return rc;
 }
+
+/**
+ * kvm_s390_check_low_addr_protection - check for low-address protection
+ * @ga: Guest address
+ *
+ * Checks whether an address is subject to low-address protection and set
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+
+	if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+		return 0;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = FSI_STORE;
+	tec_bits->as = psw_bits(*psw).as;
+	tec_bits->addr = ga >> PAGE_SHIFT;
+	pgm->code = PGM_PROTECTION;
+
+	return pgm->code;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 1079c8fc6d0d..a07ee08ac478 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -30,7 +30,7 @@
 static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
 						 unsigned long gra)
 {
-	unsigned long prefix = vcpu->arch.sie_block->prefix;
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
 
 	if (gra < 2 * PAGE_SIZE)
 		gra += prefix;
@@ -99,7 +99,7 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
 	unsigned long __gpa;					\
 								\
 	__gpa = (unsigned long)(gra);				\
-	__gpa += __vcpu->arch.sie_block->prefix;		\
+	__gpa += kvm_s390_get_prefix(__vcpu);			\
 	kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x));	\
 })
 
@@ -124,7 +124,7 @@ static inline __must_check
 int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 		   unsigned long len)
 {
-	unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
 
 	return kvm_write_guest(vcpu->kvm, gpa, data, len);
 }
@@ -150,7 +150,7 @@ static inline __must_check
 int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 		  unsigned long len)
 {
-	unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
 
 	return kvm_read_guest(vcpu->kvm, gpa, data, len);
 }
@@ -325,5 +325,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 }
 
 int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 757ccef62fd5..3e8d4092ce30 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -223,9 +223,10 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
 		goto error;
 	}
 
-	ret = copy_from_user(bp_data, dbg->arch.hw_bp, size);
-	if (ret)
+	if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+		ret = -EFAULT;
 		goto error;
+	}
 
 	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
 		switch (bp_data[i].type) {
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 30e1c5eb726a..a0b586c1913c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,7 +1,7 @@
 /*
  * in-kernel handling for sie intercepts
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -17,6 +17,7 @@
 
 #include <asm/kvm_host.h>
 #include <asm/asm-offsets.h>
+#include <asm/irq.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -46,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu)
 	case 0x10:
 		vcpu->stat.exit_external_request++;
 		break;
-	case 0x14:
-		vcpu->stat.exit_external_interrupt++;
-		break;
 	default:
 		break; /* nothing */
 	}
@@ -65,8 +63,7 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
 
 	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
-		atomic_set_mask(CPUSTAT_STOPPED,
-				&vcpu->arch.sie_block->cpuflags);
+		kvm_s390_vcpu_stop(vcpu);
 		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
 		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 		rc = -EOPNOTSUPP;
@@ -198,6 +195,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_pgm_info pgm_info;
+	psw_t psw;
 	int rc;
 
 	vcpu->stat.exit_program_interruption++;
@@ -210,7 +208,14 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
-
+	if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+		rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+		if (rc)
+			return rc;
+		/* Avoid endless loops of specification exceptions */
+		if (!is_valid_psw(&psw))
+			return -EOPNOTSUPP;
+	}
 	rc = handle_itdb(vcpu);
 	if (rc)
 		return rc;
@@ -234,17 +239,110 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 	return rc2;
 }
 
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+	u16 eic = vcpu->arch.sie_block->eic;
+	struct kvm_s390_interrupt irq;
+	psw_t newpsw;
+	int rc;
+
+	vcpu->stat.exit_external_interrupt++;
+
+	rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+	if (rc)
+		return rc;
+	/* We can not handle clock comparator or timer interrupt with bad PSW */
+	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+	    (newpsw.mask & PSW_MASK_EXT))
+		return -EOPNOTSUPP;
+
+	switch (eic) {
+	case EXT_IRQ_CLK_COMP:
+		irq.type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case EXT_IRQ_CPU_TIMER:
+		irq.type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case EXT_IRQ_EXTERNAL_CALL:
+		if (kvm_s390_si_ext_call_pending(vcpu))
+			return 0;
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.parm = vcpu->arch.sie_block->extcpuaddr;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * Handle MOVE PAGE partial execution interception.
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long srcaddr, dstaddr;
+	int reg1, reg2, rc;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* Make sure that the source is paged-in */
+	srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
+	if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+	if (rc != 0)
+		return rc;
+
+	/* Make sure that the destination is paged-in */
+	dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
+	if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+	if (rc != 0)
+		return rc;
+
+	psw->addr = __rewind_psw(*psw, 4);
+
+	return 0;
+}
+
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->ipa == 0xb254)	/* MVPG */
+		return handle_mvpg_pei(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae)	/* SIGP */
+		return kvm_s390_handle_sigp_pei(vcpu);
+
+	return -EOPNOTSUPP;
+}
+
 static const intercept_handler_t intercept_funcs[] = {
 	[0x00 >> 2] = handle_noop,
 	[0x04 >> 2] = handle_instruction,
 	[0x08 >> 2] = handle_prog,
 	[0x0C >> 2] = handle_instruction_and_prog,
 	[0x10 >> 2] = handle_noop,
-	[0x14 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_external_interrupt,
 	[0x18 >> 2] = handle_noop,
 	[0x1C >> 2] = kvm_s390_handle_wait,
 	[0x20 >> 2] = handle_validity,
 	[0x28 >> 2] = handle_stop,
+	[0x38 >> 2] = handle_partial_execution,
 };
 
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 077e4738ebdc..bf0d9bc15bcd 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -27,6 +27,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000
 
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+
 static int is_ioint(u64 type)
 {
 	return ((type & 0xfffe0000u) != 0xfffe0000u);
@@ -89,6 +91,14 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
 			return 1;
 		return 0;
+	case KVM_S390_INT_CLOCK_COMP:
+		return ckc_interrupts_enabled(vcpu);
+	case KVM_S390_INT_CPU_TIMER:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
+			return 1;
+		return 0;
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
@@ -138,9 +148,8 @@ static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_ECALL_PEND |
-		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
-		&vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+			  &vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
 	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
 
@@ -166,6 +175,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_CLOCK_COMP:
+	case KVM_S390_INT_CPU_TIMER:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
 		else
@@ -326,6 +337,24 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 				    &vcpu->arch.sie_block->gpsw,
 				    sizeof(psw_t));
 		break;
+	case KVM_S390_INT_CLOCK_COMP:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+						 inti->ext.ext_params, 0);
+		deliver_ckc_interrupt(vcpu);
+		break;
+	case KVM_S390_INT_CPU_TIMER:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+						 inti->ext.ext_params, 0);
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+				   (u16 *)__LC_EXT_INT_CODE);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
+		break;
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
 			   inti->ext.ext_params);
@@ -413,7 +442,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
 				    &vcpu->arch.sie_block->gpsw,
 				    sizeof(psw_t));
-		atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+		kvm_s390_vcpu_start(vcpu);
 		break;
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
@@ -494,6 +523,20 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 	}
 }
 
+/* Check whether SIGP interpretation facility has an external call pending */
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+	atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
+
+	if (!psw_extint_disabled(vcpu) &&
+	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
+	    (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+		return 1;
+
+	return 0;
+}
+
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -524,6 +567,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	if (!rc && kvm_cpu_has_pending_timer(vcpu))
 		rc = 1;
 
+	if (!rc && kvm_s390_si_ext_call_pending(vcpu))
+		rc = 1;
+
 	return rc;
 }
 
@@ -580,7 +626,8 @@ no_timer:
 	while (list_empty(&vcpu->arch.local_int.list) &&
 		list_empty(&vcpu->arch.local_int.float_int->list) &&
 		(!vcpu->arch.local_int.timer_due) &&
-		!signal_pending(current)) {
+		!signal_pending(current) &&
+		!kvm_s390_si_ext_call_pending(vcpu)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&vcpu->arch.local_int.lock);
 		spin_unlock(&vcpu->arch.local_int.float_int->lock);
@@ -637,6 +684,11 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 	}
 	atomic_set(&li->active, 0);
 	spin_unlock_bh(&li->lock);
+
+	/* clear pending external calls set by sigp interpretation facility */
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(SIGP_CTRL_C,
+			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -984,6 +1036,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_S390_SIGP_STOP:
 	case KVM_S390_RESTART:
+	case KVM_S390_INT_CLOCK_COMP:
+	case KVM_S390_INT_CPU_TIMER:
 		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
 		inti->type = s390int->type;
 		break;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b32c42cbc706..e519860c6031 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -458,6 +458,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
 
+	spin_lock_init(&kvm->arch.start_stop_lock);
+
 	return 0;
 out_nogmap:
 	debug_unregister(kvm->arch.dbf);
@@ -592,7 +594,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	kvm_clear_async_pf_completion_queue(vcpu);
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_stop(vcpu);
 	kvm_s390_clear_local_irqs(vcpu);
 }
 
@@ -631,7 +633,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->ecb |= 0x10;
 
 	vcpu->arch.sie_block->ecb2  = 8;
-	vcpu->arch.sie_block->eca   = 0xC1002000U;
+	vcpu->arch.sie_block->eca   = 0xD1002000U;
 	if (sclp_has_siif())
 		vcpu->arch.sie_block->eca |= 1;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
@@ -751,7 +753,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		/* match against both prefix pages */
-		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 			exit_sie_sync(vcpu);
@@ -996,8 +998,15 @@ bool kvm_s390_cmma_enabled(struct kvm *kvm)
 	return true;
 }
 
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
+retry:
+	s390_vcpu_unblock(vcpu);
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -1005,27 +1014,61 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	 * already finished. We might race against a second unmapper that
 	 * wants to set the blocking bit. Lets just retry the request loop.
 	 */
-	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
 		int rc;
 		rc = gmap_ipte_notify(vcpu->arch.gmap,
-				      vcpu->arch.sie_block->prefix,
+				      kvm_s390_get_prefix(vcpu),
 				      PAGE_SIZE * 2);
 		if (rc)
 			return rc;
-		s390_vcpu_unblock(vcpu);
+		goto retry;
 	}
+
+	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+		if (!ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+			atomic_set_mask(CPUSTAT_IBS,
+					&vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+		if (ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+			atomic_clear_mask(CPUSTAT_IBS,
+					  &vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
 	return 0;
 }
 
-static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
 {
-	long rc;
-	hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
 	struct mm_struct *mm = current->mm;
+	hva_t hva;
+	long rc;
+
+	hva = gmap_fault(gpa, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(hva))
+		return (long)hva;
 	down_read(&mm->mmap_sem);
-	rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+	rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
-	return rc;
+
+	return rc < 0 ? rc : 0;
 }
 
 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1163,9 +1206,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	} else if (current->thread.gmap_pfault) {
 		trace_kvm_s390_major_guest_pfault(vcpu);
 		current->thread.gmap_pfault = 0;
-		if (kvm_arch_setup_async_pf(vcpu) ||
-		    (kvm_arch_fault_in_sync(vcpu) >= 0))
+		if (kvm_arch_setup_async_pf(vcpu)) {
 			rc = 0;
+		} else {
+			gpa_t gpa = current->thread.gmap_addr;
+			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
+		}
 	}
 
 	if (rc == -1) {
@@ -1235,7 +1281,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
-	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_start(vcpu);
 
 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
@@ -1292,7 +1338,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
 	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
-	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
+	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
 
 	if (vcpu->sigset_active)
@@ -1311,6 +1357,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
+	unsigned int px;
 	u64 clkcomp;
 	int rc;
 
@@ -1329,8 +1376,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 			      vcpu->run->s.regs.gprs, 128);
 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
 			      &vcpu->arch.sie_block->gpsw, 16);
+	px = kvm_s390_get_prefix(vcpu);
 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
-			      &vcpu->arch.sie_block->prefix, 4);
+			      &px, 4);
 	rc |= write_guest_abs(vcpu,
 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
 			      &vcpu->arch.guest_fpregs.fpc, 4);
@@ -1362,6 +1410,109 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		__disable_ibs_on_vcpu(vcpu);
+	}
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+			started_vcpus++;
+	}
+
+	if (started_vcpus == 0) {
+		/* we're the only active VCPU -> speed it up */
+		__enable_ibs_on_vcpu(vcpu);
+	} else if (started_vcpus == 1) {
+		/*
+		 * As we are starting a second VCPU, we have to disable
+		 * the IBS facility on all VCPUs to remove potentially
+		 * oustanding ENABLE requests.
+		 */
+		__disable_ibs_on_all_vcpus(vcpu->kvm);
+	}
+
+	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	/*
+	 * Another VCPU might have used IBS while we were offline.
+	 * Let's play safe and flush the VCPU at startup.
+	 */
+	vcpu->arch.sie_block->ihcpu  = 0xffff;
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+	struct kvm_vcpu *started_vcpu = NULL;
+
+	if (is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	__disable_ibs_on_vcpu(vcpu);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+			started_vcpus++;
+			started_vcpu = vcpu->kvm->vcpus[i];
+		}
+	}
+
+	if (started_vcpus == 1) {
+		/*
+		 * As we only have one VCPU left, we want to enable the
+		 * IBS facility for that VCPU to speed it up.
+		 */
+		__enable_ibs_on_vcpu(started_vcpu);
+	}
+
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 9b5680d1f6cc..a8655ed31616 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -61,9 +61,15 @@ static inline int kvm_is_ucontrol(struct kvm *kvm)
 #endif
 }
 
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
 static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
-	vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
+	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
 	vcpu->arch.sie_block->ihcpu  = 0xffff;
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
@@ -142,6 +148,7 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in priv.c */
+int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
@@ -153,10 +160,14 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
@@ -210,6 +221,7 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 27f9051a78f8..6296159ac883 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -119,8 +119,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	if (operand2 & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	address = vcpu->arch.sie_block->prefix;
-	address = address & 0x7fffe000u;
+	address = kvm_s390_get_prefix(vcpu);
 
 	/* get the value */
 	rc = write_guest(vcpu, operand2, &address, sizeof(address));
@@ -206,6 +205,9 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 
 	kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
 	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	addr = kvm_s390_logical_to_effective(vcpu, addr);
+	if (kvm_s390_check_low_addr_protection(vcpu, addr))
+		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	addr = kvm_s390_real_to_abs(vcpu, addr);
 
 	if (kvm_is_error_gpa(vcpu->kvm, addr))
@@ -362,7 +364,8 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
 #define PSW_ADDR_24 0x0000000000ffffffUL
 #define PSW_ADDR_31 0x000000007fffffffUL
 
-static int is_valid_psw(psw_t *psw) {
+int is_valid_psw(psw_t *psw)
+{
 	if (psw->mask & PSW_MASK_UNASSIGNED)
 		return 0;
 	if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
@@ -373,6 +376,8 @@ static int is_valid_psw(psw_t *psw) {
 		return 0;
 	if ((psw->mask & PSW_MASK_ADDR_MODE) ==  PSW_MASK_EA)
 		return 0;
+	if (psw->addr & 1)
+		return 0;
 	return 1;
 }
 
@@ -650,6 +655,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_protection(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
 	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 	case 0x00000000:
 		end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
@@ -665,10 +675,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	}
 	while (start < end) {
-		unsigned long useraddr;
-
-		useraddr = gmap_translate(start, vcpu->arch.gmap);
-		if (IS_ERR((void *)useraddr))
+		unsigned long useraddr, abs_addr;
+
+		/* Translate guest address to host address */
+		if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
+			abs_addr = kvm_s390_real_to_abs(vcpu, start);
+		else
+			abs_addr = start;
+		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+		if (kvm_is_error_hva(useraddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c0b99e0f6b63..d0341d2e54b1 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -458,3 +458,38 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occurr after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception, therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	struct kvm_vcpu *dest_vcpu;
+	u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+	if (order_code == SIGP_EXTERNAL_CALL) {
+		dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		BUG_ON(dest_vcpu == NULL);
+
+		spin_lock_bh(&dest_vcpu->arch.local_int.lock);
+		if (waitqueue_active(&dest_vcpu->wq))
+			wake_up_interruptible(&dest_vcpu->wq);
+		dest_vcpu->preempted = true;
+		spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
+
+		kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 13f30f58a2df..647e9d6a4818 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -68,6 +68,27 @@ TRACE_EVENT(kvm_s390_destroy_vcpu,
 	);
 
 /*
+ * Trace point for start and stop of vpcus.
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+		      __entry->id)
+	);
+
+/*
  * Trace points for injection of interrupts, either per machine or
  * per vcpu.
  */
@@ -223,6 +244,28 @@ TRACE_EVENT(kvm_s390_enable_css,
 		      __entry->kvm)
 	);
 
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s ibs on cpu %d",
+		      __entry->state ? "enabling" : "disabling", __entry->id)
+	);
+
 
 #endif /* _TRACE_KVMS390_H */
 
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index abf6ba52769e..916834d7a73a 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -2,7 +2,7 @@
 #define _TRACE_KVM_H
 
 #include <linux/tracepoint.h>
-#include <asm/sigp.h>
+#include <asm/sie.h>
 #include <asm/debug.h>
 #include <asm/dis.h>
 
@@ -125,17 +125,6 @@ TRACE_EVENT(kvm_s390_sie_fault,
 	    VCPU_TP_PRINTK("%s", "fault in sie instruction")
 	);
 
-#define sie_intercept_code				\
-	{0x04, "Instruction"},				\
-	{0x08, "Program interruption"},			\
-	{0x0C, "Instruction and program interruption"},	\
-	{0x10, "External request"},			\
-	{0x14, "External interruption"},		\
-	{0x18, "I/O request"},				\
-	{0x1C, "Wait state"},				\
-	{0x20, "Validity"},				\
-	{0x28, "Stop request"}
-
 TRACE_EVENT(kvm_s390_sie_exit,
 	    TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
 	    TP_ARGS(VCPU_ARGS_COMMON, icptcode),
@@ -165,7 +154,6 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
 	    TP_STRUCT__entry(
 		    VCPU_FIELD_COMMON
 		    __field(__u64, instruction)
-		    __field(char, insn[8])
 		    ),
 
 	    TP_fast_assign(
@@ -176,10 +164,8 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
 
 	    VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
 			   __entry->instruction,
-			   insn_to_mnemonic((unsigned char *)
-					    &__entry->instruction,
-					 __entry->insn, sizeof(__entry->insn)) ?
-			   "unknown" : __entry->insn)
+			   __print_symbolic(icpt_insn_decoder(__entry->instruction),
+					    icpt_insn_codes))
 	);
 
 /*
@@ -227,18 +213,6 @@ TRACE_EVENT(kvm_s390_intercept_validity,
  * Trace points for instructions that are of special interest.
  */
 
-#define sigp_order_codes					\
-	{SIGP_SENSE, "sense"},					\
-	{SIGP_EXTERNAL_CALL, "external call"},			\
-	{SIGP_EMERGENCY_SIGNAL, "emergency signal"},		\
-	{SIGP_STOP, "stop"},					\
-	{SIGP_STOP_AND_STORE_STATUS, "stop and store status"},	\
-	{SIGP_SET_ARCHITECTURE, "set architecture"},		\
-	{SIGP_SET_PREFIX, "set prefix"},			\
-	{SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"},	\
-	{SIGP_SENSE_RUNNING, "sense running"},			\
-	{SIGP_RESTART, "restart"}
-
 TRACE_EVENT(kvm_s390_handle_sigp,
 	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
 		     __u32 parameter),
@@ -265,12 +239,28 @@ TRACE_EVENT(kvm_s390_handle_sigp,
 			   __entry->cpu_addr, __entry->parameter)
 	);
 
-#define diagnose_codes				\
-	{0x10, "release pages"},		\
-	{0x44, "time slice end"},		\
-	{0x308, "ipl functions"},		\
-	{0x500, "kvm hypercall"},		\
-	{0x501, "kvm breakpoint"}
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u8, order_code)
+		    __field(__u16, cpu_addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->order_code = order_code;
+		    __entry->cpu_addr = cpu_addr;
+		    ),
+
+	    VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+			   __entry->order_code,
+			   __print_symbolic(__entry->order_code,
+					    sigp_order_codes),
+			   __entry->cpu_addr)
+	);
 
 TRACE_EVENT(kvm_s390_handle_diag,
 	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ea4a31b95990..66ba60c9b77e 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -958,8 +958,10 @@ void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
 	unsigned long addr, next;
 	pgd_t *pgd;
 
+	down_write(&mm->mmap_sem);
+	if (init_skey && mm_use_skey(mm))
+		goto out_up;
 	addr = start;
-	down_read(&mm->mmap_sem);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -967,7 +969,10 @@ void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
 			continue;
 		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
 	} while (pgd++, addr = next, addr != end);
-	up_read(&mm->mmap_sem);
+	if (init_skey)
+		current->mm->context.use_skey = 1;
+out_up:
+	up_write(&mm->mmap_sem);
 }
 EXPORT_SYMBOL(page_table_reset_pgste);
 
@@ -1384,19 +1389,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
  */
 void s390_enable_skey(void)
 {
-	/*
-	 * To avoid races between multiple vcpus, ending in calling
-	 * page_table_reset twice or more,
-	 * the page_table_lock is taken for serialization.
-	 */
-	spin_lock(&current->mm->page_table_lock);
-	if (mm_use_skey(current->mm)) {
-		spin_unlock(&current->mm->page_table_lock);
-		return;
-	}
-
-	current->mm->context.use_skey = 1;
-	spin_unlock(&current->mm->page_table_lock);
 	page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
 }
 EXPORT_SYMBOL_GPL(s390_enable_skey);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 24ec1216596e..a04fe4eb237d 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -189,7 +189,6 @@ struct x86_emulate_ops {
 	void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
 	ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
 	int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
-	void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
 	int (*cpl)(struct x86_emulate_ctxt *ctxt);
 	int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
 	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e21aee98a5c2..49314155b66c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -130,7 +130,6 @@ enum kvm_reg_ex {
 	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 	VCPU_EXREG_CR3,
 	VCPU_EXREG_RFLAGS,
-	VCPU_EXREG_CPL,
 	VCPU_EXREG_SEGMENTS,
 };
 
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..8ba18842c48e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,6 +74,11 @@ dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
 #ifdef CONFIG_TRACING
 dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
+#else
+static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
+{
+	do_page_fault(regs, error);
+}
 #endif
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
 dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 0331cb389d68..7e97371387fd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -259,7 +259,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		do_page_fault(regs, error_code);
+		trace_do_page_fault(regs, error_code);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f47a104a749c..38a0afe83c6b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -283,6 +283,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 	/* cpuid 1.ecx */
 	const u32 kvm_supported_word4_x86_features =
+		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
+		 * but *not* advertised to guests via CPUID ! */
 		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
 		0 /* DS-CPL, VMX, SMX, EST */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
@@ -495,6 +497,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ecx &= kvm_supported_word6_x86_features;
 		cpuid_mask(&entry->ecx, 6);
 		break;
+	case 0x80000007: /* Advanced power management */
+		/* invariant TSC is CPUID.80000007H:EDX[8] */
+		entry->edx &= (1 << 8);
+		/* mask against host */
+		entry->edx &= boot_cpu_data.x86_power;
+		entry->eax = entry->ebx = entry->ecx = 0;
+		break;
 	case 0x80000008: {
 		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
 		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
@@ -525,7 +534,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 3: /* Processor serial number */
 	case 5: /* MONITOR/MWAIT */
 	case 6: /* Thermal management */
-	case 0x80000007: /* Advanced power management */
 	case 0xC0000002:
 	case 0xC0000003:
 	case 0xC0000004:
@@ -726,6 +734,7 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 not_found:
 	return 36;
 }
+EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
 
 /*
  * If no match is found, check whether we exceed the vCPU's limit
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index eeecbed26ac7..f9087315e0cd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -88,4 +88,11 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_X2APIC));
 }
 
+static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+	return best && (best->edx & bit(X86_FEATURE_GBPAGES));
+}
 #endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e8a58409b5ac..e4e833d3d7d7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,6 +161,7 @@
 #define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
 #define NoWrite     ((u64)1 << 45)  /* No writeback */
 #define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
+#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -1077,7 +1078,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	ctxt->modrm_rm |= (ctxt->modrm & 0x07);
 	ctxt->modrm_seg = VCPU_SREG_DS;
 
-	if (ctxt->modrm_mod == 3) {
+	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
@@ -1410,11 +1411,11 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 }
 
 /* Does not support long mode */
-static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				   u16 selector, int seg)
+static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+				     u16 selector, int seg, u8 cpl, bool in_task_switch)
 {
 	struct desc_struct seg_desc, old_desc;
-	u8 dpl, rpl, cpl;
+	u8 dpl, rpl;
 	unsigned err_vec = GP_VECTOR;
 	u32 err_code = 0;
 	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
@@ -1442,7 +1443,6 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 
 	rpl = selector & 3;
-	cpl = ctxt->ops->cpl(ctxt);
 
 	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
 	if ((seg == VCPU_SREG_CS
@@ -1487,6 +1487,9 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 			goto exception;
 		break;
 	case VCPU_SREG_CS:
+		if (in_task_switch && rpl != dpl)
+			goto exception;
+
 		if (!(seg_desc.type & 8))
 			goto exception;
 
@@ -1544,6 +1547,13 @@ exception:
 	return X86EMUL_PROPAGATE_FAULT;
 }
 
+static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+				   u16 selector, int seg)
+{
+	u8 cpl = ctxt->ops->cpl(ctxt);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+}
+
 static void write_register_operand(struct operand *op)
 {
 	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
@@ -2405,6 +2415,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 				 struct tss_segment_16 *tss)
 {
 	int ret;
+	u8 cpl;
 
 	ctxt->_eip = tss->ip;
 	ctxt->eflags = tss->flag | 2;
@@ -2427,23 +2438,25 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
 	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
+	cpl = tss->cs & 3;
+
 	/*
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2521,6 +2534,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 				 struct tss_segment_32 *tss)
 {
 	int ret;
+	u8 cpl;
 
 	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
 		return emulate_gp(ctxt, 0);
@@ -2539,7 +2553,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
-	 * descriptors
+	 * descriptors.  This is important because CPL checks will
+	 * use CS.RPL.
 	 */
 	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
 	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
@@ -2553,43 +2568,38 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * If we're switching between Protected Mode and VM86, we need to make
 	 * sure to update the mode before loading the segment descriptors so
 	 * that the selectors are interpreted correctly.
-	 *
-	 * Need to get rflags to the vcpu struct immediately because it
-	 * influences the CPL which is checked at least when loading the segment
-	 * descriptors and when pushing an error code to the new kernel stack.
-	 *
-	 * TODO Introduce a separate ctxt->ops->set_cpl callback
 	 */
-	if (ctxt->eflags & X86_EFLAGS_VM)
+	if (ctxt->eflags & X86_EFLAGS_VM) {
 		ctxt->mode = X86EMUL_MODE_VM86;
-	else
+		cpl = 3;
+	} else {
 		ctxt->mode = X86EMUL_MODE_PROT32;
-
-	ctxt->ops->set_rflags(ctxt, ctxt->eflags);
+		cpl = tss->cs & 3;
+	}
 
 	/*
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -3868,10 +3878,12 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N, N, N, N, N,
 	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
 	/* 0x20 - 0x2F */
-	DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
-	DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
-	IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
-	IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
+	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
+	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
+	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
+						check_cr_write),
+	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
+						check_dr_write),
 	N, N, N, N,
 	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
 	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9736529ade08..006911858174 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 {
+	/* Note that we never get here with APIC virtualization enabled.  */
+
 	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 		++apic->isr_count;
 	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 	apic->highest_isr_cache = vec;
 }
 
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+	int result;
+
+	/*
+	 * Note that isr_count is always 1, and highest_isr_cache
+	 * is always -1, with APIC virtualization enabled.
+	 */
+	if (!apic->isr_count)
+		return -1;
+	if (likely(apic->highest_isr_cache != -1))
+		return apic->highest_isr_cache;
+
+	result = find_highest_vector(apic->regs + APIC_ISR);
+	ASSERT(result == -1 || result >= 16);
+
+	return result;
+}
+
 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 {
-	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+	struct kvm_vcpu *vcpu;
+	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+		return;
+
+	vcpu = apic->vcpu;
+
+	/*
+	 * We do get here for APIC virtualization enabled if the guest
+	 * uses the Hyper-V APIC enlightenment.  In this case we may need
+	 * to trigger a new interrupt delivery by writing the SVI field;
+	 * on the other hand isr_count and highest_isr_cache are unused
+	 * and must be left alone.
+	 */
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+					       apic_find_highest_isr(apic));
+	else {
 		--apic->isr_count;
-	BUG_ON(apic->isr_count < 0);
-	apic->highest_isr_cache = -1;
+		BUG_ON(apic->isr_count < 0);
+		apic->highest_isr_cache = -1;
+	}
 }
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
@@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 }
 
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
-{
-	int result;
-
-	/* Note that isr_count is always 1 with vid enabled */
-	if (!apic->isr_count)
-		return -1;
-	if (likely(apic->highest_isr_cache != -1))
-		return apic->highest_isr_cache;
-
-	result = find_highest_vector(apic->regs + APIC_ISR);
-	ASSERT(result == -1 || result >= 16);
-
-	return result;
-}
-
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
+	/* Note that we never get here with APIC virtualization enabled.  */
+
 	if (vector == -1)
 		return -1;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 65f2400b8268..931467881da7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -22,6 +22,7 @@
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "cpuid.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -3516,11 +3517,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 {
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
+	u64 gbpages_bit_rsvd = 0;
 
 	context->bad_mt_xwr = 0;
 
 	if (!context->nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
+	if (!guest_cpuid_has_gbpages(vcpu))
+		gbpages_bit_rsvd = rsvd_bits(7, 7);
 	switch (context->root_level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
@@ -3557,14 +3561,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
 		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
 		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) |
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7f4f9c2badae..ec8366c5cfea 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1338,21 +1338,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 }
 
-static void svm_update_cpl(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	int cpl;
-
-	if (!is_protmode(vcpu))
-		cpl = 0;
-	else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
-		cpl = 3;
-	else
-		cpl = svm->vmcb->save.cs.selector & 0x3;
-
-	svm->vmcb->save.cpl = cpl;
-}
-
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return to_svm(vcpu)->vmcb->save.rflags;
@@ -1360,11 +1345,12 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 
 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
-
+       /*
+        * Any change of EFLAGS.VM is accompained by a reload of SS
+        * (caused by either a task switch or an inter-privilege IRET),
+        * so we do not need to update the CPL here.
+        */
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
-	if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
-		svm_update_cpl(vcpu);
 }
 
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1631,8 +1617,15 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
 		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
 	}
-	if (seg == VCPU_SREG_CS)
-		svm_update_cpl(vcpu);
+
+	/*
+	 * This is always accurate, except if SYSRET returned to a segment
+	 * with SS.DPL != 3.  Intel does not have this quirk, and always
+	 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
+	 * would entail passing the CPL to userspace and back.
+	 */
+	if (seg == VCPU_SREG_SS)
+		svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
 
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
@@ -2770,12 +2763,6 @@ static int xsetbv_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm)
-{
-	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
-	return 1;
-}
-
 static int task_switch_interception(struct vcpu_svm *svm)
 {
 	u16 tss_selector;
@@ -3287,6 +3274,24 @@ static int pause_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static int nop_interception(struct vcpu_svm *svm)
+{
+	skip_emulated_instruction(&(svm->vcpu));
+	return 1;
+}
+
+static int monitor_interception(struct vcpu_svm *svm)
+{
+	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
+	return nop_interception(svm);
+}
+
+static int mwait_interception(struct vcpu_svm *svm)
+{
+	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
+	return nop_interception(svm);
+}
+
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
@@ -3344,8 +3349,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
-	[SVM_EXIT_MONITOR]			= invalid_op_interception,
-	[SVM_EXIT_MWAIT]			= invalid_op_interception,
+	[SVM_EXIT_MONITOR]			= monitor_interception,
+	[SVM_EXIT_MWAIT]			= mwait_interception,
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 };
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 545245d7cc63..33574c95220d 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -91,16 +91,21 @@ TRACE_EVENT(kvm_hv_hypercall,
 /*
  * Tracepoint for PIO.
  */
+
+#define KVM_PIO_IN   0
+#define KVM_PIO_OUT  1
+
 TRACE_EVENT(kvm_pio,
 	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
-		 unsigned int count),
-	TP_ARGS(rw, port, size, count),
+		 unsigned int count, void *data),
+	TP_ARGS(rw, port, size, count, data),
 
 	TP_STRUCT__entry(
 		__field(	unsigned int, 	rw		)
 		__field(	unsigned int, 	port		)
 		__field(	unsigned int, 	size		)
 		__field(	unsigned int,	count		)
+		__field(	unsigned int,	val		)
 	),
 
 	TP_fast_assign(
@@ -108,11 +113,18 @@ TRACE_EVENT(kvm_pio,
 		__entry->port		= port;
 		__entry->size		= size;
 		__entry->count		= count;
+		if (size == 1)
+			__entry->val	= *(unsigned char *)data;
+		else if (size == 2)
+			__entry->val	= *(unsigned short *)data;
+		else
+			__entry->val	= *(unsigned int *)data;
 	),
 
-	TP_printk("pio_%s at 0x%x size %d count %d",
+	TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s",
 		  __entry->rw ? "write" : "read",
-		  __entry->port, __entry->size, __entry->count)
+		  __entry->port, __entry->size, __entry->count, __entry->val,
+		  __entry->count > 1 ? "(...)" : "")
 );
 
 /*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 72b8012991b9..248287cefa7a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -354,6 +354,7 @@ struct vmcs02_list {
 struct nested_vmx {
 	/* Has the level1 guest done vmxon? */
 	bool vmxon;
+	gpa_t vmxon_ptr;
 
 	/* The guest-physical address of the current VMCS L1 keeps for L2 */
 	gpa_t current_vmptr;
@@ -413,7 +414,6 @@ struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
 	u8                    fail;
-	u8                    cpl;
 	bool                  nmi_known_unmasked;
 	u32                   exit_intr_info;
 	u32                   idt_vectoring_info;
@@ -3149,10 +3149,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
 	fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
 	fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-
-	/* CPL is always 0 when CPU enters protected mode */
-	__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-	vmx->cpl = 0;
 }
 
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
@@ -3554,22 +3550,14 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (!is_protmode(vcpu))
+	if (unlikely(vmx->rmode.vm86_active))
 		return 0;
-
-	if (!is_long_mode(vcpu)
-	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
-		return 3;
-
-	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
-		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-		vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
+	else {
+		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
+		return AR_DPL(ar);
 	}
-
-	return vmx->cpl;
 }
 
-
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
 	u32 ar;
@@ -3597,8 +3585,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
 	vmx_segment_cache_clear(vmx);
-	if (seg == VCPU_SREG_CS)
-		__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 
 	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
 		vmx->rmode.segs[seg] = *var;
@@ -5142,7 +5128,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 			return 1;
 		kvm_register_write(vcpu, reg, val);
 	} else
-		if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
+		if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg)))
 			return 1;
 
 	skip_emulated_instruction(vcpu);
@@ -5415,7 +5401,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5649,12 +5635,24 @@ static int handle_pause(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int handle_invalid_op(struct kvm_vcpu *vcpu)
+static int handle_nop(struct kvm_vcpu *vcpu)
 {
-	kvm_queue_exception(vcpu, UD_VECTOR);
+	skip_emulated_instruction(vcpu);
 	return 1;
 }
 
+static int handle_mwait(struct kvm_vcpu *vcpu)
+{
+	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
+	return handle_nop(vcpu);
+}
+
+static int handle_monitor(struct kvm_vcpu *vcpu)
+{
+	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
+	return handle_nop(vcpu);
+}
+
 /*
  * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
  * We could reuse a single VMCS for all the L2 guests, but we also want the
@@ -5792,6 +5790,154 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
 }
 
 /*
+ * Decode the memory-address operand of a vmx instruction, as recorded on an
+ * exit caused by such an instruction (run by a guest hypervisor).
+ * On success, returns 0. When the operand is invalid, returns 1 and throws
+ * #UD or #GP.
+ */
+static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
+				 unsigned long exit_qualification,
+				 u32 vmx_instruction_info, gva_t *ret)
+{
+	/*
+	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
+	 * Execution", on an exit, vmx_instruction_info holds most of the
+	 * addressing components of the operand. Only the displacement part
+	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
+	 * For how an actual address is calculated from all these components,
+	 * refer to Vol. 1, "Operand Addressing".
+	 */
+	int  scaling = vmx_instruction_info & 3;
+	int  addr_size = (vmx_instruction_info >> 7) & 7;
+	bool is_reg = vmx_instruction_info & (1u << 10);
+	int  seg_reg = (vmx_instruction_info >> 15) & 7;
+	int  index_reg = (vmx_instruction_info >> 18) & 0xf;
+	bool index_is_valid = !(vmx_instruction_info & (1u << 22));
+	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
+	bool base_is_valid  = !(vmx_instruction_info & (1u << 27));
+
+	if (is_reg) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/* Addr = segment_base + offset */
+	/* offset = base + [index * scale] + displacement */
+	*ret = vmx_get_segment_base(vcpu, seg_reg);
+	if (base_is_valid)
+		*ret += kvm_register_read(vcpu, base_reg);
+	if (index_is_valid)
+		*ret += kvm_register_read(vcpu, index_reg)<<scaling;
+	*ret += exit_qualification; /* holds the displacement */
+
+	if (addr_size == 1) /* 32 bit */
+		*ret &= 0xffffffff;
+
+	/*
+	 * TODO: throw #GP (and return 1) in various cases that the VM*
+	 * instructions require it - e.g., offset beyond segment limit,
+	 * unusable or unreadable/unwritable segment, non-canonical 64-bit
+	 * address, and so on. Currently these are not checked.
+	 */
+	return 0;
+}
+
+/*
+ * This function performs the various checks including
+ * - if it's 4KB aligned
+ * - No bits beyond the physical address width are set
+ * - Returns 0 on success or else 1
+ * (Intel SDM Section 30.3)
+ */
+static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
+				  gpa_t *vmpointer)
+{
+	gva_t gva;
+	gpa_t vmptr;
+	struct x86_exception e;
+	struct page *page;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+		return 1;
+
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
+				sizeof(vmptr), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	switch (exit_reason) {
+	case EXIT_REASON_VMON:
+		/*
+		 * SDM 3: 24.11.5
+		 * The first 4 bytes of VMXON region contain the supported
+		 * VMCS revision identifier
+		 *
+		 * Note - IA32_VMX_BASIC[48] will never be 1
+		 * for the nested case;
+		 * which replaces physical address width with 32
+		 *
+		 */
+		if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
+			nested_vmx_failInvalid(vcpu);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+
+		page = nested_get_page(vcpu, vmptr);
+		if (page == NULL ||
+		    *(u32 *)kmap(page) != VMCS12_REVISION) {
+			nested_vmx_failInvalid(vcpu);
+			kunmap(page);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+		kunmap(page);
+		vmx->nested.vmxon_ptr = vmptr;
+		break;
+	case EXIT_REASON_VMCLEAR:
+		if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
+			nested_vmx_failValid(vcpu,
+					     VMXERR_VMCLEAR_INVALID_ADDRESS);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+
+		if (vmptr == vmx->nested.vmxon_ptr) {
+			nested_vmx_failValid(vcpu,
+					     VMXERR_VMCLEAR_VMXON_POINTER);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+		break;
+	case EXIT_REASON_VMPTRLD:
+		if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
+			nested_vmx_failValid(vcpu,
+					     VMXERR_VMPTRLD_INVALID_ADDRESS);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+
+		if (vmptr == vmx->nested.vmxon_ptr) {
+			nested_vmx_failValid(vcpu,
+					     VMXERR_VMCLEAR_VMXON_POINTER);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+		break;
+	default:
+		return 1; /* shouldn't happen */
+	}
+
+	if (vmpointer)
+		*vmpointer = vmptr;
+	return 0;
+}
+
+/*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
  * inspect the argument to VMXON (the so-called "VMXON pointer") because we
@@ -5829,6 +5975,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
+
+	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+		return 1;
+
 	if (vmx->nested.vmxon) {
 		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
 		skip_emulated_instruction(vcpu);
@@ -5951,88 +6101,20 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-/*
- * Decode the memory-address operand of a vmx instruction, as recorded on an
- * exit caused by such an instruction (run by a guest hypervisor).
- * On success, returns 0. When the operand is invalid, returns 1 and throws
- * #UD or #GP.
- */
-static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
-				 unsigned long exit_qualification,
-				 u32 vmx_instruction_info, gva_t *ret)
-{
-	/*
-	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
-	 * Execution", on an exit, vmx_instruction_info holds most of the
-	 * addressing components of the operand. Only the displacement part
-	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
-	 * For how an actual address is calculated from all these components,
-	 * refer to Vol. 1, "Operand Addressing".
-	 */
-	int  scaling = vmx_instruction_info & 3;
-	int  addr_size = (vmx_instruction_info >> 7) & 7;
-	bool is_reg = vmx_instruction_info & (1u << 10);
-	int  seg_reg = (vmx_instruction_info >> 15) & 7;
-	int  index_reg = (vmx_instruction_info >> 18) & 0xf;
-	bool index_is_valid = !(vmx_instruction_info & (1u << 22));
-	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
-	bool base_is_valid  = !(vmx_instruction_info & (1u << 27));
-
-	if (is_reg) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
-
-	/* Addr = segment_base + offset */
-	/* offset = base + [index * scale] + displacement */
-	*ret = vmx_get_segment_base(vcpu, seg_reg);
-	if (base_is_valid)
-		*ret += kvm_register_read(vcpu, base_reg);
-	if (index_is_valid)
-		*ret += kvm_register_read(vcpu, index_reg)<<scaling;
-	*ret += exit_qualification; /* holds the displacement */
-
-	if (addr_size == 1) /* 32 bit */
-		*ret &= 0xffffffff;
-
-	/*
-	 * TODO: throw #GP (and return 1) in various cases that the VM*
-	 * instructions require it - e.g., offset beyond segment limit,
-	 * unusable or unreadable/unwritable segment, non-canonical 64-bit
-	 * address, and so on. Currently these are not checked.
-	 */
-	return 0;
-}
-
 /* Emulate the VMCLEAR instruction */
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	gva_t gva;
 	gpa_t vmptr;
 	struct vmcs12 *vmcs12;
 	struct page *page;
-	struct x86_exception e;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-				sizeof(vmptr), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
-		return 1;
-	}
-
-	if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
-		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
-
 	if (vmptr == vmx->nested.current_vmptr) {
 		nested_release_vmcs12(vmx);
 		vmx->nested.current_vmptr = -1ull;
@@ -6352,30 +6434,15 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	gva_t gva;
 	gpa_t vmptr;
-	struct x86_exception e;
 	u32 exec_control;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-				sizeof(vmptr), &e)) {
-		kvm_inject_page_fault(vcpu, &e);
-		return 1;
-	}
-
-	if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
-		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
-
 	if (vmx->nested.current_vmptr != vmptr) {
 		struct vmcs12 *new_vmcs12;
 		struct page *page;
@@ -6547,8 +6614,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
-	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
-	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
+	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
@@ -7389,7 +7456,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
 				  | (1 << VCPU_EXREG_RFLAGS)
-				  | (1 << VCPU_EXREG_CPL)
 				  | (1 << VCPU_EXREG_PDPTR)
 				  | (1 << VCPU_EXREG_SEGMENTS)
 				  | (1 << VCPU_EXREG_CR3));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5582c385bc0..57eac309c22e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -701,10 +701,11 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 0;
 	}
 
-	if (is_long_mode(vcpu) && (cr3 & CR3_L_MODE_RESERVED_BITS))
-		return 1;
-	if (is_pae(vcpu) && is_paging(vcpu) &&
-	    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+	if (is_long_mode(vcpu)) {
+		if (cr3 & CR3_L_MODE_RESERVED_BITS)
+			return 1;
+	} else if (is_pae(vcpu) && is_paging(vcpu) &&
+		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
 	vcpu->arch.cr3 = cr3;
@@ -1917,6 +1918,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
 		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
 			vcpu->arch.hv_vapic = data;
+			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+				return 1;
 			break;
 		}
 		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
@@ -1927,6 +1930,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			return 1;
 		vcpu->arch.hv_vapic = data;
 		mark_page_dirty(vcpu->kvm, gfn);
+		if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+			return 1;
 		break;
 	}
 	case HV_X64_MSR_EOI:
@@ -4480,8 +4485,6 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
 			       unsigned short port, void *val,
 			       unsigned int count, bool in)
 {
-	trace_kvm_pio(!in, port, size, count);
-
 	vcpu->arch.pio.port = port;
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.count  = count;
@@ -4516,6 +4519,7 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (ret) {
 data_avail:
 		memcpy(val, vcpu->arch.pio_data, size * count);
+		trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
 		vcpu->arch.pio.count = 0;
 		return 1;
 	}
@@ -4530,6 +4534,7 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
 	memcpy(vcpu->arch.pio_data, val, size * count);
+	trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
 	return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
 }
 
@@ -4641,11 +4646,6 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
 	return res;
 }
 
-static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
-{
-	kvm_set_rflags(emul_to_vcpu(ctxt), val);
-}
-
 static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
 {
 	return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
@@ -4830,7 +4830,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.set_idt	     = emulator_set_idt,
 	.get_cr              = emulator_get_cr,
 	.set_cr              = emulator_set_cr,
-	.set_rflags          = emulator_set_rflags,
 	.cpl                 = emulator_get_cpl,
 	.get_dr              = emulator_get_dr,
 	.set_dr              = emulator_set_dr,
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index b57fe0efb422..1918d9dff45d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -27,7 +27,9 @@ struct read_info_sccb {
 	u8	loadparm[8];		/* 24-31 */
 	u8	_reserved1[48 - 32];	/* 32-47 */
 	u64	facilities;		/* 48-55 */
-	u8	_reserved2[84 - 56];	/* 56-83 */
+	u8	_reserved2a[76 - 56];	/* 56-75 */
+	u32	ibc;			/* 76-79 */
+	u8	_reserved2b[84 - 80];	/* 80-83 */
 	u8	fac84;			/* 84 */
 	u8	fac85;			/* 85 */
 	u8	_reserved3[91 - 86];	/* 86-90 */
@@ -47,6 +49,7 @@ static unsigned long sclp_hsa_size;
 static unsigned int sclp_max_cpu;
 static struct sclp_ipl_info sclp_ipl_info;
 static unsigned char sclp_siif;
+static u32 sclp_ibc;
 
 u64 sclp_facilities;
 u8 sclp_fac84;
@@ -111,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 	sclp_rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	sclp_rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
 	sclp_rzm <<= 20;
+	sclp_ibc = sccb->ibc;
 
 	if (!sccb->hcpua) {
 		if (MACHINE_IS_VM)
@@ -168,6 +172,12 @@ int sclp_has_siif(void)
 }
 EXPORT_SYMBOL(sclp_has_siif);
 
+unsigned int sclp_get_ibc(void)
+{
+	return sclp_ibc;
+}
+EXPORT_SYMBOL(sclp_get_ibc);
+
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. The sclp_facilities_detect() function retrieves
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 820fc2e1d9df..970c68197c69 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_EPR_EXIT          20
 #define KVM_REQ_SCAN_IOAPIC       21
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
+#define KVM_REQ_ENABLE_IBS        23
+#define KVM_REQ_DISABLE_IBS       24
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -368,6 +370,7 @@ struct kvm {
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 16cb1a14993b..32cf446f1911 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -424,6 +424,8 @@ struct kvm_s390_psw {
 #define KVM_S390_INT_PFAULT_INIT	0xfffe0004u
 #define KVM_S390_INT_PFAULT_DONE	0xfffe0005u
 #define KVM_S390_MCHK			0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP		0xffff1004u
+#define KVM_S390_INT_CPU_TIMER		0xffff1005u
 #define KVM_S390_INT_VIRTIO		0xffff2603u
 #define KVM_S390_INT_SERVICE		0xffff2401u
 #define KVM_S390_INT_EMERGENCY		0xffff1201u
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 10df100c4514..62f4223f4c9e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	use_mm(mm);
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
+	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	kvm_async_page_present_sync(vcpu, apf);
-	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 912ec5a95e2c..20c3af7692c5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
+#include <linux/srcu.h>
 #include <linux/slab.h>
 
 #include "iodev.h"
@@ -118,19 +119,22 @@ static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct _irqfd_resampler *resampler;
+	struct kvm *kvm;
 	struct _irqfd *irqfd;
+	int idx;
 
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	kvm = resampler->kvm;
 
-	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 		    resampler->notifier.gsi, 0, false);
 
-	rcu_read_lock();
+	idx = srcu_read_lock(&kvm->irq_srcu);
 
 	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
 		eventfd_signal(irqfd->resamplefd, 1);
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_del_rcu(&irqfd->resampler_link);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
 		list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry *irq;
 	struct kvm *kvm = irqfd->kvm;
+	int idx;
 
 	if (flags & POLLIN) {
-		rcu_read_lock();
-		irq = rcu_dereference(irqfd->irq_entry);
+		idx = srcu_read_lock(&kvm->irq_srcu);
+		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
-		rcu_read_unlock();
+		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
 
 	if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		}
 
 		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
-		synchronize_rcu();
+		synchronize_srcu(&kvm->irq_srcu);
 
 		mutex_unlock(&kvm->irqfds.resampler_lock);
 	}
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_rcu done by caller
+			 * It is paired with synchronize_srcu done by caller
 			 * of that function.
 			 */
 			rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
 
 /*
  * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
 void kvm_irq_routing_update(struct kvm *kvm,
 			    struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
+	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * Since there's no easy way to do this, we only support injecting MSI
 	 * which is limited to 1:1 GSI mapping.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 				ret = -EWOULDBLOCK;
 			break;
 		}
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
 
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	int gsi;
+	int idx, gsi;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
 #include "irq.h"
@@ -33,19 +34,19 @@
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
-				rcu_read_unlock();
+				srcu_read_unlock(&kvm->irq_srcu, idx);
 				return true;
 			}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	return false;
 }
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 #ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
 #endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
+	int ret = -1, i = 0, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
 		int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
-	synchronize_rcu();
+	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa70c6e642b4..95b4c2b3906a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 	r = hardware_enable_all();
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
@@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
 	cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
 	hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);