Diffstat (limited to 'lib')
-rw-r--r-- lib/Kconfig | 47
-rw-r--r-- lib/Kconfig.debug | 104
-rw-r--r-- lib/Makefile | 12
-rw-r--r-- lib/atomic64.c | 66
-rw-r--r-- lib/bitmap.c | 10
-rw-r--r-- lib/btree.c | 1
-rw-r--r-- lib/bug.c | 2
-rw-r--r-- lib/clz_tab.c | 18
-rw-r--r-- lib/cordic.c | 2
-rw-r--r-- lib/crc32.c | 21
-rw-r--r-- lib/debugobjects.c | 68
-rw-r--r-- lib/decompress_bunzip2.c | 5
-rw-r--r-- lib/decompress_unlzma.c | 2
-rw-r--r-- lib/decompress_unlzo.c | 2
-rw-r--r-- lib/devres.c | 61
-rw-r--r-- lib/digsig.c | 278
-rw-r--r-- lib/dma-debug.c | 72
-rw-r--r-- lib/dynamic_debug.c | 430
-rw-r--r-- lib/dynamic_queue_limits.c | 134
-rw-r--r-- lib/fault-inject.c | 13
-rw-r--r-- lib/hexdump.c | 15
-rw-r--r-- lib/idr.c | 25
-rw-r--r-- lib/iomap.c | 38
-rw-r--r-- lib/kobject.c | 37
-rw-r--r-- lib/kobject_uevent.c | 24
-rw-r--r-- lib/kref.c | 97
-rw-r--r-- lib/kstrtox.c | 83
-rw-r--r-- lib/kstrtox.h | 8
-rw-r--r-- lib/llist.c | 74
-rw-r--r-- lib/mpi/Makefile | 32
-rw-r--r-- lib/mpi/generic_mpi-asm-defs.h | 4
-rw-r--r-- lib/mpi/generic_mpih-add1.c | 61
-rw-r--r-- lib/mpi/generic_mpih-lshift.c | 63
-rw-r--r-- lib/mpi/generic_mpih-mul1.c | 57
-rw-r--r-- lib/mpi/generic_mpih-mul2.c | 60
-rw-r--r-- lib/mpi/generic_mpih-mul3.c | 61
-rw-r--r-- lib/mpi/generic_mpih-rshift.c | 63
-rw-r--r-- lib/mpi/generic_mpih-sub1.c | 60
-rw-r--r-- lib/mpi/longlong.h | 1500
-rw-r--r-- lib/mpi/mpi-add.c | 234
-rw-r--r-- lib/mpi/mpi-bit.c | 217
-rw-r--r-- lib/mpi/mpi-cmp.c | 68
-rw-r--r-- lib/mpi/mpi-div.c | 338
-rw-r--r-- lib/mpi/mpi-gcd.c | 59
-rw-r--r-- lib/mpi/mpi-inline.c | 31
-rw-r--r-- lib/mpi/mpi-inline.h | 122
-rw-r--r-- lib/mpi/mpi-internal.h | 261
-rw-r--r-- lib/mpi/mpi-inv.c | 187
-rw-r--r-- lib/mpi/mpi-mpow.c | 134
-rw-r--r-- lib/mpi/mpi-mul.c | 194
-rw-r--r-- lib/mpi/mpi-pow.c | 323
-rw-r--r-- lib/mpi/mpi-scan.c | 136
-rw-r--r-- lib/mpi/mpicoder.c | 280
-rw-r--r-- lib/mpi/mpih-cmp.c | 56
-rw-r--r-- lib/mpi/mpih-div.c | 545
-rw-r--r-- lib/mpi/mpih-mul.c | 527
-rw-r--r-- lib/mpi/mpiutil.c | 211
-rw-r--r-- lib/nlattr.c | 1
-rw-r--r-- lib/pci_iomap.c | 48
-rw-r--r-- lib/percpu_counter.c | 20
-rw-r--r-- lib/proportions.c | 12
-rw-r--r-- lib/radix-tree.c | 164
-rw-r--r-- lib/raid6/algos.c | 1
-rw-r--r-- lib/raid6/int.uc | 2
-rw-r--r-- lib/raid6/mktables.c | 1
-rw-r--r-- lib/raid6/recov.c | 1
-rw-r--r-- lib/ratelimit.c | 4
-rw-r--r-- lib/reciprocal_div.c | 2
-rw-r--r-- lib/rwsem-spinlock.c | 38
-rw-r--r-- lib/rwsem.c | 14
-rw-r--r-- lib/scatterlist.c | 4
-rw-r--r-- lib/sha1.c | 1
-rw-r--r-- lib/smp_processor_id.c | 2
-rw-r--r-- lib/spinlock_debug.c | 19
-rw-r--r-- lib/string.c | 57
-rw-r--r-- lib/swiotlb.c | 10
-rw-r--r-- lib/vsprintf.c | 78
-rw-r--r-- lib/xz/xz_dec_bcj.c | 27
78 files changed, 7414 insertions(+), 725 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index 6c695ff9caba..028aba9e72af 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -19,6 +19,16 @@ config RATIONAL
config GENERIC_FIND_FIRST_BIT
bool
+config NO_GENERIC_PCI_IOPORT_MAP
+ bool
+
+config GENERIC_PCI_IOMAP
+ bool
+
+config GENERIC_IOMAP
+ bool
+ select GENERIC_PCI_IOMAP
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
@@ -244,6 +254,9 @@ config CPU_RMAP
bool
depends on SMP
+config DQL
+ bool
+
#
# Netlink attribute parsing support is select'ed if needed
#
@@ -269,14 +282,38 @@ config AVERAGE
If unsure, say N.
+config CLZ_TAB
+ bool
+
config CORDIC
- tristate "Cordic function"
+ tristate "CORDIC algorithm"
+ help
+ This option provides an implementation of the CORDIC algorithm;
+ calculations are in fixed point. The module will be called cordic.
+
+config MPILIB
+ tristate
+ select CLZ_TAB
help
- The option provides arithmetic function using cordic algorithm
- so its calculations are in fixed point. Modules can select this
- when they require this function. Module will be called cordic.
+ Multiprecision maths library from GnuPG.
+ It is used to implement RSA digital signature verification,
+ which is used by the IMA/EVM digital signature extension.
-config LLIST
+config MPILIB_EXTRA
bool
+ depends on MPILIB
+ help
+ Additional sources of the multiprecision maths library from GnuPG.
+ This code is unnecessary for RSA digital signature verification,
+ but can be compiled if needed.
+
+config SIGNATURE
+ tristate
+ depends on KEYS && CRYPTO
+ select CRYPTO_SHA1
+ select MPILIB
+ help
+ Digital signature verification. Currently only RSA is supported.
+ The implementation uses the GnuPG MPI library.
endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c0cb9c4bc46d..05037dc9bde7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -117,31 +117,31 @@ config DEBUG_SECTION_MISMATCH
help
The section mismatch analysis checks if there are illegal
references from one section to another section.
- Linux will during link or during runtime drop some sections
- and any use of code/data previously in these sections will
+ During linktime or runtime, some sections are dropped;
+ any use of code/data previously in these sections would
most likely result in an oops.
- In the code functions and variables are annotated with
- __init, __devinit etc. (see full list in include/linux/init.h)
+ In the code, functions and variables are annotated with
+ __init, __devinit, etc. (see the full list in include/linux/init.h),
which results in the code/data being placed in specific sections.
- The section mismatch analysis is always done after a full
- kernel build but enabling this option will in addition
- do the following:
- - Add the option -fno-inline-functions-called-once to gcc
- When inlining a function annotated __init in a non-init
- function we would lose the section information and thus
+ The section mismatch analysis is always performed after a full
+ kernel build, and enabling this option causes the following
+ additional steps to occur:
+ - Add the option -fno-inline-functions-called-once to gcc commands.
+ When inlining a function annotated with __init in a non-init
+ function, we would lose the section information and thus
the analysis would not catch the illegal reference.
- This option tells gcc to inline less but will also
- result in a larger kernel.
- - Run the section mismatch analysis for each module/built-in.o
- When we run the section mismatch analysis on vmlinux.o we
+ This option tells gcc to inline less (but it does result in
+ a larger kernel).
+ - Run the section mismatch analysis for each module/built-in.o file.
+ When we run the section mismatch analysis on vmlinux.o, we
lose valueble information about where the mismatch was
introduced.
Running the analysis for each module/built-in.o file
- will tell where the mismatch happens much closer to the
- source. The drawback is that we will report the same
- mismatch at least twice.
- - Enable verbose reporting from modpost to help solving
- the section mismatches reported.
+ tells where the mismatch happens much closer to the
+ source. The drawback is that the same mismatch is
+ reported at least twice.
+ - Enable verbose reporting from modpost in order to help resolve
+ the section mismatches that are reported.
config DEBUG_KERNEL
bool "Kernel debugging"
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR
hard and soft lockups.
Softlockups are bugs that cause the kernel to loop in kernel
- mode for more than 60 seconds, without giving other tasks a
+ mode for more than 20 seconds, without giving other tasks a
chance to run. The current stack trace is displayed upon
detection and the system will stay locked up.
Hardlockups are bugs that cause the CPU to loop in kernel mode
- for more than 60 seconds, without letting other interrupts have a
+ for more than 10 seconds, without letting other interrupts have a
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
The overhead should be minimal. A periodic hrtimer runs to
- generate interrupts and kick the watchdog task every 10-12 seconds.
- An NMI is generated every 60 seconds or so to check for hardlockups.
+ generate interrupts and kick the watchdog task every 4 seconds.
+ An NMI is generated every 10 seconds or so to check for hardlockups.
+
+ The frequency of hrtimer and NMI events and the soft and hard lockup
+ thresholds can be controlled through the sysctl watchdog_thresh.
config HARDLOCKUP_DETECTOR
def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
help
Say Y here to enable the kernel to panic on "hard lockups",
which are bugs that cause the kernel to loop in kernel
- mode with interrupts disabled for more than 60 seconds.
+ mode with interrupts disabled for more than 10 seconds (configurable
+ using the watchdog_thresh sysctl).
Say N if unsure.
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
- mode for more than 60 seconds, without giving other tasks a
- chance to run.
+ mode for more than 20 seconds (configurable using the watchdog_thresh
+ sysctl), without giving other tasks a chance to run.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
@@ -248,8 +252,9 @@ config DEFAULT_HUNG_TASK_TIMEOUT
to determine when a task has become non-responsive and should
be considered hung.
- It can be adjusted at runtime via the kernel.hung_task_timeout
- sysctl or by writing a value to /proc/sys/kernel/hung_task_timeout.
+ It can be adjusted at runtime via the kernel.hung_task_timeout_secs
+ sysctl or by writing a value to
+ /proc/sys/kernel/hung_task_timeout_secs.
A timeout of 0 disables the check. The default is two minutes.
Keeping the default should be fine in most cases.
@@ -413,7 +418,7 @@ config SLUB_STATS
config DEBUG_KMEMLEAK
bool "Kernel memory leak detector"
- depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
+ depends on DEBUG_KERNEL && EXPERIMENTAL && \
(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
select DEBUG_FS
@@ -835,7 +840,7 @@ config DEBUG_CREDENTIALS
#
# Select this config option from the architecture Kconfig, if it
-# it is preferred to always offer frame pointers as a config
+# is preferred to always offer frame pointers as a config
# option on the architecture (regardless of KERNEL_DEBUG):
#
config ARCH_WANT_FRAME_POINTERS
@@ -926,6 +931,30 @@ config RCU_CPU_STALL_VERBOSE
Say Y if you want to enable such checks.
+config RCU_CPU_STALL_INFO
+ bool "Print additional diagnostics on RCU CPU stall"
+ depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL
+ default n
+ help
+ For each stalled CPU that is aware of the current RCU grace
+ period, print out additional per-CPU diagnostic information
+ regarding scheduling-clock ticks, idle state, and,
+ for RCU_FAST_NO_HZ kernels, idle-entry state.
+
+ Say N if you are unsure.
+
+ Say Y if you want to enable such diagnostics.
+
+config RCU_TRACE
+ bool "Enable tracing for RCU"
+ depends on DEBUG_KERNEL
+ help
+ This option provides tracing in RCU, which presents stats
+ in debugfs for debugging the RCU implementation.
+
+ Say Y here if you want to enable RCU tracing.
+ Say N if you are unsure.
+
config KPROBES_SANITY_TEST
bool "Kprobes sanity tests"
depends on DEBUG_KERNEL
@@ -1070,6 +1099,17 @@ config FAIL_IO_TIMEOUT
Only works with drivers that use the generic timeout handling,
for others it won't do anything.
+config FAIL_MMC_REQUEST
+ bool "Fault-injection capability for MMC IO"
+ select DEBUG_FS
+ depends on FAULT_INJECTION && MMC
+ help
+ Provide fault-injection capability for MMC IO.
+ This will make the mmc core return data errors. This is
+ useful to test the error handling in the mmc block device
+ and to test how the mmc host driver handles retries from
+ the block device.
+
config FAULT_INJECTION_DEBUG_FS
bool "Debugfs entries for fault-injection capabilities"
depends on FAULT_INJECTION && SYSFS && DEBUG_FS
@@ -1081,7 +1121,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
depends on !X86_64
select STACKTRACE
- select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
+ select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
help
Provide stacktrace filter for fault-injection capabilities
@@ -1091,7 +1131,7 @@ config LATENCYTOP
depends on DEBUG_KERNEL
depends on STACKTRACE_SUPPORT
depends on PROC_FS
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
diff --git a/lib/Makefile b/lib/Makefile
index 3f5bc6d903e0..18515f0267c4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -17,12 +17,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
-lib-y += kobject.o kref.o klist.o
+lib-y += kobject.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
- bsearch.o find_last_bit.o find_next_bit.o
+ bsearch.o find_last_bit.o find_next_bit.o llist.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
@@ -33,6 +33,7 @@ endif
lib-$(CONFIG_HOTPLUG) += kobject_uevent.o
obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
@@ -115,7 +116,12 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
obj-$(CONFIG_CORDIC) += cordic.o
-obj-$(CONFIG_LLIST) += llist.o
+obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+
+obj-$(CONFIG_MPILIB) += mpi/
+obj-$(CONFIG_SIGNATURE) += digsig.o
+
+obj-$(CONFIG_CLZ_TAB) += clz_tab.o
hostprogs-y := gen_crc32table
clean-files := crc32table.h
diff --git a/lib/atomic64.c b/lib/atomic64.c
index e12ae0dd08a8..3975470caf4f 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -29,11 +29,11 @@
* Ensure each lock is in a separate cacheline.
*/
static union {
- spinlock_t lock;
+ raw_spinlock_t lock;
char pad[L1_CACHE_BYTES];
} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
-static inline spinlock_t *lock_addr(const atomic64_t *v)
+static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
{
unsigned long addr = (unsigned long) v;
@@ -45,12 +45,12 @@ static inline spinlock_t *lock_addr(const atomic64_t *v)
long long atomic64_read(const atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_read);
@@ -58,34 +58,34 @@ EXPORT_SYMBOL(atomic64_read);
void atomic64_set(atomic64_t *v, long long i)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
v->counter = i;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(atomic64_set);
void atomic64_add(long long a, atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
v->counter += a;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(atomic64_add);
long long atomic64_add_return(long long a, atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter += a;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_add_return);
@@ -93,23 +93,23 @@ EXPORT_SYMBOL(atomic64_add_return);
void atomic64_sub(long long a, atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
v->counter -= a;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(atomic64_sub);
long long atomic64_sub_return(long long a, atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter -= a;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_sub_return);
@@ -117,14 +117,14 @@ EXPORT_SYMBOL(atomic64_sub_return);
long long atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter - 1;
if (val >= 0)
v->counter = val;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_dec_if_positive);
@@ -132,14 +132,14 @@ EXPORT_SYMBOL(atomic64_dec_if_positive);
long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter;
if (val == o)
v->counter = n;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_cmpxchg);
@@ -147,13 +147,13 @@ EXPORT_SYMBOL(atomic64_cmpxchg);
long long atomic64_xchg(atomic64_t *v, long long new)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
long long val;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
val = v->counter;
v->counter = new;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return val;
}
EXPORT_SYMBOL(atomic64_xchg);
@@ -161,15 +161,15 @@ EXPORT_SYMBOL(atomic64_xchg);
int atomic64_add_unless(atomic64_t *v, long long a, long long u)
{
unsigned long flags;
- spinlock_t *lock = lock_addr(v);
+ raw_spinlock_t *lock = lock_addr(v);
int ret = 0;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
if (v->counter != u) {
v->counter += a;
ret = 1;
}
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
return ret;
}
EXPORT_SYMBOL(atomic64_add_unless);
@@ -179,7 +179,7 @@ static int init_atomic64_lock(void)
int i;
for (i = 0; i < NR_LOCKS; ++i)
- spin_lock_init(&atomic64_lock[i].lock);
+ raw_spin_lock_init(&atomic64_lock[i].lock);
return 0;
}
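
The atomic64.c hunk is a mechanical conversion from spinlock_t to raw_spinlock_t so the generic 64-bit atomics keep spinning even on preempt-rt, where ordinary spinlocks become sleeping locks. The hashed lock array it operates on is worth seeing in isolation; below is a minimal user-space sketch of that idea, with pthread_mutex_t standing in for raw_spinlock_t and a simplified hash relative to the real lock_addr() in lib/atomic64.c.

	/*
	 * User-space sketch of lib/atomic64.c's hashed lock array. Each
	 * atomic64_t is protected by one of NR_LOCKS locks selected by
	 * hashing its address; the union pads each lock to a cacheline
	 * so two locks never share one.
	 */
	#include <pthread.h>
	#include <stdio.h>

	#define NR_LOCKS	16
	#define L1_CACHE_BYTES	64

	static union {
		pthread_mutex_t lock;
		char pad[L1_CACHE_BYTES];
	} atomic64_lock[NR_LOCKS];

	typedef struct { long long counter; } atomic64_t;

	static pthread_mutex_t *lock_addr(const atomic64_t *v)
	{
		unsigned long addr = (unsigned long)v;

		addr >>= 6;		/* drop cacheline-offset bits */
		return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
	}

	static long long atomic64_add_return(long long a, atomic64_t *v)
	{
		pthread_mutex_t *lock = lock_addr(v);
		long long val;

		pthread_mutex_lock(lock);
		val = v->counter += a;
		pthread_mutex_unlock(lock);
		return val;
	}

	int main(void)
	{
		atomic64_t v = { 0 };
		int i;

		for (i = 0; i < NR_LOCKS; i++)
			pthread_mutex_init(&atomic64_lock[i].lock, NULL);
		printf("%lld\n", atomic64_add_return(5, &v));	/* prints 5 */
		return 0;
	}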
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 2f4412e4d071..0d4a127dd9b3 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -419,7 +419,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
{
int c, old_c, totaldigits, ndigits, nchunks, nbits;
u32 chunk;
- const char __user *ubuf = buf;
+ const char __user __force *ubuf = (const char __user __force *)buf;
bitmap_zero(maskp, nmaskbits);
@@ -504,7 +504,9 @@ int bitmap_parse_user(const char __user *ubuf,
{
if (!access_ok(VERIFY_READ, ubuf, ulen))
return -EFAULT;
- return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits);
+ return __bitmap_parse((const char __force *)ubuf,
+ ulen, 1, maskp, nmaskbits);
+
}
EXPORT_SYMBOL(bitmap_parse_user);
@@ -594,7 +596,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
{
unsigned a, b;
int c, old_c, totaldigits;
- const char __user *ubuf = buf;
+ const char __user __force *ubuf = (const char __user __force *)buf;
int exp_digit, in_range;
totaldigits = c = 0;
@@ -694,7 +696,7 @@ int bitmap_parselist_user(const char __user *ubuf,
{
if (!access_ok(VERIFY_READ, ubuf, ulen))
return -EFAULT;
- return __bitmap_parselist((const char *)ubuf,
+ return __bitmap_parselist((const char __force *)ubuf,
ulen, 1, maskp, nmaskbits);
}
EXPORT_SYMBOL(bitmap_parselist_user);
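
The bitmap.c changes have no runtime effect; they exist to satisfy sparse. __bitmap_parse() and __bitmap_parselist() share one code path for kernel and user buffers, so casts between the two address spaces need __force to tell the checker the mixing is intentional. A minimal sketch of how these annotations expand (mirroring include/linux/compiler.h; parse_common() and parse_user() are made-up stand-ins for the shared parser):

	/* Under sparse (__CHECKER__) the annotations are real attributes;
	 * for a normal compile they vanish. */
	#ifdef __CHECKER__
	# define __user		__attribute__((noderef, address_space(1)))
	# define __force	__attribute__((force))
	#else
	# define __user
	# define __force
	#endif

	static int parse_common(const char *buf, int is_user)
	{
		/* shared parser: reads buf directly, or via
		 * copy-from-user helpers when is_user is set */
		(void)buf;
		return is_user;
	}

	static int parse_user(const char __user *ubuf)
	{
		/* without __force, sparse warns about casting away
		 * the __user address space here */
		return parse_common((const char __force *)ubuf, 1);
	}

	int main(void)
	{
		const char __user *ubuf = (const char __user *)"1";

		return !parse_user(ubuf);	/* exit 0 on success */
	}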
diff --git a/lib/btree.c b/lib/btree.c
index 2a34392bcecc..e5ec1e9c1aa5 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -357,6 +357,7 @@ miss:
}
return NULL;
}
+EXPORT_SYMBOL_GPL(btree_get_prev);
static int getpos(struct btree_geo *geo, unsigned long *node,
unsigned long *key)
diff --git a/lib/bug.c b/lib/bug.c
index 19552096d16b..a28c1415357c 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -169,7 +169,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
return BUG_TRAP_TYPE_WARN;
}
- printk(KERN_EMERG "------------[ cut here ]------------\n");
+ printk(KERN_DEFAULT "------------[ cut here ]------------\n");
if (file)
printk(KERN_CRIT "kernel BUG at %s:%u!\n",
diff --git a/lib/clz_tab.c b/lib/clz_tab.c
new file mode 100644
index 000000000000..7287b4a991a7
--- /dev/null
+++ b/lib/clz_tab.c
@@ -0,0 +1,18 @@
+const unsigned char __clz_tab[] = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+};
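
__clz_tab[v] is the bit length of the byte v (index of its highest set bit, plus one), so a 32-bit count-leading-zeros reduces to locating the highest non-zero byte and a single table lookup. A self-contained sketch of the generic fallback pattern (the real consumer is the count_leading_zeros() macro in lib/mpi/longlong.h; compile this together with the table above):

	#include <stdio.h>

	extern const unsigned char __clz_tab[];	/* lib/clz_tab.c */

	/* Simplified generic count_leading_zeros for a 32-bit word. */
	static int clz32(unsigned int x)
	{
		int bits;

		if (x == 0)
			return 32;
		if (x >= 1u << 16)
			bits = (x >= 1u << 24) ? 24 : 16;
		else
			bits = (x >= 1u << 8) ? 8 : 0;
		/* __clz_tab[v] == bit_length(v), so this computes
		 * 32 - bit_length(x) */
		return 32 - (bits + __clz_tab[x >> bits]);
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       clz32(1), clz32(0x80000000u), clz32(0x1234));
		/* prints: 31 0 19 */
		return 0;
	}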
diff --git a/lib/cordic.c b/lib/cordic.c
index aa27a88d7e04..6cf477839ebd 100644
--- a/lib/cordic.c
+++ b/lib/cordic.c
@@ -96,6 +96,6 @@ struct cordic_iq cordic_calc_iq(s32 theta)
}
EXPORT_SYMBOL(cordic_calc_iq);
-MODULE_DESCRIPTION("Cordic functions");
+MODULE_DESCRIPTION("CORDIC algorithm");
MODULE_AUTHOR("Broadcom Corporation");
MODULE_LICENSE("Dual BSD/GPL");
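
For context on the renamed option: CORDIC computes the sine/cosine pair of an angle using only shifts and adds, rotating a start vector through progressively smaller arctangent steps. A minimal self-contained fixed-point sketch of the core loop follows; it shows the idea behind cordic_calc_iq(), not the kernel's exact table or precision (floating point appears only to build the table and print):

	#include <stdio.h>

	#define CORDIC_BITS	16	/* fixed-point fraction bits */
	#define FIXED(x)	((int)((x) * (1 << CORDIC_BITS)))

	/* atan(2^-i) in the same fixed-point format, i = 0..11 */
	static const int atan_tab[12] = {
		FIXED(0.7853981), FIXED(0.4636476), FIXED(0.2449786),
		FIXED(0.1243549), FIXED(0.0624188), FIXED(0.0312398),
		FIXED(0.0156237), FIXED(0.0078123), FIXED(0.0039062),
		FIXED(0.0019531), FIXED(0.0009765), FIXED(0.0004882),
	};

	/* Rotate (x, y) by theta radians (fixed point), shift-add only. */
	static void cordic(int theta, int *cos_out, int *sin_out)
	{
		/* start at (K, 0); K = 0.60725... pre-compensates gain */
		int x = FIXED(0.6072529), y = 0, i;

		for (i = 0; i < 12; i++) {
			int dx = y >> i, dy = x >> i;

			if (theta >= 0) {	/* rotate counter-clockwise */
				x -= dx; y += dy; theta -= atan_tab[i];
			} else {		/* rotate clockwise */
				x += dx; y -= dy; theta += atan_tab[i];
			}
		}
		*cos_out = x;
		*sin_out = y;
	}

	int main(void)
	{
		int c, s;

		cordic(FIXED(0.5235987), &c, &s);	/* 30 degrees */
		printf("cos=%f sin=%f\n",		/* ~0.866, ~0.5 */
		       (double)c / (1 << CORDIC_BITS),
		       (double)s / (1 << CORDIC_BITS));
		return 0;
	}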
diff --git a/lib/crc32.c b/lib/crc32.c
index a6e633a48cea..4b35d2b4437c 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -51,20 +51,21 @@ static inline u32
crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
{
# ifdef __LITTLE_ENDIAN
-# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
-# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
- tab[2][(crc >> 8) & 255] ^ \
- tab[1][(crc >> 16) & 255] ^ \
- tab[0][(crc >> 24) & 255]
+# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
+# define DO_CRC4 crc = t3[(crc) & 255] ^ \
+ t2[(crc >> 8) & 255] ^ \
+ t1[(crc >> 16) & 255] ^ \
+ t0[(crc >> 24) & 255]
# else
-# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
-# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
- tab[1][(crc >> 8) & 255] ^ \
- tab[2][(crc >> 16) & 255] ^ \
- tab[3][(crc >> 24) & 255]
+# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+# define DO_CRC4 crc = t0[(crc) & 255] ^ \
+ t1[(crc >> 8) & 255] ^ \
+ t2[(crc >> 16) & 255] ^ \
+ t3[(crc >> 24) & 255]
# endif
const u32 *b;
size_t rem_len;
+ const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
/* Align it */
if (unlikely((long)buf & 3 && len)) {
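
The crc32.c hunk is purely about code generation: loading tab[0]..tab[3] into local pointers once lets the compiler keep the four slice tables in registers instead of re-deriving them from the two-dimensional parameter on every DO_CRC4. A self-contained sketch of the same slice-by-4 structure with the hoisted t0..t3 (the polynomial matches lib/crc32.c; the table generation here is the textbook recurrence, not the kernel's generated header):

	#include <stdio.h>
	#include <stdint.h>

	#define CRCPOLY_LE 0xedb88320	/* same polynomial as lib/crc32.c */

	static uint32_t tab[4][256];

	static void crc32_build_tables(void)
	{
		uint32_t crc;
		int i, j;

		for (i = 0; i < 256; i++) {
			crc = i;
			for (j = 0; j < 8; j++)
				crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
			tab[0][i] = crc;
		}
		/* tab[k][i] advances the CRC by k extra zero bytes */
		for (i = 0; i < 256; i++)
			for (j = 1; j < 4; j++)
				tab[j][i] = (tab[j - 1][i] >> 8) ^
					    tab[0][tab[j - 1][i] & 255];
	}

	/* Little-endian slice-by-4 body, with the patch's t0..t3 hoisting. */
	static uint32_t crc32_le(uint32_t crc, const uint8_t *p, size_t len)
	{
		const uint32_t *t0 = tab[0], *t1 = tab[1],
			       *t2 = tab[2], *t3 = tab[3];

		for (; len >= 4; len -= 4, p += 4) {
			crc ^= (uint32_t)p[0] | p[1] << 8 | p[2] << 16 |
			       (uint32_t)p[3] << 24;
			crc = t3[crc & 255] ^ t2[(crc >> 8) & 255] ^
			      t1[(crc >> 16) & 255] ^ t0[(crc >> 24) & 255];
		}
		while (len--)	/* byte-at-a-time tail */
			crc = t0[(crc ^ *p++) & 255] ^ (crc >> 8);
		return crc;
	}

	int main(void)
	{
		crc32_build_tables();
		/* standard check value: crc32("123456789") == 0xcbf43926 */
		printf("%08x\n",
		       ~crc32_le(~0u, (const uint8_t *)"123456789", 9));
		return 0;
	}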
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a78b7c6e042c..0ab9ae8057f0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -268,12 +268,16 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
* Try to repair the damage, so we have a better chance to get useful
* debug output.
*/
-static void
+static int
debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
void * addr, enum debug_obj_state state)
{
+ int fixed = 0;
+
if (fixup)
- debug_objects_fixups += fixup(addr, state);
+ fixed = fixup(addr, state);
+ debug_objects_fixups += fixed;
+ return fixed;
}
static void debug_object_is_on_stack(void *addr, int onstack)
@@ -386,6 +390,9 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ struct debug_obj o = { .object = addr,
+ .state = ODEBUG_STATE_NOTAVAILABLE,
+ .descr = descr };
if (!debug_objects_enabled)
return;
@@ -425,8 +432,9 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
* let the type specific code decide whether this is
* true or not.
*/
- debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE);
+ if (debug_object_fixup(descr->fixup_activate, addr,
+ ODEBUG_STATE_NOTAVAILABLE))
+ debug_print_object(&o, "activate");
}
/**
@@ -563,6 +571,44 @@ out_unlock:
}
/**
+ * debug_object_assert_init - debug checks when object should be init-ed
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
+{
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (!obj) {
+ struct debug_obj o = { .object = addr,
+ .state = ODEBUG_STATE_NOTAVAILABLE,
+ .descr = descr };
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ /*
+ * Maybe the object is static. Let the type specific
+ * code decide what to do.
+ */
+ if (debug_object_fixup(descr->fixup_assert_init, addr,
+ ODEBUG_STATE_NOTAVAILABLE))
+ debug_print_object(&o, "assert_init");
+ return;
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
* debug_object_active_state - debug checks object usage state machine
* @addr: address of the object
* @descr: pointer to an object specific debug description structure
@@ -772,17 +818,9 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state)
if (obj->static_init == 1) {
debug_object_init(obj, &descr_type_test);
debug_object_activate(obj, &descr_type_test);
- /*
- * Real code should return 0 here ! This is
- * not a fixup of some bad behaviour. We
- * merily call the debug_init function to keep
- * track of the object.
- */
- return 1;
- } else {
- /* Real code needs to emit a warning here */
+ return 0;
}
- return 0;
+ return 1;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
@@ -921,7 +959,7 @@ static void __init debug_objects_selftest(void)
obj.static_init = 1;
debug_object_activate(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings))
+ if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;
debug_object_init(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
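
The debugobjects changes tighten the fixup contract: a callback should return 1 only when it actually repaired something (the core now counts that and, for activate/assert_init, prints a diagnostic), and 0 when nothing was broken, which is why the selftest's static-init path flips from 1 to 0. A hedged kernel-style sketch of a callback written to the new contract (my_obj, my_descr, and the is_static flag are hypothetical):

	/* Hypothetical fixup following the new return convention. */
	static int my_fixup_activate(void *addr, enum debug_obj_state state)
	{
		struct my_obj *obj = addr;	/* hypothetical object type */

		switch (state) {
		case ODEBUG_STATE_NOTAVAILABLE:
			if (obj->is_static) {
				/* statically initialized object: just start
				 * tracking it; nothing was actually broken */
				debug_object_init(obj, &my_descr);
				debug_object_activate(obj, &my_descr);
				return 0;
			}
			/* genuinely untracked object: report it as fixed so
			 * the core bumps the counter and prints a warning */
			return 1;
		default:
			return 0;
		}
	}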
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index a7b80c1d6a0d..31c5f7675fbf 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -1,4 +1,3 @@
-/* vi: set sw = 4 ts = 4: */
/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
@@ -691,7 +690,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
outbuf = malloc(BZIP2_IOBUF_SIZE);
if (!outbuf) {
- error("Could not allocate output bufer");
+ error("Could not allocate output buffer");
return RETVAL_OUT_OF_MEMORY;
}
if (buf)
@@ -699,7 +698,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
else
inbuf = malloc(BZIP2_IOBUF_SIZE);
if (!inbuf) {
- error("Could not allocate input bufer");
+ error("Could not allocate input buffer");
i = RETVAL_OUT_OF_MEMORY;
goto exit_0;
}
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 476c65af9709..32adb73a9038 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -562,7 +562,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
else
inbuf = malloc(LZMA_IOBUF_SIZE);
if (!inbuf) {
- error("Could not allocate input bufer");
+ error("Could not allocate input buffer");
goto exit_0;
}
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 5a7a2adf4c4c..4531294fa62f 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -279,7 +279,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
ret = 0;
exit_2:
if (!input)
- free(in_buf);
+ free(in_buf_save);
exit_1:
if (!output)
free(out_buf);
diff --git a/lib/devres.c b/lib/devres.c
index 7c0e953a7486..9676617b4486 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -85,6 +85,57 @@ void devm_iounmap(struct device *dev, void __iomem *addr)
}
EXPORT_SYMBOL(devm_iounmap);
+/**
+ * devm_request_and_ioremap() - Check, request region, and ioremap resource
+ * @dev: Generic device to handle the resource for
+ * @res: resource to be handled
+ *
+ * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
+ * everything is undone on driver detach. Checks arguments, so you can feed
+ * it the result from e.g. platform_get_resource() directly. Returns the
+ * remapped pointer or NULL on error. Usage example:
+ *
+ * res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ * base = devm_request_and_ioremap(&pdev->dev, res);
+ * if (!base)
+ * return -EADDRNOTAVAIL;
+ */
+void __iomem *devm_request_and_ioremap(struct device *dev,
+ struct resource *res)
+{
+ resource_size_t size;
+ const char *name;
+ void __iomem *dest_ptr;
+
+ BUG_ON(!dev);
+
+ if (!res || resource_type(res) != IORESOURCE_MEM) {
+ dev_err(dev, "invalid resource\n");
+ return NULL;
+ }
+
+ size = resource_size(res);
+ name = res->name ?: dev_name(dev);
+
+ if (!devm_request_mem_region(dev, res->start, size, name)) {
+ dev_err(dev, "can't request region for resource %pR\n", res);
+ return NULL;
+ }
+
+ if (res->flags & IORESOURCE_CACHEABLE)
+ dest_ptr = devm_ioremap(dev, res->start, size);
+ else
+ dest_ptr = devm_ioremap_nocache(dev, res->start, size);
+
+ if (!dest_ptr) {
+ dev_err(dev, "ioremap failed for resource %pR\n", res);
+ devm_release_mem_region(dev, res->start, size);
+ }
+
+ return dest_ptr;
+}
+EXPORT_SYMBOL(devm_request_and_ioremap);
+
#ifdef CONFIG_HAS_IOPORT
/*
* Generic iomap devres
@@ -253,7 +304,7 @@ EXPORT_SYMBOL(pcim_iounmap);
*
* Request and iomap regions specified by @mask.
*/
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name)
{
void __iomem * const *iomap;
int i, rc;
@@ -306,7 +357,7 @@ EXPORT_SYMBOL(pcim_iomap_regions);
*
* Request all PCI BARs and iomap regions specified by @mask.
*/
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
const char *name)
{
int request_mask = ((1 << 6) - 1) & ~mask;
@@ -330,7 +381,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
*
* Unmap and release regions specified by @mask.
*/
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
{
void __iomem * const *iomap;
int i;
@@ -348,5 +399,5 @@ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
}
}
EXPORT_SYMBOL(pcim_iounmap_regions);
-#endif
-#endif
+#endif /* CONFIG_PCI */
+#endif /* CONFIG_HAS_IOPORT */
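
devm_request_and_ioremap() collapses the usual three-step check/request/remap dance, plus its error unwinding, into one call. A hedged sketch of how a platform driver's probe would use it (the foo_ names and register offset are made up; the error code follows the kerneldoc above):

	/* Hypothetical probe() showing the intended call pattern. */
	static int foo_probe(struct platform_device *pdev)
	{
		struct resource *res;
		void __iomem *base;

		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		base = devm_request_and_ioremap(&pdev->dev, res);
		if (!base)
			return -EADDRNOTAVAIL;

		/* region and mapping are released on driver detach */
		writel(0x1, base + 0x04);	/* hypothetical enable reg */
		return 0;
	}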
diff --git a/lib/digsig.c b/lib/digsig.c
new file mode 100644
index 000000000000..286d558033e2
--- /dev/null
+++ b/lib/digsig.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ * <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * File: sign.c
+ * implements signature (RSA) verification
+ * pkcs decoding is based on LibTomCrypt code
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <linux/mpi.h>
+#include <linux/digsig.h>
+
+static struct crypto_shash *shash;
+
+static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
+ unsigned long msglen,
+ unsigned long modulus_bitlen,
+ unsigned char *out,
+ unsigned long *outlen)
+{
+ unsigned long modulus_len, ps_len, i;
+
+ modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0);
+
+ /* test message size */
+ if ((msglen > modulus_len) || (modulus_len < 11))
+ return -EINVAL;
+
+ /* separate encoded message */
+ if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1))
+ return -EINVAL;
+
+ for (i = 2; i < modulus_len - 1; i++)
+ if (msg[i] != 0xFF)
+ break;
+
+ /* separator check */
+ if (msg[i] != 0)
+ /* There was no octet with hexadecimal value 0x00
+ to separate ps from m. */
+ return -EINVAL;
+
+ ps_len = i - 2;
+
+ if (*outlen < (msglen - (2 + ps_len + 1))) {
+ *outlen = msglen - (2 + ps_len + 1);
+ return -EOVERFLOW;
+ }
+
+ *outlen = (msglen - (2 + ps_len + 1));
+ memcpy(out, &msg[2 + ps_len + 1], *outlen);
+
+ return 0;
+}
+
+/*
+ * RSA Signature verification with public key
+ */
+static int digsig_verify_rsa(struct key *key,
+ const char *sig, int siglen,
+ const char *h, int hlen)
+{
+ int err = -EINVAL;
+ unsigned long len;
+ unsigned long mlen, mblen;
+ unsigned nret, l;
+ int head, i;
+ unsigned char *out1 = NULL, *out2 = NULL;
+ MPI in = NULL, res = NULL, pkey[2];
+ uint8_t *p, *datap, *endp;
+ struct user_key_payload *ukp;
+ struct pubkey_hdr *pkh;
+
+ down_read(&key->sem);
+ ukp = key->payload.data;
+
+ if (ukp->datalen < sizeof(*pkh))
+ goto err1;
+
+ pkh = (struct pubkey_hdr *)ukp->data;
+
+ if (pkh->version != 1)
+ goto err1;
+
+ if (pkh->algo != PUBKEY_ALGO_RSA)
+ goto err1;
+
+ if (pkh->nmpi != 2)
+ goto err1;
+
+ datap = pkh->mpi;
+ endp = ukp->data + ukp->datalen;
+
+ err = -ENOMEM;
+
+ for (i = 0; i < pkh->nmpi; i++) {
+ unsigned int remaining = endp - datap;
+ pkey[i] = mpi_read_from_buffer(datap, &remaining);
+ if (!pkey[i])
+ goto err;
+ datap += remaining;
+ }
+
+ mblen = mpi_get_nbits(pkey[0]);
+ mlen = (mblen + 7)/8;
+
+ if (mlen == 0)
+ goto err;
+
+ out1 = kzalloc(mlen, GFP_KERNEL);
+ if (!out1)
+ goto err;
+
+ out2 = kzalloc(mlen, GFP_KERNEL);
+ if (!out2)
+ goto err;
+
+ nret = siglen;
+ in = mpi_read_from_buffer(sig, &nret);
+ if (!in)
+ goto err;
+
+ res = mpi_alloc(mpi_get_nlimbs(in) * 2);
+ if (!res)
+ goto err;
+
+ err = mpi_powm(res, in, pkey[1], pkey[0]);
+ if (err)
+ goto err;
+
+ if (mpi_get_nlimbs(res) * BYTES_PER_MPI_LIMB > mlen) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ p = mpi_get_buffer(res, &l, NULL);
+ if (!p) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ len = mlen;
+ head = len - l;
+ memset(out1, 0, head);
+ memcpy(out1 + head, p, l);
+
+ err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len);
+
+ if (!err && len == hlen)
+ err = memcmp(out2, h, hlen);
+
+err:
+ mpi_free(in);
+ mpi_free(res);
+ kfree(out1);
+ kfree(out2);
+ while (--i >= 0)
+ mpi_free(pkey[i]);
+err1:
+ up_read(&key->sem);
+
+ return err;
+}
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring: keyring to search key in
+ * @sig: digital signature
+ * @siglen: length of the signature
+ * @data: data
+ * @datalen: length of the data
+ * @return: 0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally a hash of the content is used as the data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+ const char *data, int datalen)
+{
+ int err = -ENOMEM;
+ struct signature_hdr *sh = (struct signature_hdr *)sig;
+ struct shash_desc *desc = NULL;
+ unsigned char hash[SHA1_DIGEST_SIZE];
+ struct key *key;
+ char name[20];
+
+ if (siglen < sizeof(*sh) + 2)
+ return -EINVAL;
+
+ if (sh->algo != PUBKEY_ALGO_RSA)
+ return -ENOTSUPP;
+
+ sprintf(name, "%llX", __be64_to_cpup((uint64_t *)sh->keyid));
+
+ if (keyring) {
+ /* search in specific keyring */
+ key_ref_t kref;
+ kref = keyring_search(make_key_ref(keyring, 1UL),
+ &key_type_user, name);
+ if (IS_ERR(kref))
+ key = ERR_PTR(PTR_ERR(kref));
+ else
+ key = key_ref_to_ptr(kref);
+ } else {
+ key = request_key(&key_type_user, name, NULL);
+ }
+ if (IS_ERR(key)) {
+ pr_err("key not found, id: %s\n", name);
+ return PTR_ERR(key);
+ }
+
+ desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash),
+ GFP_KERNEL);
+ if (!desc)
+ goto err;
+
+ desc->tfm = shash;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ crypto_shash_init(desc);
+ crypto_shash_update(desc, data, datalen);
+ crypto_shash_update(desc, sig, sizeof(*sh));
+ crypto_shash_final(desc, hash);
+
+ kfree(desc);
+
+ /* pass signature mpis address */
+ err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh),
+ hash, sizeof(hash));
+
+err:
+ key_put(key);
+
+ return err ? -EINVAL : 0;
+}
+EXPORT_SYMBOL_GPL(digsig_verify);
+
+static int __init digsig_init(void)
+{
+ shash = crypto_alloc_shash("sha1", 0, 0);
+ if (IS_ERR(shash)) {
+ pr_err("shash allocation failed\n");
+ return PTR_ERR(shash);
+ }
+
+ return 0;
+
+}
+
+static void __exit digsig_cleanup(void)
+{
+ crypto_free_shash(shash);
+}
+
+module_init(digsig_init);
+module_exit(digsig_cleanup);
+
+MODULE_LICENSE("GPL");
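
pkcs_1_v1_5_decode_emsa() above strips EMSA-PKCS1-v1_5 padding, which always has the shape 0x00 0x01 FF..FF 0x00 <message>, with at least eight 0xFF bytes. A self-contained toy of the same walk over a hand-built buffer (the kernel version additionally handles the caller-supplied-buffer-too-small case, omitted here):

	#include <stdio.h>
	#include <string.h>

	/* Toy EMSA-PKCS1-v1_5 decode: 0x00 0x01 FF..FF 0x00 <payload>. */
	static int decode_emsa(const unsigned char *msg, unsigned long msglen,
			       unsigned char *out, unsigned long *outlen)
	{
		unsigned long i;

		if (msglen < 11 || msg[0] != 0x00 || msg[1] != 0x01)
			return -1;
		for (i = 2; i < msglen - 1; i++)	/* skip 0xFF padding */
			if (msg[i] != 0xFF)
				break;
		if (msg[i] != 0x00)		/* separator must follow */
			return -1;
		*outlen = msglen - (i + 1);
		memcpy(out, &msg[i + 1], *outlen);
		return 0;
	}

	int main(void)
	{
		/* 2 header bytes + 8 x 0xFF + separator + "hi" */
		unsigned char msg[13] = { 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF,
					  0xFF, 0xFF, 0xFF, 0xFF, 0x00,
					  'h', 'i' };
		unsigned char out[13];
		unsigned long outlen = sizeof(out);

		if (!decode_emsa(msg, sizeof(msg), out, &outlen))
			printf("payload: %.*s\n", (int)outlen, out); /* hi */
		return 0;
	}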
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index db07bfd9298e..13ef2338be41 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -24,6 +24,7 @@
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
+#include <linux/export.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -62,6 +63,8 @@ struct dma_debug_entry {
#endif
};
+typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
+
struct hash_bucket {
struct list_head list;
spinlock_t lock;
@@ -167,7 +170,7 @@ static bool driver_filter(struct device *dev)
return false;
/* driver filter on but not yet initialized */
- drv = get_driver(dev->driver);
+ drv = dev->driver;
if (!drv)
return false;
@@ -182,7 +185,6 @@ static bool driver_filter(struct device *dev)
}
read_unlock_irqrestore(&driver_name_lock, flags);
- put_driver(drv);
return ret;
}
@@ -240,18 +242,37 @@ static void put_hash_bucket(struct hash_bucket *bucket,
spin_unlock_irqrestore(&bucket->lock, __flags);
}
+static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
+{
+ return ((a->dev_addr == b->dev_addr) &&
+ (a->dev == b->dev)) ? true : false;
+}
+
+static bool containing_match(struct dma_debug_entry *a,
+ struct dma_debug_entry *b)
+{
+ if (a->dev != b->dev)
+ return false;
+
+ if ((b->dev_addr <= a->dev_addr) &&
+ ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
+ return true;
+
+ return false;
+}
+
/*
* Search a given entry in the hash bucket list
*/
-static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
- struct dma_debug_entry *ref)
+static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
+ struct dma_debug_entry *ref,
+ match_fn match)
{
struct dma_debug_entry *entry, *ret = NULL;
int matches = 0, match_lvl, last_lvl = 0;
list_for_each_entry(entry, &bucket->list, list) {
- if ((entry->dev_addr != ref->dev_addr) ||
- (entry->dev != ref->dev))
+ if (!match(ref, entry))
continue;
/*
@@ -293,6 +314,39 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
return ret;
}
+static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
+ struct dma_debug_entry *ref)
+{
+ return __hash_bucket_find(bucket, ref, exact_match);
+}
+
+static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
+ struct dma_debug_entry *ref,
+ unsigned long *flags)
+{
+
+ unsigned int max_range = dma_get_max_seg_size(ref->dev);
+ struct dma_debug_entry *entry, index = *ref;
+ unsigned int range = 0;
+
+ while (range <= max_range) {
+ entry = __hash_bucket_find(*bucket, &index, containing_match);
+
+ if (entry)
+ return entry;
+
+ /*
+ * Nothing found, go back a hash bucket
+ */
+ put_hash_bucket(*bucket, flags);
+ range += (1 << HASH_FN_SHIFT);
+ index.dev_addr -= (1 << HASH_FN_SHIFT);
+ *bucket = get_hash_bucket(&index, flags);
+ }
+
+ return NULL;
+}
+
/*
* Add an entry to a hash bucket
*/
@@ -802,7 +856,7 @@ static void check_unmap(struct dma_debug_entry *ref)
}
bucket = get_hash_bucket(ref, &flags);
- entry = hash_bucket_find(bucket, ref);
+ entry = bucket_find_exact(bucket, ref);
if (!entry) {
err_printk(ref->dev, NULL, "DMA-API: device driver tries "
@@ -902,7 +956,7 @@ static void check_sync(struct device *dev,
bucket = get_hash_bucket(ref, &flags);
- entry = hash_bucket_find(bucket, ref);
+ entry = bucket_find_contain(&bucket, ref, &flags);
if (!entry) {
err_printk(dev, NULL, "DMA-API: device driver tries "
@@ -1060,7 +1114,7 @@ static int get_nr_mapped_entries(struct device *dev,
int mapped_ents;
bucket = get_hash_bucket(ref, &flags);
- entry = hash_bucket_find(bucket, ref);
+ entry = bucket_find_exact(bucket, ref);
mapped_ents = 0;
if (entry)
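
check_sync() now accepts any mapping that contains the synced range, not just an exact (dev, dev_addr) match, and bucket_find_contain() walks backward through neighboring hash buckets because a containing mapping may start at a lower address that hashed into a different bucket. The containment test in isolation (self-contained; a toy entry type stands in for struct dma_debug_entry and the dev comparison is dropped):

	#include <stdbool.h>
	#include <stdio.h>

	struct entry {			/* toy dma_debug_entry */
		unsigned long long dev_addr;
		unsigned long long size;
	};

	/* b contains a iff [a, a+size) lies inside [b, b+size). */
	static bool containing_match(const struct entry *a,
				     const struct entry *b)
	{
		return b->dev_addr <= a->dev_addr &&
		       b->dev_addr + b->size >= a->dev_addr + a->size;
	}

	int main(void)
	{
		struct entry mapping = { .dev_addr = 0x1000, .size = 0x400 };
		struct entry sync    = { .dev_addr = 0x1100, .size = 0x80 };

		printf("%d\n", containing_match(&sync, &mapping));  /* 1 */
		return 0;
	}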
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 75ca78f3a8c9..310c753cf83e 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -10,11 +10,12 @@
* Copyright (C) 2011 Bart Van Assche. All Rights Reserved.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kallsyms.h>
-#include <linux/version.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
@@ -30,6 +31,8 @@
#include <linux/jump_label.h>
#include <linux/hardirq.h>
#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
extern struct _ddebug __start___verbose[];
extern struct _ddebug __stop___verbose[];
@@ -38,7 +41,6 @@ struct ddebug_table {
struct list_head link;
char *mod_name;
unsigned int num_ddebugs;
- unsigned int num_enabled;
struct _ddebug *ddebugs;
};
@@ -58,6 +60,7 @@ struct ddebug_iter {
static DEFINE_MUTEX(ddebug_lock);
static LIST_HEAD(ddebug_tables);
static int verbose = 0;
+module_param(verbose, int, 0644);
/* Return the last part of a pathname */
static inline const char *basename(const char *path)
@@ -66,12 +69,24 @@ static inline const char *basename(const char *path)
return tail ? tail+1 : path;
}
+/* Return the path relative to source root */
+static inline const char *trim_prefix(const char *path)
+{
+ int skip = strlen(__FILE__) - strlen("lib/dynamic_debug.c");
+
+ if (strncmp(path, __FILE__, skip))
+ skip = 0; /* prefix mismatch, don't skip */
+
+ return path + skip;
+}
+
static struct { unsigned flag:8; char opt_char; } opt_array[] = {
{ _DPRINTK_FLAGS_PRINT, 'p' },
{ _DPRINTK_FLAGS_INCL_MODNAME, 'm' },
{ _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' },
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
{ _DPRINTK_FLAGS_INCL_TID, 't' },
+ { _DPRINTK_FLAGS_NONE, '_' },
};
/* format a string into buf[] which describes the _ddebug's flags */
@@ -81,58 +96,74 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
char *p = buf;
int i;
- BUG_ON(maxlen < 4);
+ BUG_ON(maxlen < 6);
for (i = 0; i < ARRAY_SIZE(opt_array); ++i)
if (dp->flags & opt_array[i].flag)
*p++ = opt_array[i].opt_char;
if (p == buf)
- *p++ = '-';
+ *p++ = '_';
*p = '\0';
return buf;
}
+#define vpr_info_dq(q, msg) \
+do { \
+ if (verbose) \
+ /* trim last char off format print */ \
+ pr_info("%s: func=\"%s\" file=\"%s\" " \
+ "module=\"%s\" format=\"%.*s\" " \
+ "lineno=%u-%u", \
+ msg, \
+ q->function ? q->function : "", \
+ q->filename ? q->filename : "", \
+ q->module ? q->module : "", \
+ (int)(q->format ? strlen(q->format) - 1 : 0), \
+ q->format ? q->format : "", \
+ q->first_lineno, q->last_lineno); \
+} while (0)
+
/*
- * Search the tables for _ddebug's which match the given
- * `query' and apply the `flags' and `mask' to them. Tells
- * the user which ddebug's were changed, or whether none
- * were matched.
+ * Search the tables for _ddebug's which match the given `query' and
+ * apply the `flags' and `mask' to them. Returns number of matching
+ * callsites, normally the same as number of changes. If verbose,
+ * logs the changes. Takes ddebug_lock.
*/
-static void ddebug_change(const struct ddebug_query *query,
- unsigned int flags, unsigned int mask)
+static int ddebug_change(const struct ddebug_query *query,
+ unsigned int flags, unsigned int mask)
{
int i;
struct ddebug_table *dt;
unsigned int newflags;
unsigned int nfound = 0;
- char flagbuf[8];
+ char flagbuf[10];
/* search for matching ddebugs */
mutex_lock(&ddebug_lock);
list_for_each_entry(dt, &ddebug_tables, link) {
/* match against the module name */
- if (query->module != NULL &&
- strcmp(query->module, dt->mod_name))
+ if (query->module && strcmp(query->module, dt->mod_name))
continue;
for (i = 0 ; i < dt->num_ddebugs ; i++) {
struct _ddebug *dp = &dt->ddebugs[i];
/* match against the source filename */
- if (query->filename != NULL &&
+ if (query->filename &&
strcmp(query->filename, dp->filename) &&
- strcmp(query->filename, basename(dp->filename)))
+ strcmp(query->filename, basename(dp->filename)) &&
+ strcmp(query->filename, trim_prefix(dp->filename)))
continue;
/* match against the function */
- if (query->function != NULL &&
+ if (query->function &&
strcmp(query->function, dp->function))
continue;
/* match against the format */
- if (query->format != NULL &&
- strstr(dp->format, query->format) == NULL)
+ if (query->format &&
+ !strstr(dp->format, query->format))
continue;
/* match against the line number range */
@@ -148,20 +179,10 @@ static void ddebug_change(const struct ddebug_query *query,
newflags = (dp->flags & mask) | flags;
if (newflags == dp->flags)
continue;
-
- if (!newflags)
- dt->num_enabled--;
- else if (!dp->flags)
- dt->num_enabled++;
dp->flags = newflags;
- if (newflags)
- dp->enabled = 1;
- else
- dp->enabled = 0;
if (verbose)
- printk(KERN_INFO
- "ddebug: changed %s:%d [%s]%s %s\n",
- dp->filename, dp->lineno,
+ pr_info("changed %s:%d [%s]%s =%s\n",
+ trim_prefix(dp->filename), dp->lineno,
dt->mod_name, dp->function,
ddebug_describe_flags(dp, flagbuf,
sizeof(flagbuf)));
@@ -170,7 +191,9 @@ static void ddebug_change(const struct ddebug_query *query,
mutex_unlock(&ddebug_lock);
if (!nfound && verbose)
- printk(KERN_INFO "ddebug: no matches for query\n");
+ pr_info("no matches for query\n");
+
+ return nfound;
}
/*
@@ -190,8 +213,10 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
buf = skip_spaces(buf);
if (!*buf)
break; /* oh, it was trailing whitespace */
+ if (*buf == '#')
+ break; /* token starts comment, skip rest of line */
- /* Run `end' over a word, either whitespace separated or quoted */
+ /* find `end' of word, whitespace separated or quoted */
if (*buf == '"' || *buf == '\'') {
int quote = *buf++;
for (end = buf ; *end && *end != quote ; end++)
@@ -203,8 +228,8 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
;
BUG_ON(end == buf);
}
- /* Here `buf' is the start of the word, `end' is one past the end */
+ /* `buf' is start of word, `end' is one past its end */
if (nwords == maxwords)
return -EINVAL; /* ran out of words[] before bytes */
if (*end)
@@ -215,10 +240,10 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
if (verbose) {
int i;
- printk(KERN_INFO "%s: split into words:", __func__);
+ pr_info("split into words:");
for (i = 0 ; i < nwords ; i++)
- printk(" \"%s\"", words[i]);
- printk("\n");
+ pr_cont(" \"%s\"", words[i]);
+ pr_cont("\n");
}
return nwords;
@@ -283,6 +308,19 @@ static char *unescape(char *str)
return str;
}
+static int check_set(const char **dest, char *src, char *name)
+{
+ int rc = 0;
+
+ if (*dest) {
+ rc = -EINVAL;
+ pr_err("match-spec:%s val:%s overridden by %s",
+ name, *dest, src);
+ }
+ *dest = src;
+ return rc;
+}
+
/*
* Parse words[] as a ddebug query specification, which is a series
* of (keyword, value) pairs chosen from these possibilities:
@@ -294,11 +332,15 @@ static char *unescape(char *str)
* format <escaped-string-to-find-in-format>
* line <lineno>
* line <first-lineno>-<last-lineno> // where either may be empty
+ *
+ * Only 1 of each type is allowed.
+ * Returns 0 on success, <0 on error.
*/
static int ddebug_parse_query(char *words[], int nwords,
struct ddebug_query *query)
{
unsigned int i;
+ int rc;
/* check we have an even number of words */
if (nwords % 2 != 0)
@@ -307,42 +349,43 @@ static int ddebug_parse_query(char *words[], int nwords,
for (i = 0 ; i < nwords ; i += 2) {
if (!strcmp(words[i], "func"))
- query->function = words[i+1];
+ rc = check_set(&query->function, words[i+1], "func");
else if (!strcmp(words[i], "file"))
- query->filename = words[i+1];
+ rc = check_set(&query->filename, words[i+1], "file");
else if (!strcmp(words[i], "module"))
- query->module = words[i+1];
+ rc = check_set(&query->module, words[i+1], "module");
else if (!strcmp(words[i], "format"))
- query->format = unescape(words[i+1]);
+ rc = check_set(&query->format, unescape(words[i+1]),
+ "format");
else if (!strcmp(words[i], "line")) {
char *first = words[i+1];
char *last = strchr(first, '-');
+ if (query->first_lineno || query->last_lineno) {
+ pr_err("match-spec:line given 2 times\n");
+ return -EINVAL;
+ }
if (last)
*last++ = '\0';
if (parse_lineno(first, &query->first_lineno) < 0)
return -EINVAL;
- if (last != NULL) {
+ if (last) {
/* range <first>-<last> */
- if (parse_lineno(last, &query->last_lineno) < 0)
+ if (parse_lineno(last, &query->last_lineno)
+ < query->first_lineno) {
+ pr_err("last-line < 1st-line\n");
return -EINVAL;
+ }
} else {
query->last_lineno = query->first_lineno;
}
} else {
- if (verbose)
- printk(KERN_ERR "%s: unknown keyword \"%s\"\n",
- __func__, words[i]);
+ pr_err("unknown keyword \"%s\"\n", words[i]);
return -EINVAL;
}
+ if (rc)
+ return rc;
}
-
- if (verbose)
- printk(KERN_INFO "%s: q->function=\"%s\" q->filename=\"%s\" "
- "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n",
- __func__, query->function, query->filename,
- query->module, query->format, query->first_lineno,
- query->last_lineno);
-
+ vpr_info_dq(query, "parsed");
return 0;
}
@@ -368,7 +411,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
return -EINVAL;
}
if (verbose)
- printk(KERN_INFO "%s: op='%c'\n", __func__, op);
+ pr_info("op='%c'\n", op);
for ( ; *str ; ++str) {
for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) {
@@ -380,10 +423,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
if (i < 0)
return -EINVAL;
}
- if (flags == 0)
- return -EINVAL;
if (verbose)
- printk(KERN_INFO "%s: flags=0x%x\n", __func__, flags);
+ pr_info("flags=0x%x\n", flags);
/* calculate final *flagsp, *maskp according to mask and op */
switch (op) {
@@ -401,8 +442,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
break;
}
if (verbose)
- printk(KERN_INFO "%s: *flagsp=0x%x *maskp=0x%x\n",
- __func__, *flagsp, *maskp);
+ pr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp);
return 0;
}
@@ -411,7 +451,7 @@ static int ddebug_exec_query(char *query_string)
unsigned int flags = 0, mask = 0;
struct ddebug_query query;
#define MAXWORDS 9
- int nwords;
+ int nwords, nfound;
char *words[MAXWORDS];
nwords = ddebug_tokenize(query_string, words, MAXWORDS);
@@ -423,47 +463,166 @@ static int ddebug_exec_query(char *query_string)
return -EINVAL;
/* actually go and implement the change */
- ddebug_change(&query, flags, mask);
+ nfound = ddebug_change(&query, flags, mask);
+ vpr_info_dq((&query), (nfound) ? "applied" : "no-match");
+
+ return nfound;
+}
+
+/* Handle multiple queries in the query string, continue on error, and
+ return the last error or the number of matching callsites. The module
+ name is either in the param (for a boot arg) or perhaps in the query
+ string.
+*/
+static int ddebug_exec_queries(char *query)
+{
+ char *split;
+ int i, errs = 0, exitcode = 0, rc, nfound = 0;
+
+ for (i = 0; query; query = split) {
+ split = strpbrk(query, ";\n");
+ if (split)
+ *split++ = '\0';
+
+ query = skip_spaces(query);
+ if (!query || !*query || *query == '#')
+ continue;
+
+ if (verbose)
+ pr_info("query %d: \"%s\"\n", i, query);
+
+ rc = ddebug_exec_query(query);
+ if (rc < 0) {
+ errs++;
+ exitcode = rc;
+ } else
+ nfound += rc;
+ i++;
+ }
+ pr_info("processed %d queries, with %d matches, %d errs\n",
+ i, nfound, errs);
+
+ if (exitcode)
+ return exitcode;
+ return nfound;
+}
+
+#define PREFIX_SIZE 64
+
+static int remaining(int wrote)
+{
+ if (PREFIX_SIZE - wrote > 0)
+ return PREFIX_SIZE - wrote;
return 0;
}
+static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
+{
+ int pos_after_tid;
+ int pos = 0;
+
+ pos += snprintf(buf + pos, remaining(pos), "%s", KERN_DEBUG);
+ if (desc->flags & _DPRINTK_FLAGS_INCL_TID) {
+ if (in_interrupt())
+ pos += snprintf(buf + pos, remaining(pos), "%s ",
+ "<intr>");
+ else
+ pos += snprintf(buf + pos, remaining(pos), "[%d] ",
+ task_pid_vnr(current));
+ }
+ pos_after_tid = pos;
+ if (desc->flags & _DPRINTK_FLAGS_INCL_MODNAME)
+ pos += snprintf(buf + pos, remaining(pos), "%s:",
+ desc->modname);
+ if (desc->flags & _DPRINTK_FLAGS_INCL_FUNCNAME)
+ pos += snprintf(buf + pos, remaining(pos), "%s:",
+ desc->function);
+ if (desc->flags & _DPRINTK_FLAGS_INCL_LINENO)
+ pos += snprintf(buf + pos, remaining(pos), "%d:",
+ desc->lineno);
+ if (pos - pos_after_tid)
+ pos += snprintf(buf + pos, remaining(pos), " ");
+ if (pos >= PREFIX_SIZE)
+ buf[PREFIX_SIZE - 1] = '\0';
+
+ return buf;
+}
+
int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
{
va_list args;
int res;
+ struct va_format vaf;
+ char buf[PREFIX_SIZE];
BUG_ON(!descriptor);
BUG_ON(!fmt);
va_start(args, fmt);
- res = printk(KERN_DEBUG);
- if (descriptor->flags & _DPRINTK_FLAGS_INCL_TID) {
- if (in_interrupt())
- res += printk(KERN_CONT "<intr> ");
- else
- res += printk(KERN_CONT "[%d] ", task_pid_vnr(current));
- }
- if (descriptor->flags & _DPRINTK_FLAGS_INCL_MODNAME)
- res += printk(KERN_CONT "%s:", descriptor->modname);
- if (descriptor->flags & _DPRINTK_FLAGS_INCL_FUNCNAME)
- res += printk(KERN_CONT "%s:", descriptor->function);
- if (descriptor->flags & _DPRINTK_FLAGS_INCL_LINENO)
- res += printk(KERN_CONT "%d ", descriptor->lineno);
- res += vprintk(fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ res = printk("%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
va_end(args);
return res;
}
EXPORT_SYMBOL(__dynamic_pr_debug);
-static __initdata char ddebug_setup_string[1024];
+int __dynamic_dev_dbg(struct _ddebug *descriptor,
+ const struct device *dev, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int res;
+ char buf[PREFIX_SIZE];
+
+ BUG_ON(!descriptor);
+ BUG_ON(!fmt);
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ res = __dev_printk(dynamic_emit_prefix(descriptor, buf), dev, &vaf);
+ va_end(args);
+
+ return res;
+}
+EXPORT_SYMBOL(__dynamic_dev_dbg);
+
+#ifdef CONFIG_NET
+
+int __dynamic_netdev_dbg(struct _ddebug *descriptor,
+ const struct net_device *dev, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int res;
+ char buf[PREFIX_SIZE];
+
+ BUG_ON(!descriptor);
+ BUG_ON(!fmt);
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ res = __netdev_printk(dynamic_emit_prefix(descriptor, buf), dev, &vaf);
+ va_end(args);
+
+ return res;
+}
+EXPORT_SYMBOL(__dynamic_netdev_dbg);
+
+#endif
+
+#define DDEBUG_STRING_SIZE 1024
+static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE];
+
static __init int ddebug_setup_query(char *str)
{
- if (strlen(str) >= 1024) {
- pr_warning("ddebug boot param string too large\n");
+ if (strlen(str) >= DDEBUG_STRING_SIZE) {
+ pr_warn("ddebug boot param string too large\n");
return 0;
}
- strcpy(ddebug_setup_string, str);
+ strlcpy(ddebug_setup_string, str, DDEBUG_STRING_SIZE);
return 1;
}
@@ -473,26 +632,33 @@ __setup("ddebug_query=", ddebug_setup_query);
* File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the
* command text from userspace, parses and executes it.
*/
+#define USER_BUF_PAGE 4096
static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
- char tmpbuf[256];
+ char *tmpbuf;
int ret;
if (len == 0)
return 0;
- /* we don't check *offp -- multiple writes() are allowed */
- if (len > sizeof(tmpbuf)-1)
+ if (len > USER_BUF_PAGE - 1) {
+ pr_warn("expected <%d bytes into control\n", USER_BUF_PAGE);
return -E2BIG;
- if (copy_from_user(tmpbuf, ubuf, len))
+ }
+ tmpbuf = kmalloc(len + 1, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
+ if (copy_from_user(tmpbuf, ubuf, len)) {
+ kfree(tmpbuf);
return -EFAULT;
+ }
tmpbuf[len] = '\0';
if (verbose)
- printk(KERN_INFO "%s: read %d bytes from userspace\n",
- __func__, (int)len);
+ pr_info("read %d bytes from userspace\n", (int)len);
- ret = ddebug_exec_query(tmpbuf);
- if (ret)
+ ret = ddebug_exec_queries(tmpbuf);
+ kfree(tmpbuf);
+ if (ret < 0)
return ret;
*offp += len;
@@ -552,8 +718,7 @@ static void *ddebug_proc_start(struct seq_file *m, loff_t *pos)
int n = *pos;
if (verbose)
- printk(KERN_INFO "%s: called m=%p *pos=%lld\n",
- __func__, m, (unsigned long long)*pos);
+ pr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos);
mutex_lock(&ddebug_lock);
@@ -578,8 +743,8 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos)
struct _ddebug *dp;
if (verbose)
- printk(KERN_INFO "%s: called m=%p p=%p *pos=%lld\n",
- __func__, m, p, (unsigned long long)*pos);
+ pr_info("called m=%p p=%p *pos=%lld\n",
+ m, p, (unsigned long long)*pos);
if (p == SEQ_START_TOKEN)
dp = ddebug_iter_first(iter);
@@ -599,11 +764,10 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
{
struct ddebug_iter *iter = m->private;
struct _ddebug *dp = p;
- char flagsbuf[8];
+ char flagsbuf[10];
if (verbose)
- printk(KERN_INFO "%s: called m=%p p=%p\n",
- __func__, m, p);
+ pr_info("called m=%p p=%p\n", m, p);
if (p == SEQ_START_TOKEN) {
seq_puts(m,
@@ -611,10 +775,10 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%s:%u [%s]%s %s \"",
- dp->filename, dp->lineno,
- iter->table->mod_name, dp->function,
- ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
+ seq_printf(m, "%s:%u [%s]%s =%s \"",
+ trim_prefix(dp->filename), dp->lineno,
+ iter->table->mod_name, dp->function,
+ ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
seq_escape(m, dp->format, "\t\r\n\"");
seq_puts(m, "\"\n");
@@ -628,8 +792,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
static void ddebug_proc_stop(struct seq_file *m, void *p)
{
if (verbose)
- printk(KERN_INFO "%s: called m=%p p=%p\n",
- __func__, m, p);
+ pr_info("called m=%p p=%p\n", m, p);
mutex_unlock(&ddebug_lock);
}
@@ -641,10 +804,11 @@ static const struct seq_operations ddebug_proc_seqops = {
};
/*
- * File_ops->open method for <debugfs>/dynamic_debug/control. Does the seq_file
- * setup dance, and also creates an iterator to walk the _ddebugs.
- * Note that we create a seq_file always, even for O_WRONLY files
- * where it's not needed, as doing so simplifies the ->release method.
+ * File_ops->open method for <debugfs>/dynamic_debug/control. Does
+ * the seq_file setup dance, and also creates an iterator to walk the
+ * _ddebugs. Note that we create a seq_file always, even for O_WRONLY
+ * files where it's not needed, as doing so simplifies the ->release
+ * method.
*/
static int ddebug_proc_open(struct inode *inode, struct file *file)
{
@@ -652,7 +816,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file)
int err;
if (verbose)
- printk(KERN_INFO "%s: called\n", __func__);
+ pr_info("called\n");
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (iter == NULL)
@@ -696,7 +860,6 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
}
dt->mod_name = new_name;
dt->num_ddebugs = n;
- dt->num_enabled = 0;
dt->ddebugs = tab;
mutex_lock(&ddebug_lock);
@@ -704,8 +867,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
mutex_unlock(&ddebug_lock);
if (verbose)
- printk(KERN_INFO "%u debug prints in module %s\n",
- n, dt->mod_name);
+ pr_info("%u debug prints in module %s\n", n, dt->mod_name);
return 0;
}
EXPORT_SYMBOL_GPL(ddebug_add_module);
@@ -727,8 +889,7 @@ int ddebug_remove_module(const char *mod_name)
int ret = -ENOENT;
if (verbose)
- printk(KERN_INFO "%s: removing module \"%s\"\n",
- __func__, mod_name);
+ pr_info("removing module \"%s\"\n", mod_name);
mutex_lock(&ddebug_lock);
list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) {
@@ -782,33 +943,40 @@ static int __init dynamic_debug_init(void)
int ret = 0;
int n = 0;
- if (__start___verbose != __stop___verbose) {
- iter = __start___verbose;
- modname = iter->modname;
- iter_start = iter;
- for (; iter < __stop___verbose; iter++) {
- if (strcmp(modname, iter->modname)) {
- ret = ddebug_add_module(iter_start, n, modname);
- if (ret)
- goto out_free;
- n = 0;
- modname = iter->modname;
- iter_start = iter;
- }
- n++;
+ if (__start___verbose == __stop___verbose) {
+ pr_warn("_ddebug table is empty in a "
+ "CONFIG_DYNAMIC_DEBUG build");
+ return 1;
+ }
+ iter = __start___verbose;
+ modname = iter->modname;
+ iter_start = iter;
+ for (; iter < __stop___verbose; iter++) {
+ if (strcmp(modname, iter->modname)) {
+ ret = ddebug_add_module(iter_start, n, modname);
+ if (ret)
+ goto out_free;
+ n = 0;
+ modname = iter->modname;
+ iter_start = iter;
}
- ret = ddebug_add_module(iter_start, n, modname);
+ n++;
}
+ ret = ddebug_add_module(iter_start, n, modname);
+ if (ret)
+ goto out_free;
/* ddebug_query boot param got passed -> set it up */
if (ddebug_setup_string[0] != '\0') {
- ret = ddebug_exec_query(ddebug_setup_string);
- if (ret)
- pr_warning("Invalid ddebug boot param %s",
- ddebug_setup_string);
- else
- pr_info("ddebug initialized with string %s",
+ ret = ddebug_exec_queries(ddebug_setup_string);
+ if (ret < 0)
+ pr_warn("Invalid ddebug boot param %s",
ddebug_setup_string);
+ else
+ pr_info("%d changes by ddebug_query\n", ret);
+
+ /* keep tables even on ddebug_query parse error */
+ ret = 0;
}
out_free:
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 000000000000..6ab4587d052b
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,134 @@
+/*
+ * Dynamic byte queue limits. See include/linux/dynamic_queue_limits.h
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned int count)
+{
+ unsigned int inprogress, prev_inprogress, limit;
+ unsigned int ovlimit, all_prev_completed, completed;
+
+ /* Can't complete more than what's in queue */
+ BUG_ON(count > dql->num_queued - dql->num_completed);
+
+ completed = dql->num_completed + count;
+ limit = dql->limit;
+ ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+ inprogress = dql->num_queued - completed;
+ prev_inprogress = dql->prev_num_queued - dql->num_completed;
+ all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+ if ((ovlimit && !inprogress) ||
+ (dql->prev_ovlimit && all_prev_completed)) {
+ /*
+ * Queue considered starved if:
+ * - The queue was over-limit in the last interval,
+ * and there is no more data in the queue.
+ * OR
+ * - The queue was over-limit in the previous interval and
+ * when enqueuing it was possible that all queued data
+ * had been consumed. This covers the case when the queue
+ * may have become starved between completion processing
+ * running and the next time an enqueue was scheduled.
+ *
+ * When queue is starved increase the limit by the amount
+ * of bytes both sent and completed in the last interval,
+ * plus any previous over-limit.
+ */
+ limit += POSDIFF(completed, dql->prev_num_queued) +
+ dql->prev_ovlimit;
+ dql->slack_start_time = jiffies;
+ dql->lowest_slack = UINT_MAX;
+ } else if (inprogress && prev_inprogress && !all_prev_completed) {
+ /*
+ * Queue was not starved, check if the limit can be decreased.
+ * A decrease is only considered if the queue has been busy in
+ * the whole interval (the check above).
+ *
+ * If there is slack, the amount of excess data queued above
+ * the amount needed to prevent starvation, the queue limit
+ * can be decreased. To avoid hysteresis we consider the
+ * minimum amount of slack found over several iterations of the
+ * completion routine.
+ */
+ unsigned int slack, slack_last_objs;
+
+ /*
+ * Slack is the maximum of
+ * - The queue limit plus previous over-limit minus twice
+ * the number of objects completed. Note that twice the
+ * number of completed bytes is a basis for an upper bound
+ * of the limit.
+ * - The portion of objects in the last queuing operation that
+ * was not part of a non-zero previous over-limit; that is,
+ * "round down" by the non-overlimit portion of the last
+ * queueing operation.
+ */
+ slack = POSDIFF(limit + dql->prev_ovlimit,
+ 2 * (completed - dql->num_completed));
+ slack_last_objs = dql->prev_ovlimit ?
+ POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+ slack = max(slack, slack_last_objs);
+
+ if (slack < dql->lowest_slack)
+ dql->lowest_slack = slack;
+
+ if (time_after(jiffies,
+ dql->slack_start_time + dql->slack_hold_time)) {
+ limit = POSDIFF(limit, dql->lowest_slack);
+ dql->slack_start_time = jiffies;
+ dql->lowest_slack = UINT_MAX;
+ }
+ }
+
+ /* Enforce bounds on limit */
+ limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+ if (limit != dql->limit) {
+ dql->limit = limit;
+ ovlimit = 0;
+ }
+
+ dql->adj_limit = limit + completed;
+ dql->prev_ovlimit = ovlimit;
+ dql->prev_last_obj_cnt = dql->last_obj_cnt;
+ dql->num_completed = completed;
+ dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+ /* Reset all dynamic values */
+ dql->limit = 0;
+ dql->num_queued = 0;
+ dql->num_completed = 0;
+ dql->last_obj_cnt = 0;
+ dql->prev_num_queued = 0;
+ dql->prev_last_obj_cnt = 0;
+ dql->prev_ovlimit = 0;
+ dql->lowest_slack = UINT_MAX;
+ dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+ dql->max_limit = DQL_MAX_LIMIT;
+ dql->min_limit = 0;
+ dql->slack_hold_time = hold_time;
+ dql_reset(dql);
+ return 0;
+}
+EXPORT_SYMBOL(dql_init);
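A hedged sketch of how a NIC driver might drive this API; dql_queued() and dql_avail() are inline helpers from include/linux/dynamic_queue_limits.h, and the function names below are hypothetical:

#include <linux/dynamic_queue_limits.h>

/* Post bytes to the hardware queue; caller stops the queue on -EBUSY. */
static int my_xmit(struct dql *dql, unsigned int bytes)
{
	dql_queued(dql, bytes);		/* account bytes as queued */
	if (dql_avail(dql) < 0)
		return -EBUSY;		/* limit reached */
	return 0;
}

/* TX completion path: feed back completed bytes so the limit adapts. */
static void my_tx_clean(struct dql *dql, unsigned int bytes)
{
	dql_completed(dql, bytes);
}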
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index f193b7796449..b4801f51b607 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -14,7 +14,7 @@
* setup_fault_attr() is a helper function for various __setup handlers, so it
* returns 0 on error, because that is what __setup handlers do.
*/
-int __init setup_fault_attr(struct fault_attr *attr, char *str)
+int setup_fault_attr(struct fault_attr *attr, char *str)
{
unsigned long probability;
unsigned long interval;
@@ -36,6 +36,7 @@ int __init setup_fault_attr(struct fault_attr *attr, char *str)
return 1;
}
+EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr)
{
@@ -130,6 +131,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return true;
}
+EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -147,7 +149,7 @@ static int debugfs_ul_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");
-static struct dentry *debugfs_create_ul(const char *name, mode_t mode,
+static struct dentry *debugfs_create_ul(const char *name, umode_t mode,
struct dentry *parent, unsigned long *value)
{
return debugfs_create_file(name, mode, parent, value, &fops_ul);
@@ -167,7 +169,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get,
debugfs_stacktrace_depth_set, "%llu\n");
static struct dentry *debugfs_create_stacktrace_depth(
- const char *name, mode_t mode,
+ const char *name, umode_t mode,
struct dentry *parent, unsigned long *value)
{
return debugfs_create_file(name, mode, parent, value,
@@ -191,7 +193,7 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
debugfs_atomic_t_set, "%lld\n");
-static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
+static struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode,
struct dentry *parent, atomic_t *value)
{
return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
@@ -200,7 +202,7 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
struct dentry *fault_create_debugfs_attr(const char *name,
struct dentry *parent, struct fault_attr *attr)
{
- mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
dir = debugfs_create_dir(name, parent);
@@ -243,5 +245,6 @@ fail:
return ERR_PTR(-ENOMEM);
}
+EXPORT_SYMBOL_GPL(fault_create_debugfs_attr);
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
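With setup_fault_attr(), should_fail() and fault_create_debugfs_attr() now exported, modules can inject failures on their own hot paths. A minimal sketch (my_alloc() is hypothetical):

#include <linux/fault-inject.h>
#include <linux/slab.h>

static DECLARE_FAULT_ATTR(my_fail_attr);

static void *my_alloc(size_t size)
{
	if (should_fail(&my_fail_attr, size))
		return NULL;		/* injected failure */
	return kmalloc(size, GFP_KERNEL);
}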
diff --git a/lib/hexdump.c b/lib/hexdump.c
index f5fe6ba7a3ab..51d5ae210244 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -38,14 +38,21 @@ EXPORT_SYMBOL(hex_to_bin);
* @dst: binary result
* @src: ascii hexadecimal string
* @count: result length
+ *
+ * Return 0 on success, -1 in case of bad input.
*/
-void hex2bin(u8 *dst, const char *src, size_t count)
+int hex2bin(u8 *dst, const char *src, size_t count)
{
while (count--) {
- *dst = hex_to_bin(*src++) << 4;
- *dst += hex_to_bin(*src++);
- dst++;
+ int hi = hex_to_bin(*src++);
+ int lo = hex_to_bin(*src++);
+
+ if ((hi < 0) || (lo < 0))
+ return -1;
+
+ *dst++ = (hi << 4) | lo;
}
+ return 0;
}
EXPORT_SYMBOL(hex2bin);
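Since hex2bin() now reports bad input instead of silently writing garbage, callers are expected to check the result. A hedged usage sketch (parse_mac() is hypothetical):

#include <linux/kernel.h>

static int parse_mac(const char *hex, u8 mac[6])
{
	if (hex2bin(mac, hex, 6) < 0)
		return -EINVAL;		/* non-hex character in input */
	return 0;
}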
diff --git a/lib/idr.c b/lib/idr.c
index db040ce3fa73..12499ba7967e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -595,8 +595,10 @@ EXPORT_SYMBOL(idr_for_each);
* Returns pointer to registered object with id, which is next number to
* given id. After being looked up, *@nextidp will be updated for the next
* iteration.
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers' lifetimes are correctly managed.
*/
-
void *idr_get_next(struct idr *idp, int *nextidp)
{
struct idr_layer *p, *pa[MAX_LEVEL];
@@ -605,11 +607,11 @@ void *idr_get_next(struct idr *idp, int *nextidp)
int n, max;
/* find first ent */
- n = idp->layers * IDR_BITS;
- max = 1 << n;
p = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
+ n = (p->layer + 1) * IDR_BITS;
+ max = 1 << n;
while (id < max) {
while (n > 0 && p) {
@@ -767,8 +769,8 @@ EXPORT_SYMBOL(ida_pre_get);
* @starting_id: id to start search at
* @p_id: pointer to the allocated handle
*
- * Allocate new ID above or equal to @ida. It should be called with
- * any required locks.
+ * Allocate new ID above or equal to @starting_id. It should be called
+ * with any required locks.
*
* If memory is required, it will return %-EAGAIN, you should unlock
* and go back to the ida_pre_get() call. If the ida is full, it will
@@ -860,7 +862,7 @@ EXPORT_SYMBOL(ida_get_new_above);
* and go back to the idr_pre_get() call. If the idr is full, it will
* return %-ENOSPC.
*
- * @id returns a value in the range %0 ... %0x7fffffff.
+ * @p_id returns a value in the range %0 ... %0x7fffffff.
*/
int ida_get_new(struct ida *ida, int *p_id)
{
@@ -944,6 +946,7 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
{
int ret, id;
unsigned int max;
+ unsigned long flags;
BUG_ON((int)start < 0);
BUG_ON((int)end < 0);
@@ -959,7 +962,7 @@ again:
if (!ida_pre_get(ida, gfp_mask))
return -ENOMEM;
- spin_lock(&simple_ida_lock);
+ spin_lock_irqsave(&simple_ida_lock, flags);
ret = ida_get_new_above(ida, start, &id);
if (!ret) {
if (id > max) {
@@ -969,7 +972,7 @@ again:
ret = id;
}
}
- spin_unlock(&simple_ida_lock);
+ spin_unlock_irqrestore(&simple_ida_lock, flags);
if (unlikely(ret == -EAGAIN))
goto again;
@@ -985,10 +988,12 @@ EXPORT_SYMBOL(ida_simple_get);
*/
void ida_simple_remove(struct ida *ida, unsigned int id)
{
+ unsigned long flags;
+
BUG_ON((int)id < 0);
- spin_lock(&simple_ida_lock);
+ spin_lock_irqsave(&simple_ida_lock, flags);
ida_remove(ida, id);
- spin_unlock(&simple_ida_lock);
+ spin_unlock_irqrestore(&simple_ida_lock, flags);
}
EXPORT_SYMBOL(ida_simple_remove);
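The irqsave conversion lets ida_simple_remove() (and, with GFP_ATOMIC, ida_simple_get()) be called with interrupts disabled without risking a deadlock on simple_ida_lock. Typical process-context usage is unchanged; a minimal sketch (my_ida and the helpers are hypothetical):

#include <linux/idr.h>

static DEFINE_IDA(my_ida);

static int my_get_minor(void)
{
	/* allocate an id in [0, 255]; returns -ENOSPC when exhausted */
	return ida_simple_get(&my_ida, 0, 256, GFP_KERNEL);
}

static void my_put_minor(int minor)
{
	ida_simple_remove(&my_ida, minor);
}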
diff --git a/lib/iomap.c b/lib/iomap.c
index 5dbcb4b2d864..ada922a808e6 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -242,45 +242,11 @@ EXPORT_SYMBOL(ioport_unmap);
#endif /* CONFIG_HAS_IOPORT */
#ifdef CONFIG_PCI
-/**
- * pci_iomap - create a virtual mapping cookie for a PCI BAR
- * @dev: PCI device that owns the BAR
- * @bar: BAR number
- * @maxlen: length of the memory to map
- *
- * Using this function you will get a __iomem address to your device BAR.
- * You can access it using ioread*() and iowrite*(). These functions hide
- * the details if this is a MMIO or PIO address space and will just do what
- * you expect from them in the correct way.
- *
- * @maxlen specifies the maximum length to map. If you want to get access to
- * the complete BAR without checking for its length first, pass %0 here.
- * */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
- resource_size_t start = pci_resource_start(dev, bar);
- resource_size_t len = pci_resource_len(dev, bar);
- unsigned long flags = pci_resource_flags(dev, bar);
-
- if (!len || !start)
- return NULL;
- if (maxlen && len > maxlen)
- len = maxlen;
- if (flags & IORESOURCE_IO)
- return ioport_map(start, len);
- if (flags & IORESOURCE_MEM) {
- if (flags & IORESOURCE_CACHEABLE)
- return ioremap(start, len);
- return ioremap_nocache(start, len);
- }
- /* What? */
- return NULL;
-}
-
+/* Hide the details of whether this is an MMIO or PIO address space and just
+ * do what you expect in the correct way. */
void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
{
IO_COND(addr, /* nothing */, iounmap(addr));
}
-EXPORT_SYMBOL(pci_iomap);
EXPORT_SYMBOL(pci_iounmap);
#endif /* CONFIG_PCI */
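pci_iomap() itself moves to the new lib/pci_iomap.c (see the diffstat); its calling convention is unchanged. A hedged driver-side sketch (my_probe_bar0() is hypothetical):

#include <linux/pci.h>
#include <linux/io.h>

static int my_probe_bar0(struct pci_dev *pdev)
{
	void __iomem *regs = pci_iomap(pdev, 0, 0);	/* 0 = map whole BAR */

	if (!regs)
		return -ENOMEM;
	iowrite32(0x1, regs);		/* works for both MMIO and PIO BARs */
	pci_iounmap(pdev, regs);
	return 0;
}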
diff --git a/lib/kobject.c b/lib/kobject.c
index 640bd98a4c8a..c33d7a18d635 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -746,43 +746,11 @@ void kset_unregister(struct kset *k)
*/
struct kobject *kset_find_obj(struct kset *kset, const char *name)
{
- return kset_find_obj_hinted(kset, name, NULL);
-}
-
-/**
- * kset_find_obj_hinted - search for object in kset given a predecessor hint.
- * @kset: kset we're looking in.
- * @name: object's name.
- * @hint: hint to possible object's predecessor.
- *
- * Check the hint's next object and if it is a match return it directly,
- * otherwise, fall back to the behavior of kset_find_obj(). Either way
- * a reference for the returned object is held and the reference on the
- * hinted object is released.
- */
-struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
- struct kobject *hint)
-{
struct kobject *k;
struct kobject *ret = NULL;
spin_lock(&kset->list_lock);
- if (!hint)
- goto slow_search;
-
- /* end of list detection */
- if (hint->entry.next == kset->list.next)
- goto slow_search;
-
- k = container_of(hint->entry.next, struct kobject, entry);
- if (!kobject_name(k) || strcmp(kobject_name(k), name))
- goto slow_search;
-
- ret = kobject_get(k);
- goto unlock_exit;
-
-slow_search:
list_for_each_entry(k, &kset->list, entry) {
if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
ret = kobject_get(k);
@@ -790,12 +758,7 @@ slow_search:
}
}
-unlock_exit:
spin_unlock(&kset->list_lock);
-
- if (hint)
- kobject_put(hint);
-
return ret;
}
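Note that kset_find_obj() still returns the object with a reference held, so callers must drop it with kobject_put(). A minimal sketch (my_kset and the looked-up name are hypothetical):

#include <linux/kobject.h>

static void example_lookup(struct kset *my_kset)
{
	struct kobject *kobj = kset_find_obj(my_kset, "eth0");

	if (kobj) {
		pr_info("found %s\n", kobject_name(kobj));
		kobject_put(kobj);	/* drop the reference taken above */
	}
}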
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 70af0a7f97c0..75cbdb52bf5c 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -29,16 +29,17 @@
u64 uevent_seqnum;
char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
-static DEFINE_SPINLOCK(sequence_lock);
#ifdef CONFIG_NET
struct uevent_sock {
struct list_head list;
struct sock *sk;
};
static LIST_HEAD(uevent_sock_list);
-static DEFINE_MUTEX(uevent_sock_mutex);
#endif
+/* This lock protects uevent_seqnum and uevent_sock_list */
+static DEFINE_MUTEX(uevent_sock_mutex);
+
/* the strings here must match the enum in include/linux/kobject.h */
static const char *kobject_actions[] = {
[KOBJ_ADD] = "add",
@@ -136,7 +137,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
struct kobject *top_kobj;
struct kset *kset;
const struct kset_uevent_ops *uevent_ops;
- u64 seq;
int i = 0;
int retval = 0;
#ifdef CONFIG_NET
@@ -243,22 +243,24 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
else if (action == KOBJ_REMOVE)
kobj->state_remove_uevent_sent = 1;
+ mutex_lock(&uevent_sock_mutex);
/* we will send an event, so request a new sequence number */
- spin_lock(&sequence_lock);
- seq = ++uevent_seqnum;
- spin_unlock(&sequence_lock);
- retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq);
- if (retval)
+ retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)++uevent_seqnum);
+ if (retval) {
+ mutex_unlock(&uevent_sock_mutex);
goto exit;
+ }
#if defined(CONFIG_NET)
/* send netlink message */
- mutex_lock(&uevent_sock_mutex);
list_for_each_entry(ue_sk, &uevent_sock_list, list) {
struct sock *uevent_sock = ue_sk->sk;
struct sk_buff *skb;
size_t len;
+ if (!netlink_has_listeners(uevent_sock, 1))
+ continue;
+
/* allocate message with the maximum possible size */
len = strlen(action_string) + strlen(devpath) + 2;
skb = alloc_skb(len + env->buflen, GFP_KERNEL);
@@ -282,13 +284,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
kobj_bcast_filter,
kobj);
/* ENOBUFS should be handled in userspace */
- if (retval == -ENOBUFS)
+ if (retval == -ENOBUFS || retval == -ESRCH)
retval = 0;
} else
retval = -ENOMEM;
}
- mutex_unlock(&uevent_sock_mutex);
#endif
+ mutex_unlock(&uevent_sock_mutex);
/* call uevent_helper, usually only enabled during early boot */
if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
diff --git a/lib/kref.c b/lib/kref.c
deleted file mode 100644
index 3efb882b11db..000000000000
--- a/lib/kref.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * kref.c - library routines for handling generic reference counted objects
- *
- * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
- * Copyright (C) 2004 IBM Corp.
- *
- * based on lib/kobject.c which was:
- * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org>
- *
- * This file is released under the GPLv2.
- *
- */
-
-#include <linux/kref.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-/**
- * kref_init - initialize object.
- * @kref: object in question.
- */
-void kref_init(struct kref *kref)
-{
- atomic_set(&kref->refcount, 1);
- smp_mb();
-}
-
-/**
- * kref_get - increment refcount for object.
- * @kref: object.
- */
-void kref_get(struct kref *kref)
-{
- WARN_ON(!atomic_read(&kref->refcount));
- atomic_inc(&kref->refcount);
- smp_mb__after_atomic_inc();
-}
-
-/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- * last reference to the object is released.
- * This pointer is required, and it is not acceptable to pass kfree
- * in as this function.
- *
- * Decrement the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0. Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory. Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-int kref_put(struct kref *kref, void (*release)(struct kref *kref))
-{
- WARN_ON(release == NULL);
- WARN_ON(release == (void (*)(struct kref *))kfree);
-
- if (atomic_dec_and_test(&kref->refcount)) {
- release(kref);
- return 1;
- }
- return 0;
-}
-
-
-/**
- * kref_sub - subtract a number of refcounts for object.
- * @kref: object.
- * @count: Number of recounts to subtract.
- * @release: pointer to the function that will clean up the object when the
- * last reference to the object is released.
- * This pointer is required, and it is not acceptable to pass kfree
- * in as this function.
- *
- * Subtract @count from the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0. Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory. Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-int kref_sub(struct kref *kref, unsigned int count,
- void (*release)(struct kref *kref))
-{
- WARN_ON(release == NULL);
- WARN_ON(release == (void (*)(struct kref *))kfree);
-
- if (atomic_sub_and_test((int) count, &kref->refcount)) {
- release(kref);
- return 1;
- }
- return 0;
-}
-
-EXPORT_SYMBOL(kref_init);
-EXPORT_SYMBOL(kref_get);
-EXPORT_SYMBOL(kref_put);
-EXPORT_SYMBOL(kref_sub);
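The kref API survives this deletion as static inlines in <linux/kref.h>, so callers are unaffected. A hedged sketch of the canonical pattern (struct my_obj is hypothetical):

#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref ref;
	int data;
};

static void my_obj_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_obj, ref));
}

static void example(struct my_obj *obj)
{
	kref_get(&obj->ref);			/* take a reference */
	kref_put(&obj->ref, my_obj_release);	/* drop it; frees at zero */
}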
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 5e066759f551..b1dd3e7d88cb 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -18,26 +18,41 @@
#include <linux/module.h>
#include <linux/types.h>
#include <asm/uaccess.h>
+#include "kstrtox.h"
-static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
- unsigned long long acc;
- int ok;
-
- if (base == 0) {
+ if (*base == 0) {
if (s[0] == '0') {
if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
- base = 16;
+ *base = 16;
else
- base = 8;
+ *base = 8;
} else
- base = 10;
+ *base = 10;
}
- if (base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
s += 2;
+ return s;
+}
- acc = 0;
- ok = 0;
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return the number of characters consumed, possibly OR-ed with the
+ * overflow bit. If overflow occurs, the (incorrect) result integer is
+ * still returned.
+ *
+ * Don't you dare use this function.
+ */
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+ int overflow;
+
+ res = 0;
+ rv = 0;
+ overflow = 0;
while (*s) {
unsigned int val;
@@ -45,23 +60,47 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
val = *s - '0';
else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
val = _tolower(*s) - 'a' + 10;
- else if (*s == '\n' && *(s + 1) == '\0')
- break;
else
- return -EINVAL;
+ break;
if (val >= base)
- return -EINVAL;
- if (acc > div_u64(ULLONG_MAX - val, base))
- return -ERANGE;
- acc = acc * base + val;
- ok = 1;
-
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > div_u64(ULLONG_MAX - val, base))
+ overflow = 1;
+ }
+ res = res * base + val;
+ rv++;
s++;
}
- if (!ok)
+ *p = res;
+ if (overflow)
+ rv |= KSTRTOX_OVERFLOW;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ rv &= ~KSTRTOX_OVERFLOW;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
return -EINVAL;
- *res = acc;
+ *res = _res;
return 0;
}
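The public entry points keep their semantics: base 0 auto-detects the radix, one trailing newline is tolerated, and overflow yields -ERANGE rather than a truncated value. A minimal sketch (parse_count() is hypothetical):

#include <linux/kernel.h>

static int parse_count(const char *buf, unsigned long *val)
{
	int ret = kstrtoul(buf, 0, val);	/* base 0 = auto-detect */

	if (ret)
		return ret;			/* -EINVAL or -ERANGE */
	return 0;
}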
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
new file mode 100644
index 000000000000..f13eeeaf441d
--- /dev/null
+++ b/lib/kstrtox.h
@@ -0,0 +1,8 @@
+#ifndef _LIB_KSTRTOX_H
+#define _LIB_KSTRTOX_H
+
+#define KSTRTOX_OVERFLOW (1U << 31)
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
+
+#endif
diff --git a/lib/llist.c b/lib/llist.c
index da445724fa1f..700cff77a387 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -3,8 +3,8 @@
*
* The basic atomic operation of this list is cmpxchg on long. On
* architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler. So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
*
* Copyright 2010,2011 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
@@ -30,48 +30,28 @@
#include <asm/system.h>
/**
- * llist_add - add a new entry
- * @new: new entry to be added
- * @head: the head for your lock-less list
- */
-void llist_add(struct llist_node *new, struct llist_head *head)
-{
- struct llist_node *entry, *old_entry;
-
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
- BUG_ON(in_nmi());
-#endif
-
- entry = head->first;
- do {
- old_entry = entry;
- new->next = entry;
- cpu_relax();
- } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
-}
-EXPORT_SYMBOL_GPL(llist_add);
-
-/**
* llist_add_batch - add several linked entries in batch
* @new_first: first entry in batch to be added
* @new_last: last entry in batch to be added
* @head: the head for your lock-less list
+ *
+ * Return whether the list was empty before adding.
*/
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
struct llist_head *head)
{
struct llist_node *entry, *old_entry;
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
- BUG_ON(in_nmi());
-#endif
-
entry = head->first;
- do {
+ for (;;) {
old_entry = entry;
new_last->next = entry;
- cpu_relax();
- } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
+ entry = cmpxchg(&head->first, old_entry, new_first);
+ if (entry == old_entry)
+ break;
+ }
+
+ return old_entry == NULL;
}
EXPORT_SYMBOL_GPL(llist_add_batch);
@@ -93,37 +73,17 @@ struct llist_node *llist_del_first(struct llist_head *head)
{
struct llist_node *entry, *old_entry, *next;
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
- BUG_ON(in_nmi());
-#endif
-
entry = head->first;
- do {
+ for (;;) {
if (entry == NULL)
return NULL;
old_entry = entry;
next = entry->next;
- cpu_relax();
- } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
+ entry = cmpxchg(&head->first, old_entry, next);
+ if (entry == old_entry)
+ break;
+ }
return entry;
}
EXPORT_SYMBOL_GPL(llist_del_first);
-
-/**
- * llist_del_all - delete all entries from lock-less list
- * @head: the head of lock-less list to delete all entries
- *
- * If list is empty, return NULL, otherwise, delete all entries and
- * return the pointer to the first entry. The order of entries
- * deleted is from the newest to the oldest added one.
- */
-struct llist_node *llist_del_all(struct llist_head *head)
-{
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
- BUG_ON(in_nmi());
-#endif
-
- return xchg(&head->first, NULL);
-}
-EXPORT_SYMBOL_GPL(llist_del_all);
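llist_add() and llist_del_all() continue to exist as inlines in <linux/llist.h>; only the out-of-line copies are removed here. A hedged producer/consumer sketch (struct my_event and the two functions are hypothetical):

#include <linux/llist.h>
#include <linux/slab.h>

struct my_event {
	struct llist_node node;
	int id;
};

static void producer(struct llist_head *head, struct my_event *ev)
{
	/* returns true if the list was empty, i.e. a consumer may be idle */
	bool was_empty = llist_add(&ev->node, head);

	if (was_empty)
		pr_debug("first event queued\n");
}

static void consumer(struct llist_head *head)
{
	struct llist_node *n = llist_del_all(head);	/* atomic xchg */

	while (n) {
		struct my_event *ev = llist_entry(n, struct my_event, node);

		n = n->next;		/* advance before freeing */
		kfree(ev);
	}
}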
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
new file mode 100644
index 000000000000..567d52e74d77
--- /dev/null
+++ b/lib/mpi/Makefile
@@ -0,0 +1,32 @@
+#
+# MPI multiprecision maths library (from gpg)
+#
+
+obj-$(CONFIG_MPILIB) = mpi.o
+
+mpi-y = \
+ generic_mpih-lshift.o \
+ generic_mpih-mul1.o \
+ generic_mpih-mul2.o \
+ generic_mpih-mul3.o \
+ generic_mpih-rshift.o \
+ generic_mpih-sub1.o \
+ generic_mpih-add1.o \
+ mpicoder.o \
+ mpi-bit.o \
+ mpih-cmp.o \
+ mpih-div.o \
+ mpih-mul.o \
+ mpi-pow.o \
+ mpiutil.o
+
+mpi-$(CONFIG_MPILIB_EXTRA) += \
+ mpi-add.o \
+ mpi-div.o \
+ mpi-cmp.o \
+ mpi-gcd.o \
+ mpi-inline.o \
+ mpi-inv.o \
+ mpi-mpow.o \
+ mpi-mul.o \
+ mpi-scan.o
diff --git a/lib/mpi/generic_mpi-asm-defs.h b/lib/mpi/generic_mpi-asm-defs.h
new file mode 100644
index 000000000000..047d1f5a7249
--- /dev/null
+++ b/lib/mpi/generic_mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery. We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here. */
+#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG)
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c
new file mode 100644
index 000000000000..c94c7dd344b3
--- /dev/null
+++ b/lib/mpi/generic_mpih-add1.c
@@ -0,0 +1,61 @@
+/* mpihelp-add_1.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998,
+ * 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+ mpi_limb_t x, y, cy;
+ mpi_size_t j;
+
+ /* The loop counter and index J go from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to one addend */
+ cy = y < cy; /* get out carry from that addition */
+ y += x; /* add other addend */
+ cy += y < x; /* get out carry from that add, combine */
+ res_ptr[j] = y;
+ } while (++j);
+
+ return cy;
+}
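The two unsigned comparisons above are the portable carry-detection idiom: when an addition wraps modulo 2^BITS_PER_MPI_LIMB, the sum is smaller than either addend. An isolated illustration in plain C:

/* One limb of a multi-precision add; *cy is the carry in and out (0 or 1). */
static unsigned long add_limb(unsigned long x, unsigned long y,
			      unsigned long *cy)
{
	unsigned long c = *cy;

	y += c;			/* add incoming carry */
	c = y < c;		/* carry out iff the addition wrapped */
	y += x;			/* add the other operand */
	c += y < x;		/* combine carry from the second addition */
	*cy = c;
	return y;
}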
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c
new file mode 100644
index 000000000000..86318927231a
--- /dev/null
+++ b/lib/mpi/generic_mpih-lshift.c
@@ -0,0 +1,63 @@
+/* mpihelp-lshift.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
+
+mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+ mpi_limb_t high_limb, low_limb;
+ unsigned sh_1, sh_2;
+ mpi_size_t i;
+ mpi_limb_t retval;
+
+ sh_1 = cnt;
+ wp += 1;
+ sh_2 = BITS_PER_MPI_LIMB - sh_1;
+ i = usize - 1;
+ low_limb = up[i];
+ retval = low_limb >> sh_2;
+ high_limb = low_limb;
+ while (--i >= 0) {
+ low_limb = up[i];
+ wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+ high_limb = low_limb;
+ }
+ wp[i] = high_limb << sh_1;
+
+ return retval;
+}
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c
new file mode 100644
index 000000000000..1668dfd9092c
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul1.c
@@ -0,0 +1,57 @@
+/* mpihelp-mul_1.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+
+ /* The loop counter and index J go from -S1_SIZE to -1. This way
+ * the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ res_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+ res_ptr[j] = prod_low;
+ } while (++j);
+
+ return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c
new file mode 100644
index 000000000000..8a7b29ee1740
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul2.c
@@ -0,0 +1,60 @@
+/* mpihelp-mul_2.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+ mpi_limb_t x;
+
+ /* The loop counter and index J go from -SIZE to -1. This way
+ * the loop becomes faster. */
+ j = -s1_size;
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x + prod_low;
+ cy_limb += prod_low < x ? 1 : 0;
+ res_ptr[j] = prod_low;
+ } while (++j);
+ return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c
new file mode 100644
index 000000000000..f96df327be63
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul3.c
@@ -0,0 +1,61 @@
+/* mpihelp-mul_3.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+ mpi_limb_t x;
+
+ /* The loop counter and index J go from -SIZE to -1. This way
+ * the loop becomes faster. */
+ j = -s1_size;
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x - prod_low;
+ cy_limb += prod_low > x ? 1 : 0;
+ res_ptr[j] = prod_low;
+ } while (++j);
+
+ return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c
new file mode 100644
index 000000000000..ffa328818ca6
--- /dev/null
+++ b/lib/mpi/generic_mpih-rshift.c
@@ -0,0 +1,63 @@
+/* mpih-rshift.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ * 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
+
+mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+ mpi_limb_t high_limb, low_limb;
+ unsigned sh_1, sh_2;
+ mpi_size_t i;
+ mpi_limb_t retval;
+
+ sh_1 = cnt;
+ wp -= 1;
+ sh_2 = BITS_PER_MPI_LIMB - sh_1;
+ high_limb = up[0];
+ retval = high_limb << sh_2;
+ low_limb = high_limb;
+ for (i = 1; i < usize; i++) {
+ high_limb = up[i];
+ wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+ low_limb = high_limb;
+ }
+ wp[i] = low_limb >> sh_1;
+
+ return retval;
+}
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c
new file mode 100644
index 000000000000..5d98ab7d6853
--- /dev/null
+++ b/lib/mpi/generic_mpih-sub1.c
@@ -0,0 +1,60 @@
+/* mpihelp-sub_1.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+ mpi_limb_t x, y, cy;
+ mpi_size_t j;
+
+ /* The loop counter and index J go from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to subtrahend */
+ cy = y < cy; /* get out carry from that addition */
+ y = x - y; /* main subtract */
+ cy += y > x; /* get out carry from the subtract, combine */
+ res_ptr[j] = y;
+ } while (++j);
+
+ return cy;
+}
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..29f98624ef93
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1500 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ * 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large as UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+ that use this file takes place. */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product. This is just a variant of umul_ppmm.
+
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation. If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator). Like udiv_qrnnd but the numbers are signed. The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X. This is the number of
+ * steps X needs to be shifted left to set the msb. Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used. */
+
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below! */
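When no asm version below matches, longlong.h falls back to plain C. A hedged sketch of what that generic umul_ppmm() computes, using the __ll_* helpers defined above (the macro name here is illustrative, not the one the header defines):

#define umul_ppmm_generic(w1, w0, u, v)					\
do {									\
	UWtype __ul = __ll_lowpart(u), __uh = __ll_highpart(u);		\
	UWtype __vl = __ll_lowpart(v), __vh = __ll_highpart(v);		\
	UWtype __x0 = __ul * __vl;	/* four half-word products */	\
	UWtype __x1 = __ul * __vh;					\
	UWtype __x2 = __uh * __vl;					\
	UWtype __x3 = __uh * __vh;					\
									\
	__x1 += __ll_highpart(__x0);	/* cannot overflow */		\
	__x1 += __x2;			/* may overflow ... */		\
	if (__x1 < __x2)		/* ... carry into high part */	\
		__x3 += __ll_B;						\
	(w1) = __x3 + __ll_highpart(__x1);				\
	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE / 2)			\
		+ __ll_lowpart(__x0);					\
} while (0)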
+
+#if defined(__GNUC__) && !defined(NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+ understood by gcc1. Use cpp to avoid major code duplication. */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+/***************************************
+ ************** A29K *****************
+ ***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %1,%4,%5\n" \
+ "addc %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %1,%4,%5\n" \
+ "subc %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("multiplu %0,%1,%2" \
+ : "=r" ((USItype)(xl)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+ __asm__ ("multmu %0,%1,%2" \
+ : "=r" ((USItype)(xh)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("dividu %0,%3,%4" \
+ : "=r" ((USItype)(q)), \
+ "=q" ((USItype)(r)) \
+ : "1" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d)))
+
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("umulh %r1,%2,%0" \
+ : "=r" ((UDItype) ph) \
+ : "%rJ" (__m0), \
+ "rI" (__m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+ (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+
+/***************************************
+ ************** ARM ******************
+ ***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds %1, %4, %5\n" \
+ "adc %0, %2, %3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs %1, %4, %5\n" \
+ "sbc %0, %2, %3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
+ "mov %|r0, %2, lsr #16 @ AAAA\n" \
+ "mov %|r2, %3, lsr #16 @ BBBB\n" \
+ "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
+ "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
+ "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
+ "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
+ "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
+ "mul %0, %|r0, %0 @ AAAA * bbbb\n" \
+ "adds %|r0, %1, %0 @ central sum\n" \
+ "addcs %|r2, %|r2, #65536\n" \
+ "adds %1, %|r1, %|r0, lsl #16\n" \
+ "adc %0, %|r2, %|r0, lsr #16" \
+ : "=&r" ((USItype)(xh)), \
+ "=r" ((USItype)(xl)) \
+ : "r" ((USItype)(a)), \
+ "r" ((USItype)(b)) \
+ : "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
+ "umull %r1, %r0, %r2, %r3" \
+ : "=&r" ((USItype)(xh)), \
+ "=r" ((USItype)(xl)) \
+ : "r" ((USItype)(a)), \
+ "r" ((USItype)(b)) \
+ : "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+/***************************************
+ ************** CLIPPER **************
+ ***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __xx; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__xx.__ll) \
+ : "%0" ((USItype)(u)), \
+ "r" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+ ({union {DItype __ll; \
+ struct {SItype __l, __h; } __i; \
+ } __xx; \
+ __asm__ ("mulwx %2,%0" \
+ : "=r" (__xx.__ll) \
+ : "%0" ((SItype)(u)), \
+ "r" ((SItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__w) \
+ : "%0" ((USItype)(u)), \
+ "r" ((USItype)(v))); \
+ __w; })
+#endif /* __clipper__ */
+
+/***************************************
+ ************** GMICRO ***************
+ ***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add.w %5,%1\n" \
+ "addx %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub.w %5,%1\n" \
+ "subx %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+ __asm__ ("mulx %3,%0,%1" \
+ : "=g" ((USItype)(ph)), \
+ "=r" ((USItype)(pl)) \
+ : "%0" ((USItype)(m0)), \
+ "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("divx %4,%0,%1" \
+ : "=g" ((USItype)(q)), \
+ "=r" ((USItype)(r)) \
+ : "1" ((USItype)(nh)), \
+ "0" ((USItype)(nl)), \
+ "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bsch/1 %1,%0" \
+ : "=g" (count) \
+ : "g" ((USItype)(x)), \
+ "0" ((USItype)0))
+#endif
+
+/***************************************
+ ************** HPPA *****************
+ ***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %4,%5,%1\n" \
+ "addc %2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rM" ((USItype)(ah)), \
+ "rM" ((USItype)(bh)), \
+ "%rM" ((USItype)(al)), \
+ "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %4,%5,%1\n" \
+ "subb %2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rM" ((USItype)(ah)), \
+ "rM" ((USItype)(bh)), \
+ "rM" ((USItype)(al)), \
+ "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __xx; \
+ __asm__ ("xmpyu %1,%2,%0" \
+ : "=*f" (__xx.__ll) \
+ : "*f" ((USItype)(u)), \
+ "*f" ((USItype)(v))); \
+ (wh) = __xx.__i.__h; \
+ (wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+ (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+ USItype __tmp; \
+ __asm__ ( \
+ "ldi 1,%0\n" \
+ "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
+ "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
+ "ldo 16(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
+ "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
+ "ldo 8(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
+ "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
+ "ldo 4(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
+ "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
+ "ldo 2(%0),%0 ; Yes. Perform add.\n" \
+ "extru %1,30,1,%1 ; Extract bit 1.\n" \
+ "sub %0,%1,%0 ; Subtract it. " \
+ : "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+
+/***************************************
+ ************** I370 *****************
+ ***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __xx; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (__m0), \
+ "r" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __xx; \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (m0), \
+ "r" (m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __xx; \
+ __xx.__i.__h = n1; __xx.__i.__l = n0; \
+ __asm__ ("dr %0,%2" \
+ : "=r" (__xx.__ll) \
+ : "0" (__xx.__ll), "r" (d)); \
+ (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+
+/***************************************
+ ************** I386 *****************
+ ***************************************/
+#undef __i386__
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl %5,%1\n" \
+ "adcl %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl %5,%1\n" \
+ "sbbl %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mull %3" \
+ : "=a" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), \
+ "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divl %4" \
+ : "=a" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+ USItype __cbtmp; \
+ __asm__ ("bsrl %1,%0" \
+ : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+/***************************************
+ ************** I860 *****************
+ ***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) \
+ __asm__ ("shr %3,r0,r0\n" \
+ "shrd %1,%2,%0" \
+ "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+/***************************************
+ ************** I960 *****************
+ ***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 1,0\n" \
+ "addc %5,%4,%1\n" \
+ "addc %3,%2,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%dI" ((USItype)(ah)), \
+ "dI" ((USItype)(bh)), \
+ "%dI" ((USItype)(al)), \
+ "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 0,0\n" \
+ "subc %5,%4,%1\n" \
+ "subc %3,%2,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "dI" ((USItype)(ah)), \
+ "dI" ((USItype)(bh)), \
+ "dI" ((USItype)(al)), \
+ "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __xx; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__xx.__ll) \
+ : "%dI" ((USItype)(u)), \
+ "dI" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__w) \
+ : "%dI" ((USItype)(u)), \
+ "dI" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+	} __nn, __rq; \
+ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+ __asm__ ("ediv %d,%n,%0" \
+ : "=d" (__rq.__ll) \
+ : "dI" (__nn.__ll), \
+ "dI" ((USItype)(d))); \
+ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+ USItype __cbtmp; \
+ __asm__ ("scanbit %1,%0" \
+ : "=r" (__cbtmp) \
+ : "r" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined(__i960mx) /* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __nn; \
+ __nn.__i.__h = (h); __nn.__i.__l = (l); \
+ __asm__ ("shre %2,%1,%0" \
+ : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+} while (0)
+#endif /* i960mx */
+#endif /* i960 */
+
+/***************************************
+ ************** 68000 ****************
+ ***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add%.l %5,%1\n" \
+ "addx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), \
+ "=&d" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "d" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub%.l %5,%1\n" \
+ "subx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), \
+ "=&d" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "d" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulu%.l %3,%1:%0" \
+ : "=d" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), \
+ "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divu%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divs%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bfffo %1{%b2:%b2},%0" \
+ : "=d" ((USItype)(count)) \
+ : "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm\n" \
+ "move%.l %5,%3\n" \
+ "move%.l %2,%0\n" \
+ "move%.w %3,%1\n" \
+ "swap %3\n" \
+ "swap %0\n" \
+ "mulu %2,%1\n" \
+ "mulu %3,%0\n" \
+ "mulu %2,%3\n" \
+ "swap %2\n" \
+ "mulu %5,%2\n" \
+ "add%.l %3,%2\n" \
+ "jcc 1f\n" \
+ "add%.l %#0x10000,%0\n" \
+ "1: move%.l %2,%3\n" \
+ "clr%.w %2\n" \
+ "swap %2\n" \
+ "swap %3\n" \
+ "clr%.w %3\n" \
+ "add%.l %3,%1\n" \
+ "addx%.l %2,%0\n" \
+ "| End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+
+/***************************************
+ ************** 88000 ****************
+ ***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addu.co %1,%r4,%r5\n" \
+ "addu.ci %0,%r2,%r3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rJ" ((USItype)(ah)), \
+ "rJ" ((USItype)(bh)), \
+ "%rJ" ((USItype)(al)), \
+ "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subu.co %1,%r4,%r5\n" \
+ "subu.ci %0,%r2,%r3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rJ" ((USItype)(ah)), \
+ "rJ" ((USItype)(bh)), \
+ "rJ" ((USItype)(al)), \
+ "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+ USItype __cbtmp; \
+ __asm__ ("ff1 %0,%1" \
+ : "=r" (__cbtmp) \
+ : "r" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __x; \
+ __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __x, __q; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("divu.d %0,%1,%2" \
+ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+	(r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+/***************************************
+ ************** MIPS *****************
+ ***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" \
+ : "=l" ((USItype)(w0)), \
+ "=h" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3\n" \
+ "mflo %0\n" \
+ "mfhi %1" \
+ : "=d" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+/***************************************
+ ************** MIPS/64 **************
+ ***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" \
+ : "=l" ((UDItype)(w0)), \
+ "=h" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3\n" \
+ "mflo %0\n" \
+ "mfhi %1" \
+ : "=d" ((UDItype)(w0)), \
+ "=d" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+
+/***************************************
+ ************** 32000 ****************
+ ***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __xx; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__xx.__ll) \
+ : "%0" ((USItype)(u)), \
+ "g" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__w) \
+ : "%0" ((USItype)(u)), \
+ "g" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __xx; \
+ __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+ __asm__ ("deid %2,%0" \
+ : "=g" (__xx.__ll) \
+ : "0" (__xx.__ll), \
+ "g" ((USItype)(d))); \
+ (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+ __asm__("ffsd %2,%0" \
+ : "=r"((USItype) (count)) \
+ : "0"((USItype) 0), "r"((USItype) (x))); \
+ } while (0)
+#endif /* __ns32000__ */
+
+/***************************************
+ ************** PPC ******************
+ ***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+ if (__builtin_constant_p(bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+ if (__builtin_constant_p(ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p(bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("{cntlz|cntlzw} %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhwu %0,%1,%2" \
+ : "=r" ((USItype) ph) \
+ : "%r" (__m0), \
+ "r" (__m1)); \
+ (pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+ SItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhw %0,%1,%2" \
+ : "=r" ((SItype) ph) \
+ : "%r" (__m0), \
+ "r" (__m1)); \
+ (pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mul %0,%2,%3" \
+ : "=r" ((USItype)(xh)), \
+ "=q" ((USItype)(xl)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+ __asm__ ("mul %0,%2,%3" \
+ : "=r" ((SItype)(xh)), \
+ "=q" ((SItype)(xl)) \
+ : "r" (m0), \
+ "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("div %0,%2,%4" \
+ : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+ : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants. */
+
+/***************************************
+ ************** PYR ******************
+ ***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addw %5,%1\n" \
+ "addwc %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subw %5,%1\n" \
+ "subwb %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+ /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l; } __i; \
+ } __xx; \
+ __asm__ ("movw %1,%R0\n" \
+ "uemul %2,%0" \
+ : "=&r" (__xx.__ll) \
+ : "g" ((USItype) (u)), \
+ "g" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+
+/***************************************
+ ************** RT/ROMP **************
+ ***************************************/
+#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("a %1,%5\n" \
+ "ae %0,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("s %1,%5\n" \
+ "se %0,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ( \
+ "s r2,r2\n" \
+ "mts r10,%2\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "cas %0,r2,r0\n" \
+ "mfs r10,%1" \
+ : "=r" ((USItype)(ph)), \
+ "=r" ((USItype)(pl)) \
+ : "%r" (__m0), \
+ "r" (__m1) \
+ : "r2"); \
+ (ph) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+ if ((x) >= 0x10000) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x) >> 16)); \
+ else { \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x))); \
+ (count) += 16; \
+ } \
+} while (0)
+#endif /* RT/ROMP */
+
+/***************************************
+ ************** SH2 ******************
+ ***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+ && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ( \
+ "dmulu.l %2,%3\n" \
+ "sts macl,%1\n" \
+ "sts mach,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/***************************************
+ ************** SPARC ****************
+ ***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addcc %r4,%5,%1\n" \
+ "addx %r2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rJ" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%rJ" ((USItype)(al)), \
+ "rI" ((USItype)(bl)) \
+ __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subcc %r4,%5,%1\n" \
+ "subx %r2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rJ" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "rJ" ((USItype)(al)), \
+ "rI" ((USItype)(bl)) \
+ __CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match the immediate range because: (1) it is not often useful,
+   and (2) the 'I' constraint treats the range as a 13-bit signed
+   interval, while we want to match a 13-bit interval, sign-extended to
+   32 bits but INTERPRETED AS UNSIGNED. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" ((USItype)(__q)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d))); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide. It also has two additional
+ instructions scan (ffs from high bit) and divscc. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("! Inlined udiv_qrnnd\n" \
+ "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
+ "tst %%g0\n" \
+ "divscc %3,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%0\n" \
+ "rd %%y,%1\n" \
+ "bl,a 1f\n" \
+ "add %1,%4,%1\n" \
+ "1: ! End of inline udiv_qrnnd" \
+ : "=r" ((USItype)(q)), \
+ "=r" ((USItype)(r)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "rI" ((USItype)(d)) \
+ : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+ __asm__ ("scan %1,0,%0" \
+ : "=r" ((USItype)(x)) \
+ : "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+ implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
+ undefined. */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+ /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("! Inlined umul_ppmm\n" \
+ "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
+ "sra %3,31,%%g2 ! Don't move this insn\n" \
+ "and %2,%%g2,%%g2 ! Don't move this insn\n" \
+ "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,0,%%g1\n" \
+ "add %%g1,%%g2,%0\n" \
+ "rd %%y,%1" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "%rI" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39 /* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+ The default udiv_qrnnd (in C) is more than 10 times slower! */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("! Inlined udiv_qrnnd\n\t" \
+ "mov 32,%%g1\n\t" \
+ "subcc %1,%2,%%g0\n\t" \
+ "1: bcs 5f\n\t" \
+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
+ "addx %1,%1,%1 ! so this can't give carry\n\t" \
+ "subcc %%g1,1,%%g1\n\t" \
+ "2: bne 1b\n\t" \
+ "subcc %1,%2,%%g0\n\t" \
+ "bcs 3f\n\t" \
+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
+ "b 3f\n\t" \
+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
+ "4: sub %1,%2,%1\n\t" \
+ "5: addxcc %1,%1,%1\n\t" \
+ "bcc 2b\n\t" \
+ "subcc %%g1,1,%%g1\n\t" \
+ "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
+ "bne 4b\n\t" \
+ "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
+ "sub %1,%2,%1\n\t" \
+ "3: xnor %0,0,%0\n\t" \
+ "! End of inline udiv_qrnnd\n" \
+ : "=&r" ((USItype)(q)), \
+ "=&r" ((USItype)(r)) \
+ : "r" ((USItype)(d)), \
+ "1" ((USItype)(n1)), \
+ "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
+#endif
+#endif /* __sparc__ */
+
+/***************************************
+ ************** VAX ******************
+ ***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl2 %5,%1\n" \
+ "adwc %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl2 %5,%1\n" \
+ "sbwc %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h; } __i; \
+ } __xx; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("emul %1,%2,$0,%0" \
+ : "=g" (__xx.__ll) \
+ : "g" (__m0), \
+ "g" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+ union {DItype __ll; \
+ struct {SItype __l, __h; } __i; \
+ } __xx; \
+ __xx.__i.__h = n1; __xx.__i.__l = n0; \
+ __asm__ ("ediv %3,%2,%0,%1" \
+ : "=g" (q), "=g" (r) \
+ : "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+
+/***************************************
+ ************** Z8000 ****************
+ ***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), \
+ "=&r" ((unsigned int)(sl)) \
+ : "%0" ((unsigned int)(ah)), \
+ "r" ((unsigned int)(bh)), \
+ "%1" ((unsigned int)(al)), \
+ "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), \
+ "=&r" ((unsigned int)(sl)) \
+ : "0" ((unsigned int)(ah)), \
+ "r" ((unsigned int)(bh)), \
+ "1" ((unsigned int)(al)), \
+ "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+ union {long int __ll; \
+ struct {unsigned int __h, __l; } __i; \
+ } __xx; \
+ unsigned int __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mult %S0,%H3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (__m0), \
+ "rQR" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((signed int) __m0 >> 15) & __m1) \
+ + (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/***************************************
+ *********** Generic Versions ********
+ ***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+ UDWtype __ll = __umulsidi3(m0, m1); \
+ ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+ pl = (UWtype) __ll; \
+}
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+ ({UWtype __hi, __lo; \
+ umul_ppmm(__hi, __lo, u, v); \
+ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+ /* If this machine has no inline assembler, use C macros. */
+
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+ UWtype __x; \
+ __x = (al) + (bl); \
+ (sh) = (ah) + (bh) + (__x < (al)); \
+ (sl) = __x; \
+} while (0)
+#endif
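+
+/*
+ * Illustrative sketch, not part of the original file: the generic
+ * add_ssaaaa recovers the carry out of the low-word addition from the
+ * unsigned wrap-around test (__x < (al)).  Adding 1 to an all-ones low
+ * word wraps it to 0, so the carry must propagate into the high word:
+ */
+#if 0	/* usage sketch only, never compiled */
+static void add_ssaaaa_example(void)
+{
+	UWtype hi, lo;
+
+	/* (1 : ~0) + (0 : 1) == (2 : 0), i.e. carry into the high word */
+	add_ssaaaa(hi, lo, (UWtype) 1, ~(UWtype) 0, (UWtype) 0, (UWtype) 1);
+}
+#endif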
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart(__u); \
+ __uh = __ll_highpart(__u); \
+ __vl = __ll_lowpart(__v); \
+ __vh = __ll_highpart(__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart(__x0);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
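+
+/*
+ * Worked example, not part of the original file: the generic umul_ppmm
+ * above is schoolbook multiplication on half-limbs.  With 32-bit limbs,
+ * u = 0x00020003 and v = 0x00040005:
+ *
+ *	__x0 = 0x0003 * 0x0005 = 0x0000000f	(low  * low)
+ *	__x1 = 0x0003 * 0x0004 = 0x0000000c	(low  * high)
+ *	__x2 = 0x0002 * 0x0005 = 0x0000000a	(high * low)
+ *	__x3 = 0x0002 * 0x0004 = 0x00000008	(high * high)
+ *
+ * The middle products sum to 0x16 without overflowing, so no carry is
+ * added to __x3, and the result is
+ * (w1:w0) = (0x00000008 : 0x0016000f) == 0x20003 * 0x40005.
+ */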
+
+#if !defined(smul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+ UWtype __w1; \
+ UWtype __m0 = (u), __m1 = (v); \
+ umul_ppmm(__w1, w0, __m0, __m1); \
+ (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+ - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+
+ /* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ __d1 = __ll_highpart(d); \
+ __d0 = __ll_lowpart(d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+ if (__r1 < __m) { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+ if (__r0 < __m) { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+} while (0)
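+
+/*
+ * Illustrative sketch, not part of the original file: __udiv_qrnnd_c is
+ * two rounds of schoolbook division by the half-limbs of d; the trial
+ * quotients __q1 and __q0 can each be slightly too large, which the
+ * nested "--, += (d)" corrections repair.  The divisor must be
+ * normalized (most significant bit set) and n1 < d must hold:
+ */
+#if 0	/* usage sketch only, never compiled (assumes 32-bit limbs) */
+static void udiv_qrnnd_c_example(void)
+{
+	UWtype q, r;
+
+	/* 0x100000000 / 0x80000001: q == 1, r == 0x7fffffff */
+	__udiv_qrnnd_c(q, r, (UWtype) 1, (UWtype) 0, (UWtype) 0x80000001);
+}
+#endif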
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+ __udiv_w_sdiv (defined in libgcc or elsewhere). */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+ UWtype __r; \
+ (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+ (r) = __r; \
+} while (0)
+#endif
+
+ /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined(count_leading_zeros)
+ extern
+#ifdef __STDC__
+ const
+#endif
+ unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+ UWtype __xr = (x); \
+ UWtype __a; \
+ \
+ if (W_TYPE_SIZE <= 32) { \
+ __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+ ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+ : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
+ } \
+ else { \
+ for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+ if (((__xr >> __a) & 0xff) != 0) \
+ break; \
+ } \
+ \
+ (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+ /* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
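+
+/*
+ * Worked example, not part of the original file: the fallback first
+ * finds the shift __a of the highest non-zero byte, then lets the
+ * 256-entry __clz_tab give the bit count inside that byte.  For
+ * x = 0x00012345 with 32-bit limbs: __a == 2*__BITS4 == 16,
+ * __clz_tab[0x1] == 1, so count == 32 - (1 + 16) == 15 leading zeros.
+ */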
+
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros. The latter might be
+ defined in asm, but if it is not, the C version above is good enough. */
+#define count_trailing_zeros(count, x) \
+do { \
+ UWtype __ctz_x = (x); \
+ UWtype __ctz_c; \
+ count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+ (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
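+
+/*
+ * Worked example, not part of the original file: __ctz_x & -__ctz_x
+ * isolates the lowest set bit.  For x == 0x28 (binary 101000) this is
+ * 0x8 (bit 3); count_leading_zeros then reports W_TYPE_SIZE - 1 - 3
+ * leading zeros, and the subtraction recovers count == 3 trailing zeros.
+ */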
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
new file mode 100644
index 000000000000..f56b9ba295e6
--- /dev/null
+++ b/lib/mpi/mpi-add.c
@@ -0,0 +1,234 @@
+/* mpi-add.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. U and W may be the same.
+ */
+int mpi_add_ui(MPI w, const MPI u, unsigned long v)
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize, wsize;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+ wsign = 0;
+
+	/* If there is no space for W (and a possible carry), increase it. */
+ wsize = usize + 1;
+ if (w->alloced < wsize)
+ if (mpi_resize(w, wsize) < 0)
+ return -ENOMEM;
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->d;
+ wp = w->d;
+
+ if (!usize) { /* simple */
+ wp[0] = v;
+ wsize = v ? 1 : 0;
+ } else if (!usign) { /* mpi is not negative */
+ mpi_limb_t cy;
+ cy = mpihelp_add_1(wp, up, usize, v);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ } else { /* The signs are different. Need exact comparison to determine
+ * which operand to subtract from which. */
+ if (usize == 1 && up[0] < v) {
+ wp[0] = v - up[0];
+ wsize = 1;
+ } else {
+ mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease by at most one limb. */
+ wsize = usize - (wp[usize - 1] == 0);
+ wsign = 1;
+ }
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+ return 0;
+}
+
+int mpi_add(MPI w, MPI u, MPI v)
+{
+ mpi_ptr_t wp, up, vp;
+ mpi_size_t usize, vsize, wsize;
+ int usign, vsign, wsign;
+
+ if (u->nlimbs < v->nlimbs) { /* Swap U and V. */
+ usize = v->nlimbs;
+ usign = v->sign;
+ vsize = u->nlimbs;
+ vsign = u->sign;
+ wsize = usize + 1;
+ if (RESIZE_IF_NEEDED(w, wsize) < 0)
+ return -ENOMEM;
+ /* These must be after realloc (u or v may be the same as w). */
+ up = v->d;
+ vp = u->d;
+ } else {
+ usize = u->nlimbs;
+ usign = u->sign;
+ vsize = v->nlimbs;
+ vsign = v->sign;
+ wsize = usize + 1;
+ if (RESIZE_IF_NEEDED(w, wsize) < 0)
+ return -ENOMEM;
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->d;
+ vp = v->d;
+ }
+ wp = w->d;
+ wsign = 0;
+
+ if (!vsize) { /* simple */
+ MPN_COPY(wp, up, usize);
+ wsize = usize;
+ wsign = usign;
+ } else if (usign != vsign) { /* different sign */
+ /* This test is right since USIZE >= VSIZE */
+ if (usize != vsize) {
+ mpihelp_sub(wp, up, usize, vp, vsize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ wsign = usign;
+ } else if (mpihelp_cmp(up, vp, usize) < 0) {
+ mpihelp_sub_n(wp, vp, up, usize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ if (!usign)
+ wsign = 1;
+ } else {
+ mpihelp_sub_n(wp, up, vp, usize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ if (usign)
+ wsign = 1;
+ }
+ } else { /* U and V have same sign. Add them. */
+ mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ if (usign)
+ wsign = 1;
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+ return 0;
+}
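+
+/*
+ * Worked example, not part of the original file: when the signs differ,
+ * mpi_add above turns into a magnitude subtraction whose direction is
+ * fixed by the exact limb comparison.  For u = 5 and v = -7 the limb
+ * counts match, mpihelp_cmp() reports |u| < |v|, so it computes
+ * |v| - |u| = 2 and, because the larger magnitude is negative, sets
+ * wsign, giving w = -2.
+ */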
+
+/****************
+ * Subtract the unsigned integer V from the mpi-integer U and store the
+ * result in W.
+ */
+int mpi_sub_ui(MPI w, MPI u, unsigned long v)
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize, wsize;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+ wsign = 0;
+
+	/* If there is no space for W (and a possible carry), increase it. */
+ wsize = usize + 1;
+ if (w->alloced < wsize)
+ if (mpi_resize(w, wsize) < 0)
+ return -ENOMEM;
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->d;
+ wp = w->d;
+
+ if (!usize) { /* simple */
+ wp[0] = v;
+ wsize = v ? 1 : 0;
+ wsign = 1;
+ } else if (usign) { /* mpi and v are negative */
+ mpi_limb_t cy;
+ cy = mpihelp_add_1(wp, up, usize, v);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ } else { /* The signs are different. Need exact comparison to determine
+ * which operand to subtract from which. */
+ if (usize == 1 && up[0] < v) {
+ wp[0] = v - up[0];
+ wsize = 1;
+ wsign = 1;
+ } else {
+ mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease by at most one limb. */
+ wsize = usize - (wp[usize - 1] == 0);
+ }
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+ return 0;
+}
+
+int mpi_sub(MPI w, MPI u, MPI v)
+{
+ int rc;
+
+ if (w == v) {
+ MPI vv;
+ if (mpi_copy(&vv, v) < 0)
+ return -ENOMEM;
+ vv->sign = !vv->sign;
+ rc = mpi_add(w, u, vv);
+ mpi_free(vv);
+ } else {
+		/* FIXME: this is not thread-safe (we temporarily modify v) */
+ v->sign = !v->sign;
+ rc = mpi_add(w, u, v);
+ v->sign = !v->sign;
+ }
+ return rc;
+}
+
+int mpi_addm(MPI w, MPI u, MPI v, MPI m)
+{
+ if (mpi_add(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+ return -ENOMEM;
+ return 0;
+}
+
+int mpi_subm(MPI w, MPI u, MPI v, MPI m)
+{
+ if (mpi_sub(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+ return -ENOMEM;
+ return 0;
+}
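+
+/*
+ * Illustrative sketch, not part of the original file: the modular
+ * variants compose the plain operation with a floor reduction by m,
+ * so for a positive modulus the result lies in [0, m).  A minimal
+ * (hypothetical) caller propagating -ENOMEM like the rest of this
+ * library:
+ *
+ *	int rc = mpi_addm(w, u, v, m);	so that w = (u + v) mod m
+ *	if (rc < 0)
+ *		return rc;
+ */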
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
new file mode 100644
index 000000000000..2f526627e4f5
--- /dev/null
+++ b/lib/mpi/mpi-bit.c
@@ -0,0 +1,217 @@
+/* mpi-bit.c - MPI bit-level functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define A_LIMB_1 ((mpi_limb_t) 1)
+
+/****************
+ * Sometimes we have most significant limbs (MSLs) which are 0;
+ * they are unwanted for several reasons, so this function removes them.
+ */
+void mpi_normalize(MPI a)
+{
+ for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+ ;
+}
+
+/****************
+ * Return the number of bits in A.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+ unsigned n;
+
+ mpi_normalize(a);
+
+ if (a->nlimbs) {
+ mpi_limb_t alimb = a->d[a->nlimbs - 1];
+ if (alimb)
+ count_leading_zeros(n, alimb);
+ else
+ n = BITS_PER_MPI_LIMB;
+ n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+ } else
+ n = 0;
+ return n;
+}
+EXPORT_SYMBOL_GPL(mpi_get_nbits);
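+
+/*
+ * Worked example, not part of the original file: for a two-limb value
+ * with d[1] == 0x1 and d[0] == 0xdeadbeef on a 32-bit limb machine,
+ * count_leading_zeros(n, 0x1) gives n == 31, so the result is
+ * 32 - 31 + 1 * 32 == 33 bits, as expected for 0x1deadbeef.
+ */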
+
+/****************
+ * Test whether bit N is set.
+ */
+int mpi_test_bit(MPI a, unsigned n)
+{
+ unsigned limbno, bitno;
+ mpi_limb_t limb;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if (limbno >= a->nlimbs)
+ return 0; /* too far left: this is a 0 */
+ limb = a->d[limbno];
+ return (limb & (A_LIMB_1 << bitno)) ? 1 : 0;
+}
+
+/****************
+ * Set bit N of A.
+ */
+int mpi_set_bit(MPI a, unsigned n)
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if (limbno >= a->nlimbs) { /* resize */
+ if (a->alloced >= limbno)
+ if (mpi_resize(a, limbno + 1) < 0)
+ return -ENOMEM;
+ a->nlimbs = limbno + 1;
+ }
+ a->d[limbno] |= (A_LIMB_1 << bitno);
+ return 0;
+}
+
+/****************
+ * Set bit N of A and clear all bits above it.
+ */
+int mpi_set_highbit(MPI a, unsigned n)
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if (limbno >= a->nlimbs) { /* resize */
+ if (a->alloced >= limbno)
+ if (mpi_resize(a, limbno + 1) < 0)
+ return -ENOMEM;
+ a->nlimbs = limbno + 1;
+ }
+ a->d[limbno] |= (A_LIMB_1 << bitno);
+ for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++)
+ a->d[limbno] &= ~(A_LIMB_1 << bitno);
+ a->nlimbs = limbno + 1;
+ return 0;
+}
+
+/****************
+ * clear bit N of A and all bits above
+ */
+void mpi_clear_highbit(MPI a, unsigned n)
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if (limbno >= a->nlimbs)
+		return; /* not allocated, so no need to clear bits :-) */
+
+ for (; bitno < BITS_PER_MPI_LIMB; bitno++)
+ a->d[limbno] &= ~(A_LIMB_1 << bitno);
+ a->nlimbs = limbno + 1;
+}
+
+/****************
+ * Clear bit N of A.
+ */
+void mpi_clear_bit(MPI a, unsigned n)
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if (limbno >= a->nlimbs)
+		return; /* don't need to clear this bit, it's too far to the left */
+ a->d[limbno] &= ~(A_LIMB_1 << bitno);
+}
+
+/****************
+ * Shift A by N bits to the right
+ * FIXME: should use alloc_limb if X and A are same.
+ */
+int mpi_rshift(MPI x, MPI a, unsigned n)
+{
+ mpi_ptr_t xp;
+ mpi_size_t xsize;
+
+ xsize = a->nlimbs;
+ x->sign = a->sign;
+ if (RESIZE_IF_NEEDED(x, (size_t) xsize) < 0)
+ return -ENOMEM;
+ xp = x->d;
+
+ if (xsize) {
+ mpihelp_rshift(xp, a->d, xsize, n);
+ MPN_NORMALIZE(xp, xsize);
+ }
+ x->nlimbs = xsize;
+ return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the left
+ * This is used only within the MPI library
+ */
+int mpi_lshift_limbs(MPI a, unsigned int count)
+{
+ mpi_ptr_t ap = a->d;
+ int n = a->nlimbs;
+ int i;
+
+ if (!count || !n)
+ return 0;
+
+ if (RESIZE_IF_NEEDED(a, n + count) < 0)
+ return -ENOMEM;
+
+ for (i = n - 1; i >= 0; i--)
+ ap[i + count] = ap[i];
+ for (i = 0; i < count; i++)
+ ap[i] = 0;
+ a->nlimbs += count;
+ return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the right
+ * This is used only within the MPI library
+ */
+void mpi_rshift_limbs(MPI a, unsigned int count)
+{
+ mpi_ptr_t ap = a->d;
+ mpi_size_t n = a->nlimbs;
+ unsigned int i;
+
+ if (count >= n) {
+ a->nlimbs = 0;
+ return;
+ }
+
+ for (i = 0; i < n - count; i++)
+ ap[i] = ap[i + count];
+ ap[i] = 0;
+ a->nlimbs -= count;
+}
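+
+/*
+ * Worked example, not part of the original file: limb shifts move whole
+ * words.  For a two-limb value a with d[1] == 1, d[0] == 2 (i.e.
+ * a == 2^32 + 2 with 32-bit limbs), mpi_rshift_limbs(a, 1) copies d[1]
+ * down to d[0] and drops a limb, leaving a == 1: a truncating division
+ * by 2^32.  Bit-granular shifts go through mpi_rshift() above instead.
+ */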
diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c
new file mode 100644
index 000000000000..914bc42a8a80
--- /dev/null
+++ b/lib/mpi/mpi-cmp.c
@@ -0,0 +1,68 @@
+/* mpi-cmp.c - MPI functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+int mpi_cmp_ui(MPI u, unsigned long v)
+{
+ mpi_limb_t limb = v;
+
+ mpi_normalize(u);
+ if (!u->nlimbs && !limb)
+ return 0;
+ if (u->sign)
+ return -1;
+ if (u->nlimbs > 1)
+ return 1;
+
+ if (u->d[0] == limb)
+ return 0;
+ else if (u->d[0] > limb)
+ return 1;
+ else
+ return -1;
+}
+
+int mpi_cmp(MPI u, MPI v)
+{
+ mpi_size_t usize, vsize;
+ int cmp;
+
+ mpi_normalize(u);
+ mpi_normalize(v);
+ usize = u->nlimbs;
+ vsize = v->nlimbs;
+ if (!u->sign && v->sign)
+ return 1;
+ if (u->sign && !v->sign)
+ return -1;
+ if (usize != vsize && !u->sign && !v->sign)
+ return usize - vsize;
+ if (usize != vsize && u->sign && v->sign)
+		return vsize - usize;
+ if (!usize)
+ return 0;
+ cmp = mpihelp_cmp(u->d, v->d, usize);
+ if (!cmp)
+ return 0;
+ if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0))
+ return 1;
+ return -1;
+}
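+
+/*
+ * Illustrative sketch, not part of the original file: as with memcmp(),
+ * only the sign of the return value is meaningful.  A (hypothetical)
+ * range check:
+ *
+ *	if (mpi_cmp(x, lower) < 0 || mpi_cmp(x, upper) > 0)
+ *		return -EINVAL;		x outside [lower, upper]
+ */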
diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
new file mode 100644
index 000000000000..f68cbbb4d4a4
--- /dev/null
+++ b/lib/mpi/mpi-div.c
@@ -0,0 +1,338 @@
+/* mpi-div.c - MPI functions
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor)
+{
+ int rc = -ENOMEM;
+ int divisor_sign = divisor->sign;
+ MPI temp_divisor = NULL;
+
+ /* We need the original value of the divisor after the remainder has been
+	 * preliminarily calculated. We have to copy it to temporary space if it's
+ * the same variable as REM. */
+ if (rem == divisor) {
+ if (mpi_copy(&temp_divisor, divisor) < 0)
+ goto nomem;
+ divisor = temp_divisor;
+ }
+
+ if (mpi_tdiv_qr(NULL, rem, dividend, divisor) < 0)
+ goto nomem;
+ if (((divisor_sign ? 1 : 0) ^ (dividend->sign ? 1 : 0)) && rem->nlimbs)
+ if (mpi_add(rem, rem, divisor) < 0)
+ goto nomem;
+
+ rc = 0;
+
+nomem:
+ if (temp_divisor)
+ mpi_free(temp_divisor);
+ return rc;
+}
+
+/****************
+ * Division rounding the quotient towards -infinity.
+ * The remainder gets the same sign as the denominator.
+ * rem is optional
+ */
+
+ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor)
+{
+ mpi_limb_t rlimb;
+
+ rlimb = mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+ if (rlimb && dividend->sign)
+ rlimb = divisor - rlimb;
+
+ if (rem) {
+ rem->d[0] = rlimb;
+ rem->nlimbs = rlimb ? 1 : 0;
+ }
+ return rlimb;
+}
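+
+/*
+ * Worked example, not part of the original file: floor division keeps
+ * the remainder non-negative for a positive divisor.  For dividend -7
+ * and divisor 3, mpihelp_mod_1() on the magnitude yields 1, and the
+ * sign fix-up above turns it into 3 - 1 == 2, matching
+ * -7 == 3 * (-3) + 2.
+ */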
+
+int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor)
+{
+	MPI tmp = mpi_alloc(mpi_get_nlimbs(quot));
+	int rc;
+
+	if (!tmp)
+		return -ENOMEM;
+	rc = mpi_fdiv_qr(quot, tmp, dividend, divisor);
+	mpi_free(tmp);
+	return rc;
+}
+
+int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor)
+{
+ int divisor_sign = divisor->sign;
+ MPI temp_divisor = NULL;
+
+ if (quot == divisor || rem == divisor) {
+ if (mpi_copy(&temp_divisor, divisor) < 0)
+ return -ENOMEM;
+ divisor = temp_divisor;
+ }
+
+ if (mpi_tdiv_qr(quot, rem, dividend, divisor) < 0)
+ goto nomem;
+
+ if ((divisor_sign ^ dividend->sign) && rem->nlimbs) {
+ if (mpi_sub_ui(quot, quot, 1) < 0)
+ goto nomem;
+ if (mpi_add(rem, rem, divisor) < 0)
+ goto nomem;
+ }
+
+ if (temp_divisor)
+ mpi_free(temp_divisor);
+
+ return 0;
+
+nomem:
+ mpi_free(temp_divisor);
+ return -ENOMEM;
+}
+
+/* If den == quot, den needs temporary storage.
+ * If den == rem, den needs temporary storage.
+ * If num == quot, num needs temporary storage.
+ * If den has temporary storage, it can be normalized while being copied,
+ * i.e no extra storage should be allocated.
+ */
+
+int mpi_tdiv_r(MPI rem, MPI num, MPI den)
+{
+ return mpi_tdiv_qr(NULL, rem, num, den);
+}
+
+int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
+{
+ int rc = -ENOMEM;
+ mpi_ptr_t np, dp;
+ mpi_ptr_t qp, rp;
+ mpi_size_t nsize = num->nlimbs;
+ mpi_size_t dsize = den->nlimbs;
+ mpi_size_t qsize, rsize;
+ mpi_size_t sign_remainder = num->sign;
+ mpi_size_t sign_quotient = num->sign ^ den->sign;
+ unsigned normalization_steps;
+ mpi_limb_t q_limb;
+ mpi_ptr_t marker[5];
+ int markidx = 0;
+
+ if (!dsize)
+ return -EINVAL;
+
+ memset(marker, 0, sizeof(marker));
+
+	/* Make sure there is enough space for the quotient and remainder.
+ * We need space for an extra limb in the remainder, because it's
+ * up-shifted (normalized) below. */
+ rsize = nsize + 1;
+ if (mpi_resize(rem, rsize) < 0)
+ goto nomem;
+
+ qsize = rsize - dsize; /* qsize cannot be bigger than this. */
+ if (qsize <= 0) {
+ if (num != rem) {
+ rem->nlimbs = num->nlimbs;
+ rem->sign = num->sign;
+ MPN_COPY(rem->d, num->d, nsize);
+ }
+ if (quot) {
+ /* This needs to follow the assignment to rem, in case the
+ * numerator and quotient are the same. */
+ quot->nlimbs = 0;
+ quot->sign = 0;
+ }
+ return 0;
+ }
+
+ if (quot)
+ if (mpi_resize(quot, qsize) < 0)
+ goto nomem;
+
+ /* Read pointers here, when reallocation is finished. */
+ np = num->d;
+ dp = den->d;
+ rp = rem->d;
+
+ /* Optimize division by a single-limb divisor. */
+ if (dsize == 1) {
+ mpi_limb_t rlimb;
+ if (quot) {
+ qp = quot->d;
+ rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]);
+ qsize -= qp[qsize - 1] == 0;
+ quot->nlimbs = qsize;
+ quot->sign = sign_quotient;
+ } else
+ rlimb = mpihelp_mod_1(np, nsize, dp[0]);
+ rp[0] = rlimb;
+ rsize = rlimb != 0 ? 1 : 0;
+ rem->nlimbs = rsize;
+ rem->sign = sign_remainder;
+ return 0;
+ }
+
+ if (quot) {
+ qp = quot->d;
+ /* Make sure QP and NP point to different objects. Otherwise the
+ * numerator would be gradually overwritten by the quotient limbs. */
+ if (qp == np) { /* Copy NP object to temporary space. */
+ np = marker[markidx++] = mpi_alloc_limb_space(nsize);
+ if (!np)
+ goto nomem;
+ MPN_COPY(np, qp, nsize);
+ }
+ } else /* Put quotient at top of remainder. */
+ qp = rp + dsize;
+
+ count_leading_zeros(normalization_steps, dp[dsize - 1]);
+
+ /* Normalize the denominator, i.e. make its most significant bit set by
+ * shifting it NORMALIZATION_STEPS bits to the left. Also shift the
+ * numerator the same number of steps (to keep the quotient the same!).
+ */
+ if (normalization_steps) {
+ mpi_ptr_t tp;
+ mpi_limb_t nlimb;
+
+ /* Shift up the denominator setting the most significant bit of
+	 * the most significant word. Use temporary storage so as not to clobber
+ * the original contents of the denominator. */
+ tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+ if (!tp)
+ goto nomem;
+ mpihelp_lshift(tp, dp, dsize, normalization_steps);
+ dp = tp;
+
+ /* Shift up the numerator, possibly introducing a new most
+	 * significant word. Move the shifted numerator into the remainder
+	 * at the same time. */
+ nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
+ if (nlimb) {
+ rp[nsize] = nlimb;
+ rsize = nsize + 1;
+ } else
+ rsize = nsize;
+ } else {
+ /* The denominator is already normalized, as required. Copy it to
+ * temporary space if it overlaps with the quotient or remainder. */
+ if (dp == rp || (quot && (dp == qp))) {
+ mpi_ptr_t tp;
+
+ tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+ if (!tp)
+ goto nomem;
+ MPN_COPY(tp, dp, dsize);
+ dp = tp;
+ }
+
+ /* Move the numerator to the remainder. */
+ if (rp != np)
+ MPN_COPY(rp, np, nsize);
+
+ rsize = nsize;
+ }
+
+ q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize);
+
+ if (quot) {
+ qsize = rsize - dsize;
+ if (q_limb) {
+ qp[qsize] = q_limb;
+ qsize += 1;
+ }
+
+ quot->nlimbs = qsize;
+ quot->sign = sign_quotient;
+ }
+
+ rsize = dsize;
+ MPN_NORMALIZE(rp, rsize);
+
+ if (normalization_steps && rsize) {
+ mpihelp_rshift(rp, rp, rsize, normalization_steps);
+ rsize -= rp[rsize - 1] == 0 ? 1 : 0;
+ }
+
+ rem->nlimbs = rsize;
+ rem->sign = sign_remainder;
+
+ rc = 0;
+nomem:
+ while (markidx)
+ mpi_free_limb_space(marker[--markidx]);
+ return rc;
+}
+
+int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count)
+{
+ mpi_size_t usize, wsize;
+ mpi_size_t limb_cnt;
+
+ usize = u->nlimbs;
+ limb_cnt = count / BITS_PER_MPI_LIMB;
+ wsize = usize - limb_cnt;
+ if (limb_cnt >= usize)
+ w->nlimbs = 0;
+ else {
+ mpi_ptr_t wp;
+ mpi_ptr_t up;
+
+ if (RESIZE_IF_NEEDED(w, wsize) < 0)
+ return -ENOMEM;
+ wp = w->d;
+ up = u->d;
+
+ count %= BITS_PER_MPI_LIMB;
+ if (count) {
+ mpihelp_rshift(wp, up + limb_cnt, wsize, count);
+ wsize -= !wp[wsize - 1];
+ } else {
+ MPN_COPY_INCR(wp, up + limb_cnt, wsize);
+ }
+
+ w->nlimbs = wsize;
+ }
+ return 0;
+}
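+
+/*
+ * Worked example, not part of the original file: the shift count is
+ * split into whole limbs and residual bits.  With 32-bit limbs,
+ * mpi_tdiv_q_2exp(w, u, 40) drops limb_cnt == 1 whole limb and then
+ * right-shifts the remaining limbs by 40 % 32 == 8 bits.
+ */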
+
+/****************
+ * Check whether dividend is divisible by divisor
+ * (note: divisor must fit into a limb)
+ */
+int mpi_divisible_ui(MPI dividend, ulong divisor)
+{
+ return !mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+}
diff --git a/lib/mpi/mpi-gcd.c b/lib/mpi/mpi-gcd.c
new file mode 100644
index 000000000000..13c48aef9c4e
--- /dev/null
+++ b/lib/mpi/mpi-gcd.c
@@ -0,0 +1,59 @@
+/* mpi-gcd.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Find the greatest common divisor G of A and B.
+ * Return: true if the GCD is 1, false in all other cases (-ENOMEM on allocation failure)
+ */
+int mpi_gcd(MPI g, const MPI xa, const MPI xb)
+{
+ MPI a = NULL, b = NULL;
+
+ if (mpi_copy(&a, xa) < 0)
+ goto nomem;
+
+ if (mpi_copy(&b, xb) < 0)
+ goto nomem;
+
+ /* TAOCP Vol II, 4.5.2, Algorithm A */
+ a->sign = 0;
+ b->sign = 0;
+ while (mpi_cmp_ui(b, 0)) {
+ if (mpi_fdiv_r(g, a, b) < 0) /* g used as temporary variable */
+ goto nomem;
+ if (mpi_set(a, b) < 0)
+ goto nomem;
+ if (mpi_set(b, g) < 0)
+ goto nomem;
+ }
+ if (mpi_set(g, a) < 0)
+ goto nomem;
+
+ mpi_free(a);
+ mpi_free(b);
+ return !mpi_cmp_ui(g, 1);
+
+nomem:
+ mpi_free(a);
+ mpi_free(b);
+ return -ENOMEM;
+}
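
The loop above is plain Euclidean reduction (TAOCP Vol II, Algorithm A) lifted to MPIs, with g doubling as scratch space for the remainder. The same shape on machine words, for illustration only:

    #include <stdio.h>

    /* Euclid's algorithm on machine words: gcd(a, b). */
    static unsigned long gcd_ul(unsigned long a, unsigned long b)
    {
        while (b) {
            unsigned long r = a % b;  /* the role mpi_fdiv_r plays above */

            a = b;
            b = r;
        }
        return a;
    }

    int main(void)
    {
        printf("%lu\n", gcd_ul(1071, 462));  /* prints 21 */
        return 0;
    }
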
diff --git a/lib/mpi/mpi-inline.c b/lib/mpi/mpi-inline.c
new file mode 100644
index 000000000000..654f68aeed8b
--- /dev/null
+++ b/lib/mpi/mpi-inline.c
@@ -0,0 +1,31 @@
+/* mpi-inline.c
+ * Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* put the inline functions as real functions into the lib */
+#define G10_MPI_INLINE_DECL
+
+#include "mpi-internal.h"
+
+/* Always include the header because it is only
+ * included by mpi-internal.h if __GNUC__ is defined, but we
+ * need it here in all cases; the above definition
+ * of the macro allows us to do so.
+ */
+#include "mpi-inline.h"
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h - Internal to the Multi Precision Integers
+ * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL extern inline
+#endif
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb += x;
+ *res_ptr++ = s2_limb;
+ if (s2_limb < x) { /* sum is less than the left operand: handle carry */
+ while (--s1_size) {
+ x = *s1_ptr++ + 1; /* add carry */
+ *res_ptr++ = x; /* and store */
+ if (x) /* not 0 (no overflow): we can stop */
+ goto leave;
+ }
+ return 1; /* return carry (size of s1 too small) */
+ }
+
+leave:
+ if (res_ptr != s1_ptr) { /* not the same variable */
+ mpi_size_t i; /* copy the rest */
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0; /* no carry */
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+ mpi_limb_t cy = 0;
+
+ if (s2_size)
+ cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size)
+ cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+ s1_size - s2_size, cy);
+ return cy;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x - s2_limb;
+ *res_ptr++ = s2_limb;
+ if (s2_limb > x) {
+ while (--s1_size) {
+ x = *s1_ptr++;
+ *res_ptr++ = x - 1;
+ if (x)
+ goto leave;
+ }
+ return 1;
+ }
+
+leave:
+ if (res_ptr != s1_ptr) {
+ mpi_size_t i;
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+ mpi_limb_t cy = 0;
+
+ if (s2_size)
+ cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size)
+ cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+ s1_size - s2_size, cy);
+ return cy;
+}
+
+#endif /*G10_MPI_INLINE_H */
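
mpihelp_add_1 above detects a carry without a wider type: after s2_limb += x, the sum wrapped exactly when it is smaller than an addend. A self-contained sketch of that trick (add_1 is a hypothetical name):

    #include <stdint.h>
    #include <stdio.h>

    /* Add b into the n-limb number a[], returning the final carry. */
    static uint64_t add_1(uint64_t *a, int n, uint64_t b)
    {
        int i;

        for (i = 0; i < n; i++) {
            a[i] += b;
            if (a[i] >= b)   /* no wrap-around: carry resolved */
                return 0;
            b = 1;           /* wrapped: propagate a carry of 1 */
        }
        return 1;            /* carry out of the top limb */
    }

    int main(void)
    {
        uint64_t a[2] = { UINT64_MAX, UINT64_MAX };
        unsigned long long c = add_1(a, 2, 1);

        /* prints: carry=1 a=[0,0] */
        printf("carry=%llu a=[%llu,%llu]\n", c,
               (unsigned long long)a[0], (unsigned long long)a[1]);
        return 0;
    }
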
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h - Internal to the Multi Precision Integers
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+
+#define log_debug printk
+#define log_bug printk
+
+#define assert(x) \
+ do { \
+ if (!(x)) \
+ log_bug("failed assertion\n"); \
+ } while (0)
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines. */
+
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+
+typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
+typedef int mpi_size_t; /* (must be a signed type) */
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+ if (a->alloced < b)
+ return mpi_resize(a, b);
+ return 0;
+}
+
+/* Copy N limbs from S to D. */
+#define MPN_COPY(d, s, n) \
+ do { \
+ mpi_size_t _i; \
+ for (_i = 0; _i < (n); _i++) \
+ (d)[_i] = (s)[_i]; \
+ } while (0)
+
+/* Copy N limbs from S to D, from low limb to high limb. */
+#define MPN_COPY_INCR(d, s, n) \
+ do { \
+ mpi_size_t _i; \
+ for (_i = 0; _i < (n); _i++) \
+ (d)[_i] = (s)[_i]; \
+ } while (0)
+
+#define MPN_COPY_DECR(d, s, n) \
+ do { \
+ mpi_size_t _i; \
+ for (_i = (n)-1; _i >= 0; _i--) \
+ (d)[_i] = (s)[_i]; \
+ } while (0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+ do { \
+ int _i; \
+ for (_i = 0; _i < (n); _i++) \
+ (d)[_i] = 0; \
+ } while (0)
+
+#define MPN_NORMALIZE(d, n) \
+ do { \
+ while ((n) > 0) { \
+ if ((d)[(n)-1]) \
+ break; \
+ (n)--; \
+ } \
+ } while (0)
+
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+ do { \
+ for (;;) { \
+ if ((d)[(n)-1]) \
+ break; \
+ (n)--; \
+ } \
+ } while (0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+ do { \
+ if ((size) < KARATSUBA_THRESHOLD) \
+ mul_n_basecase(prodp, up, vp, size); \
+ else \
+ mul_n(prodp, up, vp, size, tspace); \
+ } while (0)
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones). For correct operation, the most significant bit of D
+ * has to be set. Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+ do { \
+ mpi_limb_t _q, _ql, _r; \
+ mpi_limb_t _xh, _xl; \
+ umul_ppmm(_q, _ql, (nh), (di)); \
+ _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \
+ umul_ppmm(_xh, _xl, _q, (d)); \
+ sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \
+ if (_xh) { \
+ sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \
+ _q++; \
+ if (_xh) { \
+ sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \
+ _q++; \
+ } \
+ } \
+ if (_r >= (d)) { \
+ _r -= (d); \
+ _q++; \
+ } \
+ (r) = _r; \
+ (q) = _q; \
+ } while (0)
+
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+
+/*-- mpihelp-mul.c --*/
+
+struct karatsuba_ctx {
+ struct karatsuba_ctx *next;
+ mpi_ptr_t tspace;
+ mpi_size_t tspace_size;
+ mpi_ptr_t tp;
+ mpi_size_t tp_size;
+};
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+ mpi_ptr_t tspace);
+
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+ mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize,
+ struct karatsuba_ctx *ctx);
+
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+ mpi_ptr_t np, mpi_size_t nsize,
+ mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+ mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb);
+
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+ unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+ unsigned cnt);
+
+/* Define stuff for longlong.h. */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+
+#endif /*G10_MPI_INTERNAL_H */
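
UDIV_QRNND_PREINV above replaces the hardware divide in inner loops with two multiplies against a precomputed reciprocal DI = 2^(2N)/D - 2^N for a normalized D. A userspace sketch with 32-bit limbs and 64-bit intermediates, assuming nh < d so the quotient fits one limb; both function names are hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    /* di = floor(2^64 / d) - 2^32, for d with its top bit set. */
    static uint32_t invert_limb(uint32_t d)
    {
        if ((uint32_t)(d << 1) == 0)  /* d == 2^31: inverse saturates */
            return ~(uint32_t)0;
        return (uint32_t)(UINT64_MAX / d - ((uint64_t)1 << 32));
    }

    /* Divide the two-limb value (nh:nl) by d via the inverse; the
     * estimate is never too large and at most a few too small. */
    static uint32_t udiv_qrnnd_preinv(uint32_t *r, uint32_t nh,
                                      uint32_t nl, uint32_t d, uint32_t di)
    {
        uint64_t n = ((uint64_t)nh << 32) | nl;
        uint64_t q = ((uint64_t)nh * di >> 32) + nh;
        uint64_t rem = n - q * d;

        while (rem >= d) {  /* bounded correction, no divide */
            rem -= d;
            q++;
        }
        *r = (uint32_t)rem;
        return (uint32_t)q;
    }

    int main(void)
    {
        uint32_t d = 0x80000003u, r;
        uint32_t q = udiv_qrnnd_preinv(&r, 0x12345678u, 0x9abcdef0u,
                                       d, invert_limb(d));

        printf("q=%08x r=%08x\n", q, r);  /* equals n/d and n%d */
        return 0;
    }
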
diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c
new file mode 100644
index 000000000000..0951f9847745
--- /dev/null
+++ b/lib/mpi/mpi-inv.c
@@ -0,0 +1,187 @@
+/* mpi-inv.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Calculate the multiplicative inverse X of A mod N
+ * That is: Find the solution x for
+ * 1 = (a*x) mod n
+ */
+int mpi_invm(MPI x, const MPI a, const MPI n)
+{
+ /* Extended Euclid's algorithm (see TAOCP Vol II, 4.5.2, Alg X)
+ * modified according to Michael Penk's solution for Exercise 35
+ * with further enhancement */
+ MPI u = NULL, v = NULL;
+ MPI u1 = NULL, u2 = NULL, u3 = NULL;
+ MPI v1 = NULL, v2 = NULL, v3 = NULL;
+ MPI t1 = NULL, t2 = NULL, t3 = NULL;
+ unsigned k;
+ int sign;
+ int odd = 0;
+ int rc = -ENOMEM;
+
+ if (mpi_copy(&u, a) < 0)
+ goto cleanup;
+ if (mpi_copy(&v, n) < 0)
+ goto cleanup;
+
+ for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) {
+ if (mpi_rshift(u, u, 1) < 0)
+ goto cleanup;
+ if (mpi_rshift(v, v, 1) < 0)
+ goto cleanup;
+ }
+ odd = mpi_test_bit(v, 0);
+
+ u1 = mpi_alloc_set_ui(1);
+ if (!u1)
+ goto cleanup;
+ if (!odd) {
+ u2 = mpi_alloc_set_ui(0);
+ if (!u2)
+ goto cleanup;
+ }
+ if (mpi_copy(&u3, u) < 0)
+ goto cleanup;
+ if (mpi_copy(&v1, v) < 0)
+ goto cleanup;
+ if (!odd) {
+ v2 = mpi_alloc(mpi_get_nlimbs(u));
+ if (!v2)
+ goto cleanup;
+ if (mpi_sub(v2, u1, u) < 0)
+ goto cleanup; /* U is used as const 1 */
+ }
+ if (mpi_copy(&v3, v) < 0)
+ goto cleanup;
+ if (mpi_test_bit(u, 0)) { /* u is odd */
+ t1 = mpi_alloc_set_ui(0);
+ if (!t1)
+ goto cleanup;
+ if (!odd) {
+ t2 = mpi_alloc_set_ui(1);
+ if (!t2)
+ goto cleanup;
+ t2->sign = 1;
+ }
+ if (mpi_copy(&t3, v) < 0)
+ goto cleanup;
+ t3->sign = !t3->sign;
+ goto Y4;
+ } else {
+ t1 = mpi_alloc_set_ui(1);
+ if (!t1)
+ goto cleanup;
+ if (!odd) {
+ t2 = mpi_alloc_set_ui(0);
+ if (!t2)
+ goto cleanup;
+ }
+ if (mpi_copy(&t3, u) < 0)
+ goto cleanup;
+ }
+ do {
+ do {
+ if (!odd) {
+ if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) { /* one is odd */
+ if (mpi_add(t1, t1, v) < 0)
+ goto cleanup;
+ if (mpi_sub(t2, t2, u) < 0)
+ goto cleanup;
+ }
+ if (mpi_rshift(t1, t1, 1) < 0)
+ goto cleanup;
+ if (mpi_rshift(t2, t2, 1) < 0)
+ goto cleanup;
+ if (mpi_rshift(t3, t3, 1) < 0)
+ goto cleanup;
+ } else {
+ if (mpi_test_bit(t1, 0))
+ if (mpi_add(t1, t1, v) < 0)
+ goto cleanup;
+ if (mpi_rshift(t1, t1, 1) < 0)
+ goto cleanup;
+ if (mpi_rshift(t3, t3, 1) < 0)
+ goto cleanup;
+ }
+Y4:
+ ;
+ } while (!mpi_test_bit(t3, 0)); /* while t3 is even */
+
+ if (!t3->sign) {
+ if (mpi_set(u1, t1) < 0)
+ goto cleanup;
+ if (!odd)
+ if (mpi_set(u2, t2) < 0)
+ goto cleanup;
+ if (mpi_set(u3, t3) < 0)
+ goto cleanup;
+ } else {
+ if (mpi_sub(v1, v, t1) < 0)
+ goto cleanup;
+ sign = u->sign;
+ u->sign = !u->sign;
+ if (!odd)
+ if (mpi_sub(v2, u, t2) < 0)
+ goto cleanup;
+ u->sign = sign;
+ sign = t3->sign;
+ t3->sign = !t3->sign;
+ if (mpi_set(v3, t3) < 0)
+ goto cleanup;
+ t3->sign = sign;
+ }
+ if (mpi_sub(t1, u1, v1) < 0)
+ goto cleanup;
+ if (!odd)
+ if (mpi_sub(t2, u2, v2) < 0)
+ goto cleanup;
+ if (mpi_sub(t3, u3, v3) < 0)
+ goto cleanup;
+ if (t1->sign) {
+ if (mpi_add(t1, t1, v) < 0)
+ goto cleanup;
+ if (!odd)
+ if (mpi_sub(t2, t2, u) < 0)
+ goto cleanup;
+ }
+ } while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */
+ /* mpi_lshift( u3, k ); */
+ rc = mpi_set(x, u1);
+
+cleanup:
+ mpi_free(u1);
+ mpi_free(v1);
+ mpi_free(t1);
+ if (!odd) {
+ mpi_free(u2);
+ mpi_free(v2);
+ mpi_free(t2);
+ }
+ mpi_free(u3);
+ mpi_free(v3);
+ mpi_free(t3);
+
+ mpi_free(u);
+ mpi_free(v);
+ return rc;
+}
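
mpi_invm implements the binary extended GCD (Penk's method), which trades divisions for shifts. The division-based extended Euclid computes the same inverse and is easier to follow; a single-word sketch, for illustration only:

    #include <stdio.h>

    /* Return x with (a*x) % n == 1, or 0 if no inverse exists. */
    static long invm(long a, long n)
    {
        long r0 = n, r1 = a % n;
        long t0 = 0, t1 = 1;

        while (r1 != 0) {
            long q = r0 / r1, tmp;

            tmp = r0 - q * r1; r0 = r1; r1 = tmp;
            tmp = t0 - q * t1; t0 = t1; t1 = tmp;
        }
        if (r0 != 1)
            return 0;                 /* gcd(a, n) != 1 */
        return t0 < 0 ? t0 + n : t0;
    }

    int main(void)
    {
        printf("%ld\n", invm(3, 7));  /* prints 5: 3*5 == 1 (mod 7) */
        return 0;
    }
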
diff --git a/lib/mpi/mpi-mpow.c b/lib/mpi/mpi-mpow.c
new file mode 100644
index 000000000000..7328d0d6c748
--- /dev/null
+++ b/lib/mpi/mpi-mpow.c
@@ -0,0 +1,134 @@
+/* mpi-mpow.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+static int build_index(const MPI *exparray, int k, int i, int t)
+{
+ int j, bitno;
+ int index = 0;
+
+ bitno = t - i;
+ for (j = k - 1; j >= 0; j--) {
+ index <<= 1;
+ if (mpi_test_bit(exparray[j], bitno))
+ index |= 1;
+ }
+ return index;
+}
+
+/****************
+ * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... * (BASE[k-1] ^ EXP[k-1]) mod M
+ */
+int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI m)
+{
+ int rc = -ENOMEM;
+ int k; /* number of elements */
+ int t; /* bit size of largest exponent */
+ int i, j, idx;
+ MPI *G = NULL; /* table with precomputed values of size 2^k */
+ MPI tmp = NULL;
+
+ for (k = 0; basearray[k]; k++)
+ ;
+ if (!k) {
+ pr_emerg("mpi_mulpowm: assert(k) failed\n");
+ BUG();
+ }
+ for (t = 0, i = 0; (tmp = exparray[i]); i++) {
+ j = mpi_get_nbits(tmp);
+ if (j > t)
+ t = j;
+ }
+ if (i != k) {
+ pr_emerg("mpi_mulpowm: assert(i==k) failed\n");
+ BUG();
+ }
+ if (!t) {
+ pr_emerg("mpi_mulpowm: assert(t) failed\n");
+ BUG();
+ }
+ if (k >= 10) {
+ pr_emerg("mpi_mulpowm: assert(k<10) failed\n");
+ BUG();
+ }
+
+ G = kzalloc((1 << k) * sizeof *G, GFP_KERNEL);
+ if (!G)
+ goto err_out;
+
+ /* and calculate */
+ tmp = mpi_alloc(mpi_get_nlimbs(m) + 1);
+ if (!tmp)
+ goto nomem;
+ if (mpi_set_ui(res, 1) < 0)
+ goto nomem;
+ for (i = 1; i <= t; i++) {
+ if (mpi_mulm(tmp, res, res, m) < 0)
+ goto nomem;
+ idx = build_index(exparray, k, i, t);
+ if (!(idx >= 0 && idx < (1 << k))) {
+ pr_emerg("mpi_mulpowm: assert(idx >= 0 && idx < (1<<k)) failed\n");
+ BUG();
+ }
+ if (!G[idx]) {
+ if (!idx) {
+ G[0] = mpi_alloc_set_ui(1);
+ if (!G[0])
+ goto nomem;
+ } else {
+ for (j = 0; j < k; j++) {
+ if ((idx & (1 << j))) {
+ if (!G[idx]) {
+ if (mpi_copy(&G[idx], basearray[j]) < 0)
+ goto nomem;
+ } else {
+ if (mpi_mulm(G[idx], G[idx], basearray[j], m) < 0)
+ goto nomem;
+ }
+ }
+ }
+ if (!G[idx]) {
+ G[idx] = mpi_alloc(0);
+ if (!G[idx])
+ goto nomem;
+ }
+ }
+ }
+ if (mpi_mulm(res, tmp, G[idx], m) < 0)
+ goto nomem;
+ }
+
+ rc = 0;
+nomem:
+ /* cleanup */
+ mpi_free(tmp);
+ for (i = 0; i < (1 << k); i++)
+ mpi_free(G[i]);
+ kfree(G);
+err_out:
+ return rc;
+}
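
mpi_mulpowm performs simultaneous exponentiation: one squaring per bit column, with build_index selecting the precomputed product of the bases whose exponent bit is set in that column. A two-base, word-sized sketch (Shamir's trick), assuming the modulus fits in 32 bits so all products fit in 64; mulpowm2 is a hypothetical name:

    #include <stdint.h>
    #include <stdio.h>

    /* (a^ea * b^eb) mod m with a shared squaring chain. */
    static uint32_t mulpowm2(uint32_t a, uint32_t ea,
                             uint32_t b, uint32_t eb, uint32_t m)
    {
        uint64_t G[4];                /* analog of the lazy G[] table */
        uint64_t r = 1 % m;
        int i;

        G[0] = 1;
        G[1] = a % m;
        G[2] = b % m;
        G[3] = (uint64_t)(a % m) * (b % m) % m;

        for (i = 31; i >= 0; i--) {
            int idx = (((eb >> i) & 1) << 1) | ((ea >> i) & 1);

            r = r * r % m;            /* one squaring per column */
            r = r * G[idx] % m;
        }
        return (uint32_t)r;
    }

    int main(void)
    {
        /* 3^5 * 2^7 mod 1000 = 243 * 128 mod 1000 = 104 */
        printf("%u\n", mulpowm2(3, 5, 2, 7, 1000));
        return 0;
    }
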
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
new file mode 100644
index 000000000000..1f3219e27292
--- /dev/null
+++ b/lib/mpi/mpi-mul.c
@@ -0,0 +1,194 @@
+/* mpi-mul.c - MPI functions
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+int mpi_mul_ui(MPI prod, MPI mult, unsigned long small_mult)
+{
+ mpi_size_t size, prod_size;
+ mpi_ptr_t prod_ptr;
+ mpi_limb_t cy;
+ int sign;
+
+ size = mult->nlimbs;
+ sign = mult->sign;
+
+ if (!size || !small_mult) {
+ prod->nlimbs = 0;
+ prod->sign = 0;
+ return 0;
+ }
+
+ prod_size = size + 1;
+ if (prod->alloced < prod_size)
+ if (mpi_resize(prod, prod_size) < 0)
+ return -ENOMEM;
+ prod_ptr = prod->d;
+
+ cy = mpihelp_mul_1(prod_ptr, mult->d, size, (mpi_limb_t) small_mult);
+ if (cy)
+ prod_ptr[size++] = cy;
+ prod->nlimbs = size;
+ prod->sign = sign;
+ return 0;
+}
+
+int mpi_mul_2exp(MPI w, MPI u, unsigned long cnt)
+{
+ mpi_size_t usize, wsize, limb_cnt;
+ mpi_ptr_t wp;
+ mpi_limb_t wlimb;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+
+ if (!usize) {
+ w->nlimbs = 0;
+ w->sign = 0;
+ return 0;
+ }
+
+ limb_cnt = cnt / BITS_PER_MPI_LIMB;
+ wsize = usize + limb_cnt + 1;
+ if (w->alloced < wsize)
+ if (mpi_resize(w, wsize) < 0)
+ return -ENOMEM;
+ wp = w->d;
+ wsize = usize + limb_cnt;
+ wsign = usign;
+
+ cnt %= BITS_PER_MPI_LIMB;
+ if (cnt) {
+ wlimb = mpihelp_lshift(wp + limb_cnt, u->d, usize, cnt);
+ if (wlimb) {
+ wp[wsize] = wlimb;
+ wsize++;
+ }
+ } else {
+ MPN_COPY_DECR(wp + limb_cnt, u->d, usize);
+ }
+
+ /* Zero all whole limbs at the low end. Do it here and not before
+ * calling mpn_lshift, so as not to lose data when U == W. */
+ MPN_ZERO(wp, limb_cnt);
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+ return 0;
+}
+
+int mpi_mul(MPI w, MPI u, MPI v)
+{
+ int rc = -ENOMEM;
+ mpi_size_t usize, vsize, wsize;
+ mpi_ptr_t up, vp, wp;
+ mpi_limb_t cy;
+ int usign, vsign, sign_product;
+ int assign_wp = 0;
+ mpi_ptr_t tmp_limb = NULL;
+
+ if (u->nlimbs < v->nlimbs) { /* Swap U and V. */
+ usize = v->nlimbs;
+ usign = v->sign;
+ up = v->d;
+ vsize = u->nlimbs;
+ vsign = u->sign;
+ vp = u->d;
+ } else {
+ usize = u->nlimbs;
+ usign = u->sign;
+ up = u->d;
+ vsize = v->nlimbs;
+ vsign = v->sign;
+ vp = v->d;
+ }
+ sign_product = usign ^ vsign;
+ wp = w->d;
+
+ /* Ensure W has space enough to store the result. */
+ wsize = usize + vsize;
+ if (w->alloced < (size_t) wsize) {
+ if (wp == up || wp == vp) {
+ wp = mpi_alloc_limb_space(wsize);
+ if (!wp)
+ goto nomem;
+ assign_wp = 1;
+ } else {
+ if (mpi_resize(w, wsize) < 0)
+ goto nomem;
+ wp = w->d;
+ }
+ } else { /* Make U and V not overlap with W. */
+ if (wp == up) {
+ /* W and U are identical. Allocate temporary space for U. */
+ up = tmp_limb = mpi_alloc_limb_space(usize);
+ if (!up)
+ goto nomem;
+ /* Is V identical too? Keep it identical with U. */
+ if (wp == vp)
+ vp = up;
+ /* Copy to the temporary space. */
+ MPN_COPY(up, wp, usize);
+ } else if (wp == vp) {
+ /* W and V are identical. Allocate temporary space for V. */
+ vp = tmp_limb = mpi_alloc_limb_space(vsize);
+ if (!vp)
+ goto nomem;
+ /* Copy to the temporary space. */
+ MPN_COPY(vp, wp, vsize);
+ }
+ }
+
+ if (!vsize)
+ wsize = 0;
+ else {
+ if (mpihelp_mul(wp, up, usize, vp, vsize, &cy) < 0)
+ goto nomem;
+ wsize -= cy ? 0 : 1;
+ }
+
+ if (assign_wp)
+ mpi_assign_limb_space(w, wp, wsize);
+
+ w->nlimbs = wsize;
+ w->sign = sign_product;
+ rc = 0;
+nomem:
+ if (tmp_limb)
+ mpi_free_limb_space(tmp_limb);
+ return rc;
+}
+
+int mpi_mulm(MPI w, MPI u, MPI v, MPI m)
+{
+ if (mpi_mul(w, u, v) < 0)
+ return -ENOMEM;
+ return mpi_fdiv_r(w, w, m);
+}
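
Below KARATSUBA_THRESHOLD, mpihelp_mul falls back to schoolbook multiplication: every limb of V is multiplied into U and accumulated one position further along, the job mpihelp_addmul_1 does per row. A compact 32-bit-limb version of that base case, as an illustration only:

    #include <stdint.h>
    #include <stdio.h>

    /* w[usize+vsize] = u[usize] * v[vsize], limbs little-endian. */
    static void mul_basecase(uint32_t *w, const uint32_t *u, int usize,
                             const uint32_t *v, int vsize)
    {
        int i, j;

        for (i = 0; i < usize + vsize; i++)
            w[i] = 0;

        for (j = 0; j < vsize; j++) {
            uint64_t carry = 0;

            for (i = 0; i < usize; i++) {
                /* multiply, add previous partial sum, add carry */
                uint64_t t = (uint64_t)u[i] * v[j] + w[i + j] + carry;

                w[i + j] = (uint32_t)t;
                carry = t >> 32;
            }
            w[j + usize] = (uint32_t)carry;
        }
    }

    int main(void)
    {
        uint32_t u[2] = { 0xffffffff, 0xffffffff };  /* 2^64 - 1 */
        uint32_t v[1] = { 2 };
        uint32_t w[3];

        mul_basecase(w, u, 2, v, 1);
        /* prints: 00000001 ffffffff fffffffe */
        printf("%08x %08x %08x\n", w[2], w[1], w[0]);
        return 0;
    }
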
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
new file mode 100644
index 000000000000..67f3e79af914
--- /dev/null
+++ b/lib/mpi/mpi-pow.c
@@ -0,0 +1,323 @@
+/* mpi-pow.c - MPI functions
+ * Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
+{
+ mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+ mpi_ptr_t xp_marker = NULL;
+ mpi_ptr_t tspace = NULL;
+ mpi_ptr_t rp, ep, mp, bp;
+ mpi_size_t esize, msize, bsize, rsize;
+ int esign, msign, bsign, rsign;
+ mpi_size_t size;
+ int mod_shift_cnt;
+ int negative_result;
+ int assign_rp = 0;
+ mpi_size_t tsize = 0; /* to avoid compiler warning */
+ /* fixme: verify that the warning really is spurious */
+ int rc = -ENOMEM;
+
+ esize = exp->nlimbs;
+ msize = mod->nlimbs;
+ size = 2 * msize;
+ esign = exp->sign;
+ msign = mod->sign;
+
+ rp = res->d;
+ ep = exp->d;
+
+ if (!msize)
+ return -EINVAL;
+
+ if (!esize) {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+ * depending on whether MOD equals 1. */
+ rp[0] = 1;
+ res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+ res->sign = 0;
+ goto leave;
+ }
+
+ /* Normalize MOD (i.e. make its most significant bit set) as required by
+ * mpn_divrem. This will make the intermediate values in the calculation
+ * slightly larger, but the correct result is obtained after a final
+ * reduction using the original MOD value. */
+ mp = mp_marker = mpi_alloc_limb_space(msize);
+ if (!mp)
+ goto enomem;
+ count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]);
+ if (mod_shift_cnt)
+ mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
+ else
+ MPN_COPY(mp, mod->d, msize);
+
+ bsize = base->nlimbs;
+ bsign = base->sign;
+ if (bsize > msize) { /* The base is larger than the modulus. Reduce it. */
+ /* Allocate (BSIZE + 1) with space for remainder and quotient.
+ * (The quotient is (bsize - msize + 1) limbs.) */
+ bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
+ if (!bp)
+ goto enomem;
+ MPN_COPY(bp, base->d, bsize);
+ /* We don't care about the quotient, store it above the remainder,
+ * at BP + MSIZE. */
+ mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
+ bsize = msize;
+ /* Canonicalize the base, since we are going to multiply with it
+ * quite a few times. */
+ MPN_NORMALIZE(bp, bsize);
+ } else
+ bp = base->d;
+
+ if (!bsize) {
+ res->nlimbs = 0;
+ res->sign = 0;
+ goto leave;
+ }
+
+ if (res->alloced < size) {
+ /* We have to allocate more space for RES. If any of the input
+ * parameters are identical to RES, defer deallocation of the old
+ * space. */
+ if (rp == ep || rp == mp || rp == bp) {
+ rp = mpi_alloc_limb_space(size);
+ if (!rp)
+ goto enomem;
+ assign_rp = 1;
+ } else {
+ if (mpi_resize(res, size) < 0)
+ goto enomem;
+ rp = res->d;
+ }
+ } else { /* Make BASE, EXP and MOD not overlap with RES. */
+ if (rp == bp) {
+ /* RES and BASE are identical. Allocate temp. space for BASE. */
+ BUG_ON(bp_marker);
+ bp = bp_marker = mpi_alloc_limb_space(bsize);
+ if (!bp)
+ goto enomem;
+ MPN_COPY(bp, rp, bsize);
+ }
+ if (rp == ep) {
+ /* RES and EXP are identical. Allocate temp. space for EXP. */
+ ep = ep_marker = mpi_alloc_limb_space(esize);
+ if (!ep)
+ goto enomem;
+ MPN_COPY(ep, rp, esize);
+ }
+ if (rp == mp) {
+ /* RES and MOD are identical. Allocate temporary space for MOD. */
+ BUG_ON(mp_marker);
+ mp = mp_marker = mpi_alloc_limb_space(msize);
+ if (!mp)
+ goto enomem;
+ MPN_COPY(mp, rp, msize);
+ }
+ }
+
+ MPN_COPY(rp, bp, bsize);
+ rsize = bsize;
+ rsign = bsign;
+
+ {
+ mpi_size_t i;
+ mpi_ptr_t xp;
+ int c;
+ mpi_limb_t e;
+ mpi_limb_t carry_limb;
+ struct karatsuba_ctx karactx;
+
+ xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
+ if (!xp)
+ goto enomem;
+
+ memset(&karactx, 0, sizeof karactx);
+ negative_result = (ep[0] & 1) && base->sign;
+
+ i = esize - 1;
+ e = ep[i];
+ count_leading_zeros(c, e);
+ e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
+ c = BITS_PER_MPI_LIMB - 1 - c;
+
+ /* Main loop.
+ *
+ * Make the result be pointed to alternately by XP and RP. This
+ * helps us avoid block copying, which would otherwise be necessary
+ * with the overlap restrictions of mpihelp_divrem. With 50% probability
+ * the result after this loop will be in the area originally pointed
+ * to by RP (==RES->d), and with 50% probability in the area originally
+ * pointed to by XP.
+ */
+
+ for (;;) {
+ while (c) {
+ mpi_ptr_t tp;
+ mpi_size_t xsize;
+
+ /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */
+ if (rsize < KARATSUBA_THRESHOLD)
+ mpih_sqr_n_basecase(xp, rp, rsize);
+ else {
+ if (!tspace) {
+ tsize = 2 * rsize;
+ tspace = mpi_alloc_limb_space(tsize);
+ if (!tspace)
+ goto enomem;
+ } else if (tsize < (2 * rsize)) {
+ mpi_free_limb_space(tspace);
+ tsize = 2 * rsize;
+ tspace = mpi_alloc_limb_space(tsize);
+ if (!tspace)
+ goto enomem;
+ }
+ mpih_sqr_n(xp, rp, rsize, tspace);
+ }
+
+ xsize = 2 * rsize;
+ if (xsize > msize) {
+ mpihelp_divrem(xp + msize, 0, xp, xsize,
+ mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp;
+ rp = xp;
+ xp = tp;
+ rsize = xsize;
+
+ if ((mpi_limb_signed_t) e < 0) {
+ /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */
+ if (bsize < KARATSUBA_THRESHOLD) {
+ mpi_limb_t tmp;
+ if (mpihelp_mul(xp, rp, rsize, bp, bsize, &tmp) < 0)
+ goto enomem;
+ } else {
+ if (mpihelp_mul_karatsuba_case(xp, rp, rsize, bp, bsize, &karactx) < 0)
+ goto enomem;
+ }
+
+ xsize = rsize + bsize;
+ if (xsize > msize) {
+ mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp;
+ rp = xp;
+ xp = tp;
+ rsize = xsize;
+ }
+ e <<= 1;
+ c--;
+ }
+
+ i--;
+ if (i < 0)
+ break;
+ e = ep[i];
+ c = BITS_PER_MPI_LIMB;
+ }
+
+ /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+ * steps. Adjust the result by reducing it with the original MOD.
+ *
+ * Also make sure the result is put in RES->d (where it already
+ * might be, see above).
+ */
+ if (mod_shift_cnt) {
+ carry_limb = mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
+ rp = res->d;
+ if (carry_limb) {
+ rp[rsize] = carry_limb;
+ rsize++;
+ }
+ } else {
+ MPN_COPY(res->d, rp, rsize);
+ rp = res->d;
+ }
+
+ if (rsize >= msize) {
+ mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+ rsize = msize;
+ }
+
+ /* Remove any leading zero words from the result. */
+ if (mod_shift_cnt)
+ mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
+ MPN_NORMALIZE(rp, rsize);
+
+ mpihelp_release_karatsuba_ctx(&karactx);
+ }
+
+ if (negative_result && rsize) {
+ if (mod_shift_cnt)
+ mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
+ mpihelp_sub(rp, mp, msize, rp, rsize);
+ rsize = msize;
+ rsign = msign;
+ MPN_NORMALIZE(rp, rsize);
+ }
+ res->nlimbs = rsize;
+ res->sign = rsign;
+
+leave:
+ rc = 0;
+enomem:
+ if (assign_rp)
+ mpi_assign_limb_space(res, rp, size);
+ if (mp_marker)
+ mpi_free_limb_space(mp_marker);
+ if (bp_marker)
+ mpi_free_limb_space(bp_marker);
+ if (ep_marker)
+ mpi_free_limb_space(ep_marker);
+ if (xp_marker)
+ mpi_free_limb_space(xp_marker);
+ if (tspace)
+ mpi_free_limb_space(tspace);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(mpi_powm);
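
Stripped of the normalization, pointer ping-pong and Karatsuba plumbing, the loop above is left-to-right binary exponentiation: square once per exponent bit, multiply by the base when the bit is set. A word-sized sketch, assuming the modulus fits in 32 bits:

    #include <stdint.h>
    #include <stdio.h>

    /* base^exp mod m, scanning exponent bits msb-first like mpi_powm. */
    static uint32_t powm(uint32_t base, uint32_t exp, uint32_t m)
    {
        uint64_t r = 1 % m;
        int i;

        for (i = 31; i >= 0; i--) {
            r = r * r % m;                   /* squaring step */
            if ((exp >> i) & 1)
                r = r * (base % m) % m;      /* multiply step */
        }
        return (uint32_t)r;
    }

    int main(void)
    {
        printf("%u\n", powm(4, 13, 497));    /* prints 445 */
        return 0;
    }
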
diff --git a/lib/mpi/mpi-scan.c b/lib/mpi/mpi-scan.c
new file mode 100644
index 000000000000..b2da5ad96199
--- /dev/null
+++ b/lib/mpi/mpi-scan.c
@@ -0,0 +1,136 @@
+/* mpi-scan.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/****************
+ * Scan through an mpi and return it byte by byte. A -1 is returned to
+ * indicate the end of the mpi. Scanning is done from the lsb to the msb;
+ * returned values are in the range 0 .. 255.
+ *
+ * FIXME: This code is VERY ugly!
+ */
+int mpi_getbyte(const MPI a, unsigned idx)
+{
+ int i, j;
+ unsigned n;
+ mpi_ptr_t ap;
+ mpi_limb_t limb;
+
+ ap = a->d;
+ for (n = 0, i = 0; i < a->nlimbs; i++) {
+ limb = ap[i];
+ for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+ if (n == idx)
+ return (limb >> j * 8) & 0xff;
+ }
+ return -1;
+}
+
+/****************
+ * Put the byte value XC at position IDX into A. IDX counts from lsb to msb.
+ */
+void mpi_putbyte(MPI a, unsigned idx, int xc)
+{
+ int i, j;
+ unsigned n;
+ mpi_ptr_t ap;
+ mpi_limb_t limb, c;
+
+ c = xc & 0xff;
+ ap = a->d;
+ for (n = 0, i = 0; i < a->alloced; i++) {
+ limb = ap[i];
+ for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+ if (n == idx) {
+#if BYTES_PER_MPI_LIMB == 4
+ if (j == 0)
+ limb = (limb & 0xffffff00) | c;
+ else if (j == 1)
+ limb = (limb & 0xffff00ff) | (c << 8);
+ else if (j == 2)
+ limb = (limb & 0xff00ffff) | (c << 16);
+ else
+ limb = (limb & 0x00ffffff) | (c << 24);
+#elif BYTES_PER_MPI_LIMB == 8
+ if (j == 0)
+ limb = (limb & 0xffffffffffffff00) | c;
+ else if (j == 1)
+ limb = (limb & 0xffffffffffff00ff) | (c << 8);
+ else if (j == 2)
+ limb = (limb & 0xffffffffff00ffff) | (c << 16);
+ else if (j == 3)
+ limb = (limb & 0xffffffff00ffffff) | (c << 24);
+ else if (j == 4)
+ limb = (limb & 0xffffff00ffffffff) | (c << 32);
+ else if (j == 5)
+ limb = (limb & 0xffff00ffffffffff) | (c << 40);
+ else if (j == 6)
+ limb = (limb & 0xff00ffffffffffff) | (c << 48);
+ else
+ limb = (limb & 0x00ffffffffffffff) | (c << 56);
+#else
+#error please enhance this function, it's ugly - I know.
+#endif
+ if (a->nlimbs <= i)
+ a->nlimbs = i + 1;
+ ap[i] = limb;
+ return;
+ }
+ }
+ log_bug("index out of range\n");
+}
+
+/****************
+ * Count the number of zerobits at the low end of A
+ */
+unsigned mpi_trailing_zeros(const MPI a)
+{
+ unsigned n, count = 0;
+
+ for (n = 0; n < a->nlimbs; n++) {
+ if (a->d[n]) {
+ unsigned nn;
+ mpi_limb_t alimb = a->d[n];
+
+ count_trailing_zeros(nn, alimb);
+ count += nn;
+ break;
+ }
+ count += BITS_PER_MPI_LIMB;
+ }
+ return count;
+}
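
mpi_trailing_zeros walks limbs until it finds a non-zero one, then counts inside it with count_trailing_zeros. In userspace the per-limb count is typically a compiler builtin; a sketch assuming GCC/Clang for __builtin_ctzll:

    #include <stdint.h>
    #include <stdio.h>

    /* Trailing zero bits of a non-zero n-limb little-endian number. */
    static unsigned trailing_zeros(const uint64_t *d, int n)
    {
        unsigned count = 0;
        int i;

        for (i = 0; i < n; i++) {
            if (d[i])
                return count + __builtin_ctzll(d[i]);
            count += 64;
        }
        return count;  /* only reached for an all-zero input */
    }

    int main(void)
    {
        uint64_t d[2] = { 0, 0x10 };  /* the value 2^68 */

        printf("%u\n", trailing_zeros(d, 2));  /* prints 68 */
        return 0;
    }
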
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
new file mode 100644
index 000000000000..f26b41fcb48c
--- /dev/null
+++ b/lib/mpi/mpicoder.c
@@ -0,0 +1,280 @@
+/* mpicoder.c - Coder for the external representation of MPIs
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+#define MAX_EXTERN_MPI_BITS 16384
+
+MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
+{
+ const uint8_t *buffer = xbuffer;
+ int i, j;
+ unsigned nbits, nbytes, nlimbs, nread = 0;
+ mpi_limb_t a;
+ MPI val = NULL;
+
+ if (*ret_nread < 2)
+ goto leave;
+ nbits = buffer[0] << 8 | buffer[1];
+
+ if (nbits > MAX_EXTERN_MPI_BITS) {
+ pr_info("MPI: mpi too large (%u bits)\n", nbits);
+ goto leave;
+ }
+ buffer += 2;
+ nread = 2;
+
+ nbytes = (nbits + 7) / 8;
+ nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ val = mpi_alloc(nlimbs);
+ if (!val)
+ return NULL;
+ i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ i %= BYTES_PER_MPI_LIMB;
+ val->nbits = nbits;
+ j = val->nlimbs = nlimbs;
+ val->sign = 0;
+ for (; j > 0; j--) {
+ a = 0;
+ for (; i < BYTES_PER_MPI_LIMB; i++) {
+ if (++nread > *ret_nread) {
+ printk("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
+ nread, *ret_nread);
+ goto leave;
+ }
+ a <<= 8;
+ a |= *buffer++;
+ }
+ i = 0;
+ val->d[j - 1] = a;
+ }
+
+leave:
+ *ret_nread = nread;
+ return val;
+}
+EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
+
+/****************
+ * Make an mpi from a character string.
+ */
+int mpi_fromstr(MPI val, const char *str)
+{
+ int hexmode = 0, sign = 0, prepend_zero = 0, i, j, c, c1, c2;
+ unsigned nbits, nbytes, nlimbs;
+ mpi_limb_t a;
+
+ if (*str == '-') {
+ sign = 1;
+ str++;
+ }
+ if (*str == '0' && str[1] == 'x')
+ hexmode = 1;
+ else
+ return -EINVAL; /* other bases are not yet supported */
+ str += 2;
+
+ nbits = strlen(str) * 4;
+ if (nbits % 8)
+ prepend_zero = 1;
+ nbytes = (nbits + 7) / 8;
+ nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ if (val->alloced < nlimbs)
+ if (mpi_resize(val, nlimbs) < 0)
+ return -ENOMEM;
+ i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ i %= BYTES_PER_MPI_LIMB;
+ j = val->nlimbs = nlimbs;
+ val->sign = sign;
+ for (; j > 0; j--) {
+ a = 0;
+ for (; i < BYTES_PER_MPI_LIMB; i++) {
+ if (prepend_zero) {
+ c1 = '0';
+ prepend_zero = 0;
+ } else
+ c1 = *str++;
+ assert(c1);
+ c2 = *str++;
+ assert(c2);
+ if (c1 >= '0' && c1 <= '9')
+ c = c1 - '0';
+ else if (c1 >= 'a' && c1 <= 'f')
+ c = c1 - 'a' + 10;
+ else if (c1 >= 'A' && c1 <= 'F')
+ c = c1 - 'A' + 10;
+ else {
+ mpi_clear(val);
+ return 1;
+ }
+ c <<= 4;
+ if (c2 >= '0' && c2 <= '9')
+ c |= c2 - '0';
+ else if (c2 >= 'a' && c2 <= 'f')
+ c |= c2 - 'a' + 10;
+ else if (c2 >= 'A' && c2 <= 'F')
+ c |= c2 - 'A' + 10;
+ else {
+ mpi_clear(val);
+ return 1;
+ }
+ a <<= 8;
+ a |= c;
+ }
+ i = 0;
+
+ val->d[j - 1] = a;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_fromstr);
+
+/****************
+ * Return an allocated buffer with the MPI (msb first).
+ * NBYTES receives the length of this buffer. The caller must free the
+ * returned buffer. (This function returns a buffer with NBYTES set to
+ * zero if the value of A is zero.) If SIGN is not NULL, it will be set
+ * to the sign of A.
+ */
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
+{
+ uint8_t *p, *buffer;
+ mpi_limb_t alimb;
+ int i;
+ unsigned int n;
+
+ if (sign)
+ *sign = a->sign;
+ *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB;
+ if (!n)
+ n++; /* avoid zero length allocation */
+ p = buffer = kmalloc(n, GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ for (i = a->nlimbs - 1; i >= 0; i--) {
+ alimb = a->d[i];
+#if BYTES_PER_MPI_LIMB == 4
+ *p++ = alimb >> 24;
+ *p++ = alimb >> 16;
+ *p++ = alimb >> 8;
+ *p++ = alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+ *p++ = alimb >> 56;
+ *p++ = alimb >> 48;
+ *p++ = alimb >> 40;
+ *p++ = alimb >> 32;
+ *p++ = alimb >> 24;
+ *p++ = alimb >> 16;
+ *p++ = alimb >> 8;
+ *p++ = alimb;
+#else
+#error please implement for this limb size.
+#endif
+ }
+
+ /* this is sub-optimal but we need to do the shift operation
+ * because the caller has to free the returned buffer */
+ for (p = buffer; !*p && *nbytes; p++, --*nbytes)
+ ;
+ if (p != buffer)
+ memmove(buffer, p, *nbytes);
+
+ return buffer;
+}
+EXPORT_SYMBOL_GPL(mpi_get_buffer);
+
+/****************
+ * Use BUFFER to update MPI.
+ */
+int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
+{
+ const uint8_t *buffer = xbuffer, *p;
+ mpi_limb_t alimb;
+ int nlimbs;
+ int i;
+
+ nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
+ return -ENOMEM;
+ a->sign = sign;
+
+ for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
+#if BYTES_PER_MPI_LIMB == 4
+ alimb = (mpi_limb_t) *p--;
+ alimb |= (mpi_limb_t) *p-- << 8;
+ alimb |= (mpi_limb_t) *p-- << 16;
+ alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+ alimb = (mpi_limb_t) *p--;
+ alimb |= (mpi_limb_t) *p-- << 8;
+ alimb |= (mpi_limb_t) *p-- << 16;
+ alimb |= (mpi_limb_t) *p-- << 24;
+ alimb |= (mpi_limb_t) *p-- << 32;
+ alimb |= (mpi_limb_t) *p-- << 40;
+ alimb |= (mpi_limb_t) *p-- << 48;
+ alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+ a->d[i++] = alimb;
+ }
+ if (p >= buffer) {
+#if BYTES_PER_MPI_LIMB == 4
+ alimb = *p--;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 8;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 16;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+ alimb = (mpi_limb_t) *p--;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 8;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 16;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 24;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 32;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 40;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 48;
+ if (p >= buffer)
+ alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+ a->d[i++] = alimb;
+ }
+ a->nlimbs = i;
+
+ if (i != nlimbs) {
+ pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
+ nlimbs);
+ BUG();
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_set_buffer);
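
The external format that mpi_read_from_buffer parses is the OpenPGP MPI encoding: a two-byte big-endian bit count followed by ceil(nbits/8) big-endian value bytes. A tiny encoder for a single word makes the layout concrete (encode_mpi is a hypothetical name; __builtin_clzll assumes GCC/Clang):

    #include <stdint.h>
    #include <stdio.h>

    /* Write v (non-zero) as <nbits: 2 bytes><value: big-endian bytes>;
     * returns the number of bytes produced. */
    static unsigned encode_mpi(uint8_t *out, uint64_t v)
    {
        unsigned nbits = 64 - __builtin_clzll(v);
        unsigned nbytes = (nbits + 7) / 8;
        unsigned i;

        out[0] = nbits >> 8;
        out[1] = nbits & 0xff;
        for (i = 0; i < nbytes; i++)
            out[2 + i] = v >> (8 * (nbytes - 1 - i));
        return 2 + nbytes;
    }

    int main(void)
    {
        uint8_t buf[10];
        unsigned i, n = encode_mpi(buf, 0x1ff);

        for (i = 0; i < n; i++)   /* prints: 00 09 01 ff */
            printf("%02x ", buf[i]);
        printf("\n");
        return 0;
    }
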
diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c
new file mode 100644
index 000000000000..b2fd39677f1b
--- /dev/null
+++ b/lib/mpi/mpih-cmp.c
@@ -0,0 +1,56 @@
+/* mpihelp-cmp.c - MPI helper functions
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
+{
+ mpi_size_t i;
+ mpi_limb_t op1_word, op2_word;
+
+ for (i = size - 1; i >= 0; i--) {
+ op1_word = op1_ptr[i];
+ op2_word = op2_ptr[i];
+ if (op1_word != op2_word)
+ goto diff;
+ }
+ return 0;
+
+diff:
+ /* This can *not* be simplified to
+ * op1_word - op2_word
+ * since that expression might give signed overflow. */
+ return (op1_word > op2_word) ? 1 : -1;
+}
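
The closing comment is worth a demonstration: returning the difference of two limbs would wrap modulo the word size, so its sign says nothing about the ordering.

    #include <stdio.h>
    #include <limits.h>

    int main(void)
    {
        unsigned long a = 0, b = ULONG_MAX;

        /* a < b, yet the unsigned difference wraps around to 1 */
        printf("a - b = %lu, correct cmp = %d\n",
               a - b, a > b ? 1 : -1);
        return 0;
    }
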
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
new file mode 100644
index 000000000000..cde1aaec18da
--- /dev/null
+++ b/lib/mpi/mpih-div.c
@@ -0,0 +1,545 @@
+/* mpihelp-div.c - MPI helper functions
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+ * here (not udiv_qrnnd).
+ */
+
+mpi_limb_t
+mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb)
+{
+ mpi_size_t i;
+ mpi_limb_t n1, n0, r;
+ int dummy;
+
+ /* Botch: Should this be handled at all? Rely on callers? */
+ if (!dividend_size)
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ * dividend is large, pre-invert the divisor, and use
+ * only multiplications in the inner loop.
+ *
+ * This test should be read:
+ * Does it ever help to use udiv_qrnnd_preinv?
+ * && Does what we save compensate for the inversion overhead?
+ */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+ int normalization_steps;
+
+ count_leading_zeros(normalization_steps, divisor_limb);
+ if (normalization_steps) {
+ mpi_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ *
+ * Special case for DIVISOR_LIMB == 100...000.
+ */
+ if (!(divisor_limb << 1))
+ divisor_limb_inverted = ~(mpi_limb_t) 0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for (i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >>
+ (BITS_PER_MPI_LIMB -
+ normalization_steps))),
+ divisor_limb,
+ divisor_limb_inverted);
+ n1 = n0;
+ }
+ UDIV_QRNND_PREINV(dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ } else {
+ mpi_limb_t divisor_limb_inverted;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ *
+ * Special case for DIVISOR_LIMB == 100...000.
+ */
+ if (!(divisor_limb << 1))
+ divisor_limb_inverted = ~(mpi_limb_t) 0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(dummy, r, r,
+ n0, divisor_limb,
+ divisor_limb_inverted);
+ }
+ return r;
+ }
+ } else {
+ if (UDIV_NEEDS_NORMALIZATION) {
+ int normalization_steps;
+
+ count_leading_zeros(normalization_steps, divisor_limb);
+ if (normalization_steps) {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB -
+ normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for (i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd(dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >>
+ (BITS_PER_MPI_LIMB -
+ normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd(dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ * it, or because DIVISOR_LIMB is already normalized. */
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd(dummy, r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ * QP + DSIZE >= NP must hold true. (This means that it's
+ * possible to put the quotient in the high part of NUM, right after the
+ * remainder in NUM.)
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+
+mpi_limb_t
+mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+ mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
+{
+ mpi_limb_t most_significant_q_limb = 0;
+
+ switch (dsize) {
+ case 0:
+ /* We are asked to divide by zero, so go ahead and do it! (To make
+ the compiler not remove this statement, return the value.) */
+ /*
+ * existing clients of this function have been modified
+ * not to call it with dsize == 0, so this should not happen
+ */
+ return 1 / dsize;
+
+ case 1:
+ {
+ mpi_size_t i;
+ mpi_limb_t n1;
+ mpi_limb_t d;
+
+ d = dp[0];
+ n1 = np[nsize - 1];
+
+ if (n1 >= d) {
+ n1 -= d;
+ most_significant_q_limb = 1;
+ }
+
+ qp += qextra_limbs;
+ for (i = nsize - 2; i >= 0; i--)
+ udiv_qrnnd(qp[i], n1, n1, np[i], d);
+ qp -= qextra_limbs;
+
+ for (i = qextra_limbs - 1; i >= 0; i--)
+ udiv_qrnnd(qp[i], n1, n1, 0, d);
+
+ np[0] = n1;
+ }
+ break;
+
+ case 2:
+ {
+ mpi_size_t i;
+ mpi_limb_t n1, n0, n2;
+ mpi_limb_t d1, d0;
+
+ np += nsize - 2;
+ d1 = dp[1];
+ d0 = dp[0];
+ n1 = np[1];
+ n0 = np[0];
+
+ if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
+ sub_ddmmss(n1, n0, n1, n0, d1, d0);
+ most_significant_q_limb = 1;
+ }
+
+ for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
+ mpi_limb_t q;
+ mpi_limb_t r;
+
+ if (i >= qextra_limbs)
+ np--;
+ else
+ np[0] = 0;
+
+ if (n1 == d1) {
+ /* Q should be either 111..111 or 111..110. Need special
+ * treatment of this rare case as normal division would
+ * give overflow. */
+ q = ~(mpi_limb_t) 0;
+
+ r = n0 + d1;
+ if (r < d1) { /* Carry in the addition? */
+ add_ssaaaa(n1, n0, r - d0,
+ np[0], 0, d0);
+ qp[i] = q;
+ continue;
+ }
+ n1 = d0 - (d0 != 0 ? 1 : 0);
+ n0 = -d0;
+ } else {
+ udiv_qrnnd(q, r, n1, n0, d1);
+ umul_ppmm(n1, n0, d0, q);
+ }
+
+ n2 = np[0];
+q_test:
+ if (n1 > r || (n1 == r && n0 > n2)) {
+ /* The estimated Q was too large. */
+ q--;
+ sub_ddmmss(n1, n0, n1, n0, 0, d0);
+ r += d1;
+ if (r >= d1) /* If not carry, test Q again. */
+ goto q_test;
+ }
+
+ qp[i] = q;
+ sub_ddmmss(n1, n0, r, n2, n1, n0);
+ }
+ np[1] = n1;
+ np[0] = n0;
+ }
+ break;
+
+ default:
+ {
+ mpi_size_t i;
+ mpi_limb_t dX, d1, n0;
+
+ np += nsize - dsize;
+ dX = dp[dsize - 1];
+ d1 = dp[dsize - 2];
+ n0 = np[dsize - 1];
+
+ if (n0 >= dX) {
+ if (n0 > dX
+ || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
+ mpihelp_sub_n(np, np, dp, dsize);
+ n0 = np[dsize - 1];
+ most_significant_q_limb = 1;
+ }
+ }
+
+ for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
+ mpi_limb_t q;
+ mpi_limb_t n1, n2;
+ mpi_limb_t cy_limb;
+
+ if (i >= qextra_limbs) {
+ np--;
+ n2 = np[dsize];
+ } else {
+ n2 = np[dsize - 1];
+ MPN_COPY_DECR(np + 1, np, dsize - 1);
+ np[0] = 0;
+ }
+
+ if (n0 == dX) {
+ /* This might over-estimate q, but it's probably not worth
+ * the extra code here to find out. */
+ q = ~(mpi_limb_t) 0;
+ } else {
+ mpi_limb_t r;
+
+ udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+ umul_ppmm(n1, n0, d1, q);
+
+ while (n1 > r
+ || (n1 == r
+ && n0 > np[dsize - 2])) {
+ q--;
+ r += dX;
+ if (r < dX) /* I.e. "carry in previous addition?" */
+ break;
+ n1 -= n0 < d1;
+ n0 -= d1;
+ }
+ }
+
+ /* Possible optimization: We already have (q * n0) and (1 * n1)
+ * after the calculation of q. Taking advantage of that, we
+ * could make this loop make two iterations less. */
+ cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+
+ if (n2 != cy_limb) {
+ mpihelp_add_n(np, np, dp, dsize);
+ q--;
+ }
+
+ qp[i] = q;
+ n0 = np[dsize - 1];
+ }
+ }
+ }
+
+ return most_significant_q_limb;
+}
+
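[Editorial sketch, not part of the patch: the preconditions above are easiest to see in the single-limb case. In this hedged example the numerator values are arbitrary; the divisor's only required property is a set most significant bit (precondition 1).]

	mpi_limb_t n[3] = { 7, 5, 3 };	/* numerator, least significant limb first */
	mpi_limb_t d[1] = { (mpi_limb_t)1 << (BITS_PER_MPI_LIMB - 1) };
	mpi_limb_t q[2];		/* nsize - dsize quotient limbs */
	mpi_limb_t qhigh;

	qhigh = mpihelp_divrem(q, 0, n, 3, d, 1);
	/* q[0], q[1] plus qhigh now hold the quotient; n[0] holds the remainder */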
+/****************
+ * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ * Return the single-limb remainder.
+ * There are no constraints on the value of the divisor.
+ *
+ * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */
+
+mpi_limb_t
+mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+ mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb)
+{
+ mpi_size_t i;
+ mpi_limb_t n1, n0, r;
+ int dummy;
+
+ if (!dividend_size)
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ * dividend is large, pre-invert the divisor, and use
+ * only multiplications in the inner loop.
+ *
+ * This test should be read:
+ * Does it ever help to use udiv_qrnnd_preinv?
+ * && Does what we save compensate for the inversion overhead?
+ */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+ int normalization_steps;
+
+ count_leading_zeros(normalization_steps, divisor_limb);
+ if (normalization_steps) {
+ mpi_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ */
+ /* Special case for DIVISOR_LIMB == 100...000. */
+ if (!(divisor_limb << 1))
+ divisor_limb_inverted = ~(mpi_limb_t) 0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for (i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >>
+ (BITS_PER_MPI_LIMB -
+ normalization_steps))),
+ divisor_limb,
+ divisor_limb_inverted);
+ n1 = n0;
+ }
+ UDIV_QRNND_PREINV(quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ } else {
+ mpi_limb_t divisor_limb_inverted;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ */
+ /* Special case for DIVISOR_LIMB == 100...000. */
+ if (!(divisor_limb << 1))
+ divisor_limb_inverted = ~(mpi_limb_t) 0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ quot_ptr[i--] = 0;
+
+ for (; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(quot_ptr[i], r, r,
+ n0, divisor_limb,
+ divisor_limb_inverted);
+ }
+ return r;
+ }
+ } else {
+ if (UDIV_NEEDS_NORMALIZATION) {
+ int normalization_steps;
+
+ count_leading_zeros(normalization_steps, divisor_limb);
+ if (normalization_steps) {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB -
+ normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for (i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd(quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >>
+ (BITS_PER_MPI_LIMB -
+ normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd(quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ * it, or because DIVISOR_LIMB is already normalized. */
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ quot_ptr[i--] = 0;
+
+ for (; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c
new file mode 100644
index 000000000000..c69c5eef233b
--- /dev/null
+++ b/lib/mpi/mpih-mul.c
@@ -0,0 +1,527 @@
+/* mpihelp-mul.c - MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ * 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+ do { \
+ if ((size) < KARATSUBA_THRESHOLD) \
+ mul_n_basecase(prodp, up, vp, size); \
+ else \
+ mul_n(prodp, up, vp, size, tspace); \
+ } while (0)
+
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
+ do { \
+ if ((size) < KARATSUBA_THRESHOLD) \
+ mpih_sqr_n_basecase(prodp, up, size); \
+ else \
+ mpih_sqr_n(prodp, up, size, tspace); \
+ } while (0)
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
+ * always stored. Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ * must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication. All multiplies rely
+ * on this, both small and huge. Small ones arrive here immediately. Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+ mpi_size_t i;
+ mpi_limb_t cy;
+ mpi_limb_t v_limb;
+
+ /* Multiply by the first limb in V separately, as the result can be
+ * stored (not added) to PROD. We also avoid a loop for zeroing. */
+ v_limb = vp[0];
+ if (v_limb <= 1) {
+ if (v_limb == 1)
+ MPN_COPY(prodp, up, size);
+ else
+ MPN_ZERO(prodp, size);
+ cy = 0;
+ } else
+ cy = mpihelp_mul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy;
+ prodp++;
+
+ /* For each iteration in the outer loop, multiply one limb from
+ * U with one limb from V, and add it to PROD. */
+ for (i = 1; i < size; i++) {
+ v_limb = vp[i];
+ if (v_limb <= 1) {
+ cy = 0;
+ if (v_limb == 1)
+ cy = mpihelp_add_n(prodp, prodp, up, size);
+ } else
+ cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy;
+ prodp++;
+ }
+
+ return cy;
+}
+
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+ mpi_size_t size, mpi_ptr_t tspace)
+{
+ if (size & 1) {
+ /* The size is odd, and the code below doesn't handle that.
+ * Multiply the least significant (size - 1) limbs with a recursive
+ * call, and handle the most significant limb of S1 and S2
+ * separately.
+ * A slightly faster way to do this would be to make the Karatsuba
+ * code below behave as if the size were even, and let it check for
+ * odd size in the end. I.e., in essence move this code to the end.
+ * Doing so would save us a recursive call, and potentially make the
+ * stack grow a lot less.
+ */
+ mpi_size_t esize = size - 1; /* even size */
+ mpi_limb_t cy_limb;
+
+ MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+ cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+ prodp[esize + esize] = cy_limb;
+ cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+ prodp[esize + size] = cy_limb;
+ } else {
+ /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+ *
+ * Split U in two pieces, U1 and U0, such that
+ * U = U0 + U1*(B**n),
+ * and V in V1 and V0, such that
+ * V = V0 + V1*(B**n).
+ *
+ * UV is then computed recursively using the identity
+ *
+ * 2n n n n
+ * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
+ * 1 1 1 0 0 1 0 0
+ *
+ * Where B = 2**BITS_PER_MP_LIMB.
+ */
+ mpi_size_t hsize = size >> 1;
+ mpi_limb_t cy;
+ int negflg;
+
+ /* Product H. ________________ ________________
+ * |_____U1 x V1____||____U0 x V0_____|
+ * Put result in upper part of PROD and pass low part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+ tspace);
+
+ /* Product M. ________________
+ * |_(U1-U0)(V0-V1)_|
+ */
+ if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+ mpihelp_sub_n(prodp, up + hsize, up, hsize);
+ negflg = 0;
+ } else {
+ mpihelp_sub_n(prodp, up, up + hsize, hsize);
+ negflg = 1;
+ }
+ if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+ mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+ negflg ^= 1;
+ } else {
+ mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+ /* No change of NEGFLG. */
+ }
+ /* Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+ tspace + size);
+
+ /* Add/copy product H. */
+ MPN_COPY(prodp + hsize, prodp + size, hsize);
+ cy = mpihelp_add_n(prodp + size, prodp + size,
+ prodp + size + hsize, hsize);
+
+ /* Add product M (if NEGFLG is set, M is a negative number) */
+ if (negflg)
+ cy -=
+ mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+ size);
+ else
+ cy +=
+ mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+ size);
+
+ /* Product L. ________________ ________________
+ * |________________||____U0 x V0_____|
+ * Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+
+ /* Add/copy Product L (twice) */
+
+ cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+ if (cy)
+ mpihelp_add_1(prodp + hsize + size,
+ prodp + hsize + size, hsize, cy);
+
+ MPN_COPY(prodp, tspace, hsize);
+ cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+ hsize);
+ if (cy)
+ mpihelp_add_1(prodp + size, prodp + size, size, 1);
+ }
+}
+
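[Editorial sketch, not part of the patch: the Karatsuba identity quoted in the comment above can be checked with plain machine arithmetic. This standalone check uses B = 2^8 with one 8-bit half per operand; all intermediate values fit comfortably in 64 bits.]

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int64_t B = 1 << 8;
		int64_t u1 = 0x12, u0 = 0x34;	/* U = u1*B + u0 */
		int64_t v1 = 0x56, v0 = 0x78;	/* V = v1*B + v0 */
		int64_t U = u1 * B + u0, V = v1 * B + v0;

		/* UV = (B^2 + B)*U1*V1 + B*(U1-U0)*(V0-V1) + (B+1)*U0*V0 */
		int64_t uv = (B * B + B) * (u1 * v1)
			   + B * (u1 - u0) * (v0 - v1)	/* middle term may be negative */
			   + (B + 1) * (u0 * v0);

		printf("%d\n", uv == U * V);	/* prints 1 */
		return 0;
	}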
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+ mpi_size_t i;
+ mpi_limb_t cy_limb;
+ mpi_limb_t v_limb;
+
+ /* Multiply by the first limb in V separately, as the result can be
+ * stored (not added) to PROD. We also avoid a loop for zeroing. */
+ v_limb = up[0];
+ if (v_limb <= 1) {
+ if (v_limb == 1)
+ MPN_COPY(prodp, up, size);
+ else
+ MPN_ZERO(prodp, size);
+ cy_limb = 0;
+ } else
+ cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy_limb;
+ prodp++;
+
+ /* For each iteration in the outer loop, multiply one limb from
+ * U with one limb from V, and add it to PROD. */
+ for (i = 1; i < size; i++) {
+ v_limb = up[i];
+ if (v_limb <= 1) {
+ cy_limb = 0;
+ if (v_limb == 1)
+ cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+ } else
+ cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy_limb;
+ prodp++;
+ }
+}
+
+void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+ if (size & 1) {
+ /* The size is odd, and the code below doesn't handle that.
+ * Multiply the least significant (size - 1) limbs with a recursive
+ * call, and handle the most significant limb of S1 and S2
+ * separately.
+ * A slightly faster way to do this would be to make the Karatsuba
+ * code below behave as if the size were even, and let it check for
+ * odd size in the end. I.e., in essence move this code to the end.
+ * Doing so would save us a recursive call, and potentially make the
+ * stack grow a lot less.
+ */
+ mpi_size_t esize = size - 1; /* even size */
+ mpi_limb_t cy_limb;
+
+ MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+ cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+ prodp[esize + esize] = cy_limb;
+ cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+
+ prodp[esize + size] = cy_limb;
+ } else {
+ mpi_size_t hsize = size >> 1;
+ mpi_limb_t cy;
+
+ /* Product H. ________________ ________________
+ * |_____U1 x U1____||____U0 x U0_____|
+ * Put result in upper part of PROD and pass low part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+
+ /* Product M. ________________
+ * |_(U1-U0)(U0-U1)_|
+ */
+ if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+ mpihelp_sub_n(prodp, up + hsize, up, hsize);
+ else
+ mpihelp_sub_n(prodp, up, up + hsize, hsize);
+
+ /* Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE. */
+ MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+
+ /* Add/copy product H */
+ MPN_COPY(prodp + hsize, prodp + size, hsize);
+ cy = mpihelp_add_n(prodp + size, prodp + size,
+ prodp + size + hsize, hsize);
+
+ /* Add product M (in the squaring case M is never positive, so subtract). */
+ cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+
+ /* Product L. ________________ ________________
+ * |________________||____U0 x U0_____|
+ * Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE. */
+ MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+
+ /* Add/copy Product L (twice). */
+ cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+ if (cy)
+ mpihelp_add_1(prodp + hsize + size,
+ prodp + hsize + size, hsize, cy);
+
+ MPN_COPY(prodp, tspace, hsize);
+ cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+ hsize);
+ if (cy)
+ mpihelp_add_1(prodp + size, prodp + size, size, 1);
+ }
+}
+
+/* This should be made into an inline function in gmp.h. */
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+ if (up == vp) {
+ if (size < KARATSUBA_THRESHOLD)
+ mpih_sqr_n_basecase(prodp, up, size);
+ else {
+ mpi_ptr_t tspace;
+ tspace = mpi_alloc_limb_space(2 * size);
+ if (!tspace)
+ return -ENOMEM;
+ mpih_sqr_n(prodp, up, size, tspace);
+ mpi_free_limb_space(tspace);
+ }
+ } else {
+ if (size < KARATSUBA_THRESHOLD)
+ mul_n_basecase(prodp, up, vp, size);
+ else {
+ mpi_ptr_t tspace;
+ tspace = mpi_alloc_limb_space(2 * size);
+ if (!tspace)
+ return -ENOMEM;
+ mul_n(prodp, up, vp, size, tspace);
+ mpi_free_limb_space(tspace);
+ }
+ }
+
+ return 0;
+}
+
+int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+ mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize,
+ struct karatsuba_ctx *ctx)
+{
+ mpi_limb_t cy;
+
+ if (!ctx->tspace || ctx->tspace_size < vsize) {
+ if (ctx->tspace)
+ mpi_free_limb_space(ctx->tspace);
+ ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+ if (!ctx->tspace)
+ return -ENOMEM;
+ ctx->tspace_size = vsize;
+ }
+
+ MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+
+ prodp += vsize;
+ up += vsize;
+ usize -= vsize;
+ if (usize >= vsize) {
+ if (!ctx->tp || ctx->tp_size < vsize) {
+ if (ctx->tp)
+ mpi_free_limb_space(ctx->tp);
+ ctx->tp = mpi_alloc_limb_space(2 * vsize);
+ if (!ctx->tp) {
+ if (ctx->tspace)
+ mpi_free_limb_space(ctx->tspace);
+ ctx->tspace = NULL;
+ return -ENOMEM;
+ }
+ ctx->tp_size = vsize;
+ }
+
+ do {
+ MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+ cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+ mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+ cy);
+ prodp += vsize;
+ up += vsize;
+ usize -= vsize;
+ } while (usize >= vsize);
+ }
+
+ if (usize) {
+ if (usize < KARATSUBA_THRESHOLD) {
+ mpi_limb_t tmp;
+ if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+ < 0)
+ return -ENOMEM;
+ } else {
+ if (!ctx->next) {
+ ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
+ if (!ctx->next)
+ return -ENOMEM;
+ }
+ if (mpihelp_mul_karatsuba_case(ctx->tspace,
+ vp, vsize,
+ up, usize,
+ ctx->next) < 0)
+ return -ENOMEM;
+ }
+
+ cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+ mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+ }
+
+ return 0;
+}
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+ struct karatsuba_ctx *ctx2;
+
+ if (ctx->tp)
+ mpi_free_limb_space(ctx->tp);
+ if (ctx->tspace)
+ mpi_free_limb_space(ctx->tspace);
+ for (ctx = ctx->next; ctx; ctx = ctx2) {
+ ctx2 = ctx->next;
+ if (ctx->tp)
+ mpi_free_limb_space(ctx->tp);
+ if (ctx->tspace)
+ mpi_free_limb_space(ctx->tspace);
+ kfree(ctx);
+ }
+}
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP. USIZE + VSIZE limbs are always stored, even if the most
+ * significant limb of the result turns out to be zero. Return the
+ * most significant limb of the result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before the code is
+ * finished reading U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ * must be distinct from the multiplier and the multiplicand.
+ */
+
+int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+ mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+ mpi_limb_t cy;
+ struct karatsuba_ctx ctx;
+
+ if (vsize < KARATSUBA_THRESHOLD) {
+ mpi_size_t i;
+ mpi_limb_t v_limb;
+
+ if (!vsize) {
+ *_result = 0;
+ return 0;
+ }
+
+ /* Multiply by the first limb in V separately, as the result can be
+ * stored (not added) to PROD. We also avoid a loop for zeroing. */
+ v_limb = vp[0];
+ if (v_limb <= 1) {
+ if (v_limb == 1)
+ MPN_COPY(prodp, up, usize);
+ else
+ MPN_ZERO(prodp, usize);
+ cy = 0;
+ } else
+ cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+
+ prodp[usize] = cy;
+ prodp++;
+
+ /* For each iteration in the outer loop, multiply one limb from
+ * U with one limb from V, and add it to PROD. */
+ for (i = 1; i < vsize; i++) {
+ v_limb = vp[i];
+ if (v_limb <= 1) {
+ cy = 0;
+ if (v_limb == 1)
+ cy = mpihelp_add_n(prodp, prodp, up,
+ usize);
+ } else
+ cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+
+ prodp[usize] = cy;
+ prodp++;
+ }
+
+ *_result = cy;
+ return 0;
+ }
+
+ memset(&ctx, 0, sizeof ctx);
+ if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
+ return -ENOMEM;
+ mpihelp_release_karatsuba_ctx(&ctx);
+ *_result = *prod_endp;
+ return 0;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
new file mode 100644
index 000000000000..26e4ed31e256
--- /dev/null
+++ b/lib/mpi/mpiutil.c
@@ -0,0 +1,211 @@
+/* mpiutil.c - Utility functions for MPI
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Note: It was a bad idea to use the number of limbs to allocate,
+ * because on an alpha the limbs are large but we normally need
+ * integers of n bits - so we should change this to bits (or bytes).
+ *
+ * But mpi_alloc is used in a lot of places :-)
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+ MPI a;
+
+ a = kmalloc(sizeof *a, GFP_KERNEL);
+ if (!a)
+ return a;
+
+ if (nlimbs) {
+ a->d = mpi_alloc_limb_space(nlimbs);
+ if (!a->d) {
+ kfree(a);
+ return NULL;
+ }
+ } else {
+ a->d = NULL;
+ }
+
+ a->alloced = nlimbs;
+ a->nlimbs = 0;
+ a->sign = 0;
+ a->flags = 0;
+ a->nbits = 0;
+ return a;
+}
+EXPORT_SYMBOL_GPL(mpi_alloc);
+
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+ size_t len = nlimbs * sizeof(mpi_limb_t);
+
+ if (!len)
+ return NULL;
+
+ return kmalloc(len, GFP_KERNEL);
+}
+
+void mpi_free_limb_space(mpi_ptr_t a)
+{
+ if (!a)
+ return;
+
+ kfree(a);
+}
+
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+ mpi_free_limb_space(a->d);
+ a->d = ap;
+ a->alloced = nlimbs;
+}
+
+/****************
+ * Resize the limb array of A to NLIMBS. The additional space is only
+ * guaranteed to be cleared (set to 0) when A had no limb array before;
+ * the kmalloc() path copies the old limbs and leaves the rest as-is.
+ */
+int mpi_resize(MPI a, unsigned nlimbs)
+{
+ void *p;
+
+ if (nlimbs <= a->alloced)
+ return 0; /* no need to do it */
+
+ if (a->d) {
+ p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+ kfree(a->d);
+ a->d = p;
+ } else {
+ a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+ if (!a->d)
+ return -ENOMEM;
+ }
+ a->alloced = nlimbs;
+ return 0;
+}
+
+void mpi_clear(MPI a)
+{
+ a->nlimbs = 0;
+ a->nbits = 0;
+ a->flags = 0;
+}
+
+void mpi_free(MPI a)
+{
+ if (!a)
+ return;
+
+ if (a->flags & 4)
+ kfree(a->d);
+ else
+ mpi_free_limb_space(a->d);
+
+ if (a->flags & ~7)
+ pr_info("invalid flag value in mpi\n");
+ kfree(a);
+}
+EXPORT_SYMBOL_GPL(mpi_free);
+
+/****************
+ * Note: This copy function should not interpret the MPI
+ * but copy it transparently.
+ */
+int mpi_copy(MPI *copied, const MPI a)
+{
+ size_t i;
+ MPI b;
+
+ *copied = NULL;
+
+ if (a) {
+ b = mpi_alloc(a->nlimbs);
+ if (!b)
+ return -ENOMEM;
+
+ b->nlimbs = a->nlimbs;
+ b->sign = a->sign;
+ b->flags = a->flags;
+ b->nbits = a->nbits;
+
+ for (i = 0; i < b->nlimbs; i++)
+ b->d[i] = a->d[i];
+
+ *copied = b;
+ }
+
+ return 0;
+}
+
+int mpi_set(MPI w, const MPI u)
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize = u->nlimbs;
+ int usign = u->sign;
+
+ if (RESIZE_IF_NEEDED(w, (size_t) usize) < 0)
+ return -ENOMEM;
+
+ wp = w->d;
+ up = u->d;
+ MPN_COPY(wp, up, usize);
+ w->nlimbs = usize;
+ w->nbits = u->nbits;
+ w->flags = u->flags;
+ w->sign = usign;
+ return 0;
+}
+
+int mpi_set_ui(MPI w, unsigned long u)
+{
+ if (RESIZE_IF_NEEDED(w, 1) < 0)
+ return -ENOMEM;
+ w->d[0] = u;
+ w->nlimbs = u ? 1 : 0;
+ w->sign = 0;
+ w->nbits = 0;
+ w->flags = 0;
+ return 0;
+}
+
+MPI mpi_alloc_set_ui(unsigned long u)
+{
+ MPI w = mpi_alloc(1);
+ if (!w)
+ return w;
+ w->d[0] = u;
+ w->nlimbs = u ? 1 : 0;
+ w->sign = 0;
+ return w;
+}
+
+void mpi_swap(MPI a, MPI b)
+{
+ struct gcry_mpi tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
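[Editorial sketch, not part of the patch: taken together, the helpers in this file give the usual allocate/assign/free lifecycle. Error handling is abbreviated; values are arbitrary.]

	MPI m = mpi_alloc(0);		/* header only, no limb space yet */
	if (!m)
		return -ENOMEM;
	if (mpi_set_ui(m, 42UL)) {	/* grows to one limb via RESIZE_IF_NEEDED */
		mpi_free(m);
		return -ENOMEM;
	}
	/* ... use m ... */
	mpi_free(m);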
diff --git a/lib/nlattr.c b/lib/nlattr.c
index ac09f2226dc7..a8408b6cacdf 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -20,6 +20,7 @@ static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_U16] = sizeof(u16),
[NLA_U32] = sizeof(u32),
[NLA_U64] = sizeof(u64),
+ [NLA_MSECS] = sizeof(u64),
[NLA_NESTED] = NLA_HDRLEN,
};
diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c
new file mode 100644
index 000000000000..0d83ea8a9605
--- /dev/null
+++ b/lib/pci_iomap.c
@@ -0,0 +1,48 @@
+/*
+ * Implement the default iomap interfaces
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/pci.h>
+#include <linux/io.h>
+
+#include <linux/export.h>
+
+#ifdef CONFIG_PCI
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details of whether this is an MMIO or PIO address space and just do
+ * what you expect of them.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len || !start)
+ return NULL;
+ if (maxlen && len > maxlen)
+ len = maxlen;
+ if (flags & IORESOURCE_IO)
+ return __pci_ioport_map(dev, start, len);
+ if (flags & IORESOURCE_MEM) {
+ if (flags & IORESOURCE_CACHEABLE)
+ return ioremap(start, len);
+ return ioremap_nocache(start, len);
+ }
+ /* What? */
+ return NULL;
+}
+
+EXPORT_SYMBOL(pci_iomap);
+#endif /* CONFIG_PCI */
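[Editorial sketch, not part of the patch: the usual consumer pattern for this interface. The BAR number and the 0x10 register offset are made up for illustration.]

	void __iomem *regs;
	u32 status;

	regs = pci_iomap(pdev, 0, 0);	/* map all of BAR 0 */
	if (!regs)
		return -ENOMEM;

	status = ioread32(regs + 0x10);		/* hypothetical register */
	iowrite32(status | 1, regs + 0x10);

	pci_iounmap(pdev, regs);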
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 28f2c33c6b53..f8a3f1a829b8 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -10,8 +10,10 @@
#include <linux/module.h>
#include <linux/debugobjects.h>
+#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_MUTEX(percpu_counters_lock);
+#endif
#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
@@ -59,13 +61,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
int cpu;
- spin_lock(&fbc->lock);
+ raw_spin_lock(&fbc->lock);
for_each_possible_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
*pcount = 0;
}
fbc->count = amount;
- spin_unlock(&fbc->lock);
+ raw_spin_unlock(&fbc->lock);
}
EXPORT_SYMBOL(percpu_counter_set);
@@ -76,10 +78,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
preempt_disable();
count = __this_cpu_read(*fbc->counters) + amount;
if (count >= batch || count <= -batch) {
- spin_lock(&fbc->lock);
+ raw_spin_lock(&fbc->lock);
fbc->count += count;
__this_cpu_write(*fbc->counters, 0);
- spin_unlock(&fbc->lock);
+ raw_spin_unlock(&fbc->lock);
} else {
__this_cpu_write(*fbc->counters, count);
}
@@ -96,13 +98,13 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
s64 ret;
int cpu;
- spin_lock(&fbc->lock);
+ raw_spin_lock(&fbc->lock);
ret = fbc->count;
for_each_online_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
}
- spin_unlock(&fbc->lock);
+ raw_spin_unlock(&fbc->lock);
return ret;
}
EXPORT_SYMBOL(__percpu_counter_sum);
@@ -110,7 +112,7 @@ EXPORT_SYMBOL(__percpu_counter_sum);
int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
struct lock_class_key *key)
{
- spin_lock_init(&fbc->lock);
+ raw_spin_lock_init(&fbc->lock);
lockdep_set_class(&fbc->lock, key);
fbc->count = amount;
fbc->counters = alloc_percpu(s32);
@@ -173,11 +175,11 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
s32 *pcount;
unsigned long flags;
- spin_lock_irqsave(&fbc->lock, flags);
+ raw_spin_lock_irqsave(&fbc->lock, flags);
pcount = per_cpu_ptr(fbc->counters, cpu);
fbc->count += *pcount;
*pcount = 0;
- spin_unlock_irqrestore(&fbc->lock, flags);
+ raw_spin_unlock_irqrestore(&fbc->lock, flags);
}
mutex_unlock(&percpu_counters_lock);
#endif
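[Editorial sketch, not part of the patch, for context on the code being converted: per-cpu deltas are only folded into fbc->count once they reach the batch threshold, so reads are cheap but approximate until summed. The init signature below is an assumption matching this era of the kernel.]

	struct percpu_counter nr_things;

	if (percpu_counter_init(&nr_things, 0))
		return -ENOMEM;
	percpu_counter_inc(&nr_things);		/* usually just a per-cpu update */
	pr_info("approx=%lld exact=%lld\n",
		percpu_counter_read(&nr_things),
		percpu_counter_sum(&nr_things));
	percpu_counter_destroy(&nr_things);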
diff --git a/lib/proportions.c b/lib/proportions.c
index d50746a79de2..05df84801b56 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -190,7 +190,7 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
int prop_local_init_percpu(struct prop_local_percpu *pl)
{
- spin_lock_init(&pl->lock);
+ raw_spin_lock_init(&pl->lock);
pl->shift = 0;
pl->period = 0;
return percpu_counter_init(&pl->events, 0);
@@ -226,7 +226,7 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
if (pl->period == global_period)
return;
- spin_lock_irqsave(&pl->lock, flags);
+ raw_spin_lock_irqsave(&pl->lock, flags);
prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
/*
@@ -247,7 +247,7 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
percpu_counter_set(&pl->events, 0);
pl->period = global_period;
- spin_unlock_irqrestore(&pl->lock, flags);
+ raw_spin_unlock_irqrestore(&pl->lock, flags);
}
/*
@@ -324,7 +324,7 @@ void prop_fraction_percpu(struct prop_descriptor *pd,
int prop_local_init_single(struct prop_local_single *pl)
{
- spin_lock_init(&pl->lock);
+ raw_spin_lock_init(&pl->lock);
pl->shift = 0;
pl->period = 0;
pl->events = 0;
@@ -356,7 +356,7 @@ void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
if (pl->period == global_period)
return;
- spin_lock_irqsave(&pl->lock, flags);
+ raw_spin_lock_irqsave(&pl->lock, flags);
prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
/*
* For each missed period, we half the local counter.
@@ -367,7 +367,7 @@ void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
else
pl->events = 0;
pl->period = global_period;
- spin_unlock_irqrestore(&pl->lock, flags);
+ raw_spin_unlock_irqrestore(&pl->lock, flags);
}
/*
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index a2f9da59c197..dc63d0818394 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -48,16 +48,14 @@
struct radix_tree_node {
unsigned int height; /* Height from the bottom */
unsigned int count;
- struct rcu_head rcu_head;
+ union {
+ struct radix_tree_node *parent; /* Used when ascending tree */
+ struct rcu_head rcu_head; /* Used when freeing node */
+ };
void __rcu *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
-struct radix_tree_path {
- struct radix_tree_node *node;
- int offset;
-};
-
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
RADIX_TREE_MAP_SHIFT))
@@ -256,6 +254,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
struct radix_tree_node *node;
+ struct radix_tree_node *slot;
unsigned int height;
int tag;
@@ -274,18 +273,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
if (!(node = radix_tree_node_alloc(root)))
return -ENOMEM;
- /* Increase the height. */
- node->slots[0] = indirect_to_ptr(root->rnode);
-
/* Propagate the aggregated tag info into the new root */
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
if (root_tag_get(root, tag))
tag_set(node, tag, 0);
}
+ /* Increase the height. */
newheight = root->height+1;
node->height = newheight;
node->count = 1;
+ node->parent = NULL;
+ slot = root->rnode;
+ if (newheight > 1) {
+ slot = indirect_to_ptr(slot);
+ slot->parent = node;
+ }
+ node->slots[0] = slot;
node = ptr_to_indirect(node);
rcu_assign_pointer(root->rnode, node);
root->height = newheight;
@@ -331,6 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
if (!(slot = radix_tree_node_alloc(root)))
return -ENOMEM;
slot->height = height;
+ slot->parent = node;
if (node) {
rcu_assign_pointer(node->slots[offset], slot);
node->count++;
@@ -504,47 +509,41 @@ EXPORT_SYMBOL(radix_tree_tag_set);
void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
- /*
- * The radix tree path needs to be one longer than the maximum path
- * since the "list" is null terminated.
- */
- struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+ struct radix_tree_node *node = NULL;
struct radix_tree_node *slot = NULL;
unsigned int height, shift;
+ int uninitialized_var(offset);
height = root->height;
if (index > radix_tree_maxindex(height))
goto out;
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
- pathp->node = NULL;
+ shift = height * RADIX_TREE_MAP_SHIFT;
slot = indirect_to_ptr(root->rnode);
- while (height > 0) {
- int offset;
-
+ while (shift) {
if (slot == NULL)
goto out;
+ shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- pathp[1].offset = offset;
- pathp[1].node = slot;
+ node = slot;
slot = slot->slots[offset];
- pathp++;
- shift -= RADIX_TREE_MAP_SHIFT;
- height--;
}
if (slot == NULL)
goto out;
- while (pathp->node) {
- if (!tag_get(pathp->node, tag, pathp->offset))
+ while (node) {
+ if (!tag_get(node, tag, offset))
goto out;
- tag_clear(pathp->node, tag, pathp->offset);
- if (any_tag_set(pathp->node, tag))
+ tag_clear(node, tag, offset);
+ if (any_tag_set(node, tag))
goto out;
- pathp--;
+
+ index >>= RADIX_TREE_MAP_SHIFT;
+ offset = index & RADIX_TREE_MAP_MASK;
+ node = node->parent;
}
/* clear the root's tag bit */
@@ -576,7 +575,6 @@ int radix_tree_tag_get(struct radix_tree_root *root,
{
unsigned int height, shift;
struct radix_tree_node *node;
- int saw_unset_tag = 0;
/* check the root's tag bit */
if (!root_tag_get(root, tag))
@@ -603,15 +601,10 @@ int radix_tree_tag_get(struct radix_tree_root *root,
return 0;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
- /*
- * This is just a debug check. Later, we can bale as soon as
- * we see an unset tag.
- */
if (!tag_get(node, tag, offset))
- saw_unset_tag = 1;
+ return 0;
if (height == 1)
- return !!tag_get(node, tag, offset);
+ return 1;
node = rcu_dereference_raw(node->slots[offset]);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
@@ -652,8 +645,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned int iftag, unsigned int settag)
{
unsigned int height = root->height;
- struct radix_tree_path path[height];
- struct radix_tree_path *pathp = path;
+ struct radix_tree_node *node = NULL;
struct radix_tree_node *slot;
unsigned int shift;
unsigned long tagged = 0;
@@ -677,14 +669,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
slot = indirect_to_ptr(root->rnode);
- /*
- * we fill the path from (root->height - 2) to 0, leaving the index at
- * (root->height - 1) as a terminator. Zero the node in the terminator
- * so that we can use this to end walk loops back up the path.
- */
- path[height - 1].node = NULL;
-
for (;;) {
+ unsigned long upindex;
int offset;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -692,12 +678,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
goto next;
if (!tag_get(slot, iftag, offset))
goto next;
- if (height > 1) {
+ if (shift) {
/* Go down one level */
- height--;
shift -= RADIX_TREE_MAP_SHIFT;
- path[height - 1].node = slot;
- path[height - 1].offset = offset;
+ node = slot;
slot = slot->slots[offset];
continue;
}
@@ -707,15 +691,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
tag_set(slot, settag, offset);
/* walk back up the path tagging interior nodes */
- pathp = &path[0];
- while (pathp->node) {
+ upindex = index;
+ while (node) {
+ upindex >>= RADIX_TREE_MAP_SHIFT;
+ offset = upindex & RADIX_TREE_MAP_MASK;
+
/* stop if we find a node with the tag already set */
- if (tag_get(pathp->node, settag, pathp->offset))
+ if (tag_get(node, settag, offset))
break;
- tag_set(pathp->node, settag, pathp->offset);
- pathp++;
+ tag_set(node, settag, offset);
+ node = node->parent;
}
+ /*
+ * Small optimization: now clear that node pointer.
+ * Since all of this slot's ancestors now have the tag set
+ * from setting it above, we have no further need to walk
+ * back up the tree setting tags, until we update slot to
+ * point to another radix_tree_node.
+ */
+ node = NULL;
+
next:
/* Go to next item at level determined by 'shift' */
index = ((index >> shift) + 1) << shift;
@@ -730,8 +726,7 @@ next:
* last_index is guaranteed to be in the tree, what
* we do below cannot wander astray.
*/
- slot = path[height - 1].node;
- height++;
+ slot = slot->parent;
shift += RADIX_TREE_MAP_SHIFT;
}
}
@@ -1305,7 +1300,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
/* try to shrink tree height */
while (root->height > 0) {
struct radix_tree_node *to_free = root->rnode;
- void *newptr;
+ struct radix_tree_node *slot;
BUG_ON(!radix_tree_is_indirect_ptr(to_free));
to_free = indirect_to_ptr(to_free);
@@ -1326,10 +1321,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
* (to_free->slots[0]), it will be safe to dereference the new
* one (root->rnode) as far as dependent read barriers go.
*/
- newptr = to_free->slots[0];
- if (root->height > 1)
- newptr = ptr_to_indirect(newptr);
- root->rnode = newptr;
+ slot = to_free->slots[0];
+ if (root->height > 1) {
+ slot->parent = NULL;
+ slot = ptr_to_indirect(slot);
+ }
+ root->rnode = slot;
root->height--;
/*
@@ -1369,16 +1366,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
*/
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
- /*
- * The radix tree path needs to be one longer than the maximum path
- * since the "list" is null terminated.
- */
- struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+ struct radix_tree_node *node = NULL;
struct radix_tree_node *slot = NULL;
struct radix_tree_node *to_free;
unsigned int height, shift;
int tag;
- int offset;
+ int uninitialized_var(offset);
height = root->height;
if (index > radix_tree_maxindex(height))
@@ -1391,39 +1384,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
goto out;
}
slot = indirect_to_ptr(slot);
-
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
- pathp->node = NULL;
+ shift = height * RADIX_TREE_MAP_SHIFT;
do {
if (slot == NULL)
goto out;
- pathp++;
+ shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- pathp->offset = offset;
- pathp->node = slot;
+ node = slot;
slot = slot->slots[offset];
- shift -= RADIX_TREE_MAP_SHIFT;
- height--;
- } while (height > 0);
+ } while (shift);
if (slot == NULL)
goto out;
/*
- * Clear all tags associated with the just-deleted item
+ * Clear all tags associated with the item to be deleted.
+ * This way of doing it is inefficient, but seldom is any tag actually set.
*/
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
- if (tag_get(pathp->node, tag, pathp->offset))
+ if (tag_get(node, tag, offset))
radix_tree_tag_clear(root, index, tag);
}
to_free = NULL;
/* Now free the nodes we do not need anymore */
- while (pathp->node) {
- pathp->node->slots[pathp->offset] = NULL;
- pathp->node->count--;
+ while (node) {
+ node->slots[offset] = NULL;
+ node->count--;
/*
* Queue the node for deferred freeing after the
* last reference to it disappears (set NULL, above).
@@ -1431,17 +1420,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
if (to_free)
radix_tree_node_free(to_free);
- if (pathp->node->count) {
- if (pathp->node == indirect_to_ptr(root->rnode))
+ if (node->count) {
+ if (node == indirect_to_ptr(root->rnode))
radix_tree_shrink(root);
goto out;
}
/* Node with zero slots in use so free it */
- to_free = pathp->node;
- pathp--;
+ to_free = node;
+ index >>= RADIX_TREE_MAP_SHIFT;
+ offset = index & RADIX_TREE_MAP_MASK;
+ node = node->parent;
}
+
root_tag_clear_all(root);
root->height = 0;
root->rnode = NULL;
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index b595f560bee7..8b02f60ffc86 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -17,6 +17,7 @@
*/
#include <linux/raid/pq.h>
+#include <linux/module.h>
#ifndef __KERNEL__
#include <sys/mman.h>
#include <stdio.h>
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index d1e276a14fab..5b50f8dfc5d2 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -11,7 +11,7 @@
* ----------------------------------------------------------------------- */
/*
- * raid6int$#.c
+ * int$#.c
*
* $#-way unrolled portable integer math RAID-6 instruction set
*
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 3b1500843bba..8a3780902cec 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -60,6 +60,7 @@ int main(int argc, char *argv[])
uint8_t exptbl[256], invtbl[256];
printf("#include <linux/raid/pq.h>\n");
+ printf("#include <linux/export.h>\n");
/* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
index 8590d19cf522..fe275d7b6b36 100644
--- a/lib/raid6/recov.c
+++ b/lib/raid6/recov.c
@@ -18,6 +18,7 @@
* the syndrome.)
*/
+#include <linux/export.h>
#include <linux/raid/pq.h>
/* Recover two failed data blocks. */
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 027a03f4c56d..c96d500577de 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -39,7 +39,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
* in addition to the one that will be printed by
* the entity that is holding the lock already:
*/
- if (!spin_trylock_irqsave(&rs->lock, flags))
+ if (!raw_spin_trylock_irqsave(&rs->lock, flags))
return 0;
if (!rs->begin)
@@ -60,7 +60,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->missed++;
ret = 0;
}
- spin_unlock_irqrestore(&rs->lock, flags);
+ raw_spin_unlock_irqrestore(&rs->lock, flags);
return ret;
}
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 6a3bd48fa2a0..75510e94f7d0 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,5 +1,6 @@
#include <asm/div64.h>
#include <linux/reciprocal_div.h>
+#include <linux/export.h>
u32 reciprocal_value(u32 k)
{
@@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k)
do_div(val, k);
return (u32)val;
}
+EXPORT_SYMBOL(reciprocal_value);
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index ffc9fc7f3b05..f2393c21fe85 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -22,9 +22,9 @@ int rwsem_is_locked(struct rw_semaphore *sem)
int ret = 1;
unsigned long flags;
- if (spin_trylock_irqsave(&sem->wait_lock, flags)) {
+ if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
ret = (sem->activity != 0);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
return ret;
}
@@ -44,7 +44,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->activity = 0;
- spin_lock_init(&sem->wait_lock);
+ raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
EXPORT_SYMBOL(__init_rwsem);
@@ -145,12 +145,12 @@ void __sched __down_read(struct rw_semaphore *sem)
struct task_struct *tsk;
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->activity++;
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
@@ -165,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
list_add_tail(&waiter.list, &sem->wait_list);
/* we don't need to touch the semaphore struct anymore */
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
/* wait to be given the lock */
for (;;) {
@@ -189,7 +189,7 @@ int __down_read_trylock(struct rw_semaphore *sem)
int ret = 0;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
/* granted */
@@ -197,7 +197,7 @@ int __down_read_trylock(struct rw_semaphore *sem)
ret = 1;
}
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
@@ -212,12 +212,12 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
struct task_struct *tsk;
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity == 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->activity = -1;
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
@@ -232,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
list_add_tail(&waiter.list, &sem->wait_list);
/* we don't need to touch the semaphore struct anymore */
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
/* wait to be given the lock */
for (;;) {
@@ -260,7 +260,7 @@ int __down_write_trylock(struct rw_semaphore *sem)
unsigned long flags;
int ret = 0;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity == 0 && list_empty(&sem->wait_list)) {
/* granted */
@@ -268,7 +268,7 @@ int __down_write_trylock(struct rw_semaphore *sem)
ret = 1;
}
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
@@ -280,12 +280,12 @@ void __up_read(struct rw_semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (--sem->activity == 0 && !list_empty(&sem->wait_list))
sem = __rwsem_wake_one_writer(sem);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
@@ -295,13 +295,13 @@ void __up_write(struct rw_semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->activity = 0;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 1);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
@@ -312,12 +312,12 @@ void __downgrade_write(struct rw_semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->activity = 1;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 0);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
diff --git a/lib/rwsem.c b/lib/rwsem.c
index aa7c3052261f..410aa1189b13 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -22,7 +22,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
+ raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
@@ -180,7 +180,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
- spin_lock_irq(&sem->wait_lock);
+ raw_spin_lock_irq(&sem->wait_lock);
waiter.task = tsk;
waiter.flags = flags;
get_task_struct(tsk);
@@ -204,7 +204,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
- spin_unlock_irq(&sem->wait_lock);
+ raw_spin_unlock_irq(&sem->wait_lock);
/* wait to be given the lock */
for (;;) {
@@ -245,13 +245,13 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
@@ -265,13 +265,13 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->wait_lock, flags);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return sem;
}
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 4ceb05d772ae..33b2cbb97380 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -390,7 +390,7 @@ bool sg_miter_next(struct sg_mapping_iter *miter)
miter->consumed = miter->length;
if (miter->__flags & SG_MITER_ATOMIC)
- miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off;
+ miter->addr = kmap_atomic(miter->page) + off;
else
miter->addr = kmap(miter->page) + off;
@@ -424,7 +424,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
if (miter->__flags & SG_MITER_ATOMIC) {
WARN_ON(!irqs_disabled());
- kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
+ kunmap_atomic(miter->addr);
} else
kunmap(miter->page);
diff --git a/lib/sha1.c b/lib/sha1.c
index f33271dd00cb..1de509a159c8 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bitops.h>
+#include <linux/cryptohash.h>
#include <asm/unaligned.h>
/*
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4689cb073da4..503f087382a4 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
* Kernel threads bound to a single CPU can safely use
* smp_processor_id():
*/
- if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+ if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu)))
goto out;
/*
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 4755b98b6dfb..5f3eacdd6178 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -49,13 +49,10 @@ void __rwlock_init(rwlock_t *lock, const char *name,
EXPORT_SYMBOL(__rwlock_init);
-static void spin_bug(raw_spinlock_t *lock, const char *msg)
+static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
struct task_struct *owner = NULL;
- if (!debug_locks_off())
- return;
-
if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
owner = lock->owner;
printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
@@ -70,6 +67,14 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
dump_stack();
}
+static void spin_bug(raw_spinlock_t *lock, const char *msg)
+{
+ if (!debug_locks_off())
+ return;
+
+ spin_dump(lock, msg);
+}
+
#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
static inline void
@@ -113,11 +118,7 @@ static void __spin_lock_debug(raw_spinlock_t *lock)
/* lockup suspected: */
if (print_once) {
print_once = 0;
- printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
- "%s/%d, %p\n",
- raw_smp_processor_id(), current->comm,
- task_pid_nr(current), lock);
- dump_stack();
+ spin_dump(lock, "lockup");
#ifdef CONFIG_SMP
trigger_all_cpu_backtrace();
#endif
diff --git a/lib/string.c b/lib/string.c
index 01fad9b203e1..dc4a86341f91 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -360,7 +360,6 @@ char *strim(char *s)
size_t size;
char *end;
- s = skip_spaces(s);
size = strlen(s);
if (!size)
return s;
@@ -370,7 +369,7 @@ char *strim(char *s)
end--;
*(end + 1) = '\0';
- return s;
+ return skip_spaces(s);
}
EXPORT_SYMBOL(strim);
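[Editorial sketch, not part of the patch: the change above means strim() still trims trailing whitespace in place, but now skips leading whitespace only in the returned pointer, so the original buffer start is preserved for the caller.]

	char buf[] = "  hello  ";
	char *s = strim(buf);
	/* buf is now "  hello" (tail NUL-terminated); s points at "hello" */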
@@ -756,3 +755,57 @@ void *memchr(const void *s, int c, size_t n)
}
EXPORT_SYMBOL(memchr);
#endif
+
+static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)
+{
+ while (bytes) {
+ if (*start != value)
+ return (void *)start;
+ start++;
+ bytes--;
+ }
+ return NULL;
+}
+
+/**
+ * memchr_inv - Find a character other than @c in an area of memory.
+ * @start: The memory area
+ * @c: The byte value to skip over
+ * @bytes: The size of the area.
+ *
+ * Returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *start, int c, size_t bytes)
+{
+ u8 value = c;
+ u64 value64;
+ unsigned int words, prefix;
+
+ if (bytes <= 16)
+ return check_bytes8(start, value, bytes);
+
+ value64 = value;
+ value64 |= value64 << 8;
+ value64 |= value64 << 16;
+ value64 |= value64 << 32;
+ prefix = 8 - ((unsigned long)start) % 8;
+
+ if (prefix) {
+ u8 *r = check_bytes8(start, value, prefix);
+ if (r)
+ return r;
+ start += prefix;
+ bytes -= prefix;
+ }
+
+ words = bytes / 8;
+
+ while (words) {
+ if (*(u64 *)start != value64)
+ return check_bytes8(start, value, 8);
+ start += 8;
+ words--;
+ }
+
+ return check_bytes8(start, value, bytes % 8);
+}
+EXPORT_SYMBOL(memchr_inv);
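[Editorial sketch, not part of the patch: the common all-equal check this helper is built for, with buf and len standing in for a caller's buffer.]

	if (memchr_inv(buf, 0, len))
		pr_warn("buffer is not fully zeroed\n");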
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 99093b396145..d0f6315f4a24 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str)
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
{
return io_tlb_nslabs;
}
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
@@ -321,6 +321,7 @@ void __init swiotlb_free(void)
free_bootmem_late(__pa(io_tlb_start),
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
+ io_tlb_nslabs = 0;
}
static int is_swiotlb_buffer(phys_addr_t paddr)
@@ -348,13 +349,12 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
sz = min_t(size_t, PAGE_SIZE - offset, size);
local_irq_save(flags);
- buffer = kmap_atomic(pfn_to_page(pfn),
- KM_BOUNCE_READ);
+ buffer = kmap_atomic(pfn_to_page(pfn));
if (dir == DMA_TO_DEVICE)
memcpy(dma_addr, buffer + offset, sz);
else
memcpy(buffer + offset, dma_addr, sz);
- kunmap_atomic(buffer, KM_BOUNCE_READ);
+ kunmap_atomic(buffer);
local_irq_restore(flags);
size -= sz;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d7222a9c8267..38e612e66da5 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -31,17 +31,7 @@
#include <asm/div64.h>
#include <asm/sections.h> /* for dereference_function_descriptor() */
-static unsigned int simple_guess_base(const char *cp)
-{
- if (cp[0] == '0') {
- if (_tolower(cp[1]) == 'x' && isxdigit(cp[2]))
- return 16;
- else
- return 8;
- } else {
- return 10;
- }
-}
+#include "kstrtox.h"
/**
* simple_strtoull - convert a string to an unsigned long long
@@ -51,23 +41,14 @@ static unsigned int simple_guess_base(const char *cp)
*/
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
- unsigned long long result = 0;
-
- if (!base)
- base = simple_guess_base(cp);
+ unsigned long long result;
+ unsigned int rv;
- if (base == 16 && cp[0] == '0' && _tolower(cp[1]) == 'x')
- cp += 2;
+ cp = _parse_integer_fixup_radix(cp, &base);
+ rv = _parse_integer(cp, base, &result);
+ /* FIXME */
+ cp += (rv & ~KSTRTOX_OVERFLOW);
- while (isxdigit(*cp)) {
- unsigned int value;
-
- value = isdigit(*cp) ? *cp - '0' : _tolower(*cp) - 'a' + 10;
- if (value >= base)
- break;
- result = result * base + value;
- cp++;
- }
if (endp)
*endp = (char *)cp;
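[Editorial sketch, not part of the patch: with the radix fixup above, passing base 0 auto-detects 0x and leading-0 prefixes.]

	char *end;
	unsigned long long v = simple_strtoull("0x1a rest", &end, 0);
	/* v == 26; end points at the space after "1a" */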
@@ -566,7 +547,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr,
}
for (i = 0; i < 6; i++) {
- p = pack_hex_byte(p, addr[i]);
+ p = hex_byte_pack(p, addr[i]);
if (fmt[0] == 'M' && i != 5)
*p++ = separator;
}
@@ -686,13 +667,13 @@ char *ip6_compressed_string(char *p, const char *addr)
lo = word & 0xff;
if (hi) {
if (hi > 0x0f)
- p = pack_hex_byte(p, hi);
+ p = hex_byte_pack(p, hi);
else
*p++ = hex_asc_lo(hi);
- p = pack_hex_byte(p, lo);
+ p = hex_byte_pack(p, lo);
}
else if (lo > 0x0f)
- p = pack_hex_byte(p, lo);
+ p = hex_byte_pack(p, lo);
else
*p++ = hex_asc_lo(lo);
needcolon = true;
@@ -714,8 +695,8 @@ char *ip6_string(char *p, const char *addr, const char *fmt)
int i;
for (i = 0; i < 8; i++) {
- p = pack_hex_byte(p, *addr++);
- p = pack_hex_byte(p, *addr++);
+ p = hex_byte_pack(p, *addr++);
+ p = hex_byte_pack(p, *addr++);
if (fmt[0] == 'I' && i != 7)
*p++ = ':';
}
@@ -773,7 +754,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
}
for (i = 0; i < 16; i++) {
- p = pack_hex_byte(p, addr[index[i]]);
+ p = hex_byte_pack(p, addr[index[i]]);
switch (i) {
case 3:
case 5:
@@ -796,6 +777,18 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
return string(buf, end, uuid, spec);
}
+static
+char *netdev_feature_string(char *buf, char *end, const u8 *addr,
+ struct printf_spec spec)
+{
+ spec.flags |= SPECIAL | SMALL | ZEROPAD;
+ if (spec.field_width == -1)
+ spec.field_width = 2 + 2 * sizeof(netdev_features_t);
+ spec.base = 16;
+
+ return number(buf, end, *(const netdev_features_t *)addr, spec);
+}
+
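[Editorial sketch, not part of the patch: the new specifier in use. Note the argument is passed by reference, since netdev_feature_string() dereferences the pointer; 'dev' is hypothetical.]

	netdev_features_t features = dev->features;

	pr_info("features = %pNF\n", &features);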
int kptr_restrict __read_mostly;
/*
@@ -843,6 +836,7 @@ int kptr_restrict __read_mostly;
* Do not use this feature without some mechanism to verify the
* correctness of the format string and va_list arguments.
* - 'K' For a kernel pointer that should be hidden from unprivileged users
+ * - 'NF' For a netdev_features_t
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -897,9 +891,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
case 'U':
return uuid_string(buf, end, ptr, spec, fmt);
case 'V':
- return buf + vsnprintf(buf, end > buf ? end - buf : 0,
- ((struct va_format *)ptr)->fmt,
- *(((struct va_format *)ptr)->va));
+ {
+ va_list va;
+
+ va_copy(va, *((struct va_format *)ptr)->va);
+ buf += vsnprintf(buf, end > buf ? end - buf : 0,
+ ((struct va_format *)ptr)->fmt, va);
+ va_end(va);
+ return buf;
+ }
case 'K':
/*
* %pK cannot be used in IRQ context because its test
@@ -915,6 +915,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
has_capability_noaudit(current, CAP_SYSLOG))))
ptr = NULL;
break;
+ case 'N':
+ switch (fmt[1]) {
+ case 'F':
+ return netdev_feature_string(buf, end, ptr, spec);
+ }
+ break;
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
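[Editorial sketch, not part of the patch: the va_copy() added to the 'V' case above lets a %pV argument be consumed without corrupting the caller's va_list, which may still be needed afterwards. For context, the canonical producer side, inside some varargs helper taking 'fmt, ...':]

	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_err("%pV\n", &vaf);
	va_end(args);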
diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c
index e51e2558ca9d..a768e6d28bbb 100644
--- a/lib/xz/xz_dec_bcj.c
+++ b/lib/xz/xz_dec_bcj.c
@@ -441,8 +441,12 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
* next filter in the chain. Apply the BCJ filter on the new data
* in the output buffer. If everything cannot be filtered, copy it
* to temp and rewind the output buffer position accordingly.
+ *
+ * This must always be run when temp.size == 0 to handle a special
+ * case where the output buffer is full and the next filter has no
+ * more output coming but hasn't returned XZ_STREAM_END yet.
*/
- if (s->temp.size < b->out_size - b->out_pos) {
+ if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
out_start = b->out_pos;
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
b->out_pos += s->temp.size;
@@ -465,16 +469,25 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
s->temp.size = b->out_pos - out_start;
b->out_pos -= s->temp.size;
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
+
+ /*
+ * If there wasn't enough input to the next filter to fill
+ * the output buffer with unfiltered data, there's no point
+ * to try decoding more data to temp.
+ */
+ if (b->out_pos + s->temp.size < b->out_size)
+ return XZ_OK;
}
/*
- * If we have unfiltered data in temp, try to fill by decoding more
- * data from the next filter. Apply the BCJ filter on temp. Then we
- * hopefully can fill the actual output buffer by copying filtered
- * data from temp. A mix of filtered and unfiltered data may be left
- * in temp; it will be taken care on the next call to this function.
+ * We have unfiltered data in temp. If the output buffer isn't full
+ * yet, try to fill the temp buffer by decoding more data from the
+ * next filter. Apply the BCJ filter on temp. Then we hopefully can
+ * fill the actual output buffer by copying filtered data from temp.
+ * A mix of filtered and unfiltered data may be left in temp; it will
+ * be taken care of on the next call to this function.
*/
- if (s->temp.size > 0) {
+ if (b->out_pos < b->out_size) {
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
s->out = b->out;
s->out_pos = b->out_pos;