summaryrefslogtreecommitdiffstats
path: root/jni/feature_stab/db_vlvm
diff options
context:
space:
mode:
Diffstat (limited to 'jni/feature_stab/db_vlvm')
-rw-r--r--jni/feature_stab/db_vlvm/db_bundle.h68
-rw-r--r--jni/feature_stab/db_vlvm/db_feature_detection.cpp1770
-rw-r--r--jni/feature_stab/db_vlvm/db_feature_detection.h179
-rw-r--r--jni/feature_stab/db_vlvm/db_feature_matching.cpp3410
-rw-r--r--jni/feature_stab/db_vlvm/db_feature_matching.h260
-rw-r--r--jni/feature_stab/db_vlvm/db_framestitching.cpp169
-rw-r--r--jni/feature_stab/db_vlvm/db_framestitching.h94
-rw-r--r--jni/feature_stab/db_vlvm/db_image_homography.cpp332
-rw-r--r--jni/feature_stab/db_vlvm/db_image_homography.h183
-rw-r--r--jni/feature_stab/db_vlvm/db_metrics.h408
-rw-r--r--jni/feature_stab/db_vlvm/db_rob_image_homography.cpp1082
-rw-r--r--jni/feature_stab/db_vlvm/db_rob_image_homography.h148
-rw-r--r--jni/feature_stab/db_vlvm/db_robust.h61
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities.cpp176
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities.h571
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_algebra.h41
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_camera.cpp50
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_camera.h332
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_constants.h208
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_geometry.h121
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_indexing.cpp120
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_indexing.h270
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_linalg.cpp376
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_linalg.h802
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_poly.cpp235
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_poly.h383
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_random.h98
-rw-r--r--jni/feature_stab/db_vlvm/db_utilities_rotation.h59
28 files changed, 12006 insertions, 0 deletions
diff --git a/jni/feature_stab/db_vlvm/db_bundle.h b/jni/feature_stab/db_vlvm/db_bundle.h
new file mode 100644
index 000000000..e4fb8db2c
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_bundle.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_bundle.h,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */
+
+#ifndef DB_BUNDLE_H
+#define DB_BUNDLE_H
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMBundle (LM) Bundle adjustment utilities (a.k.a. Levenberg-Marquardt algorithm)
+ */
+/*\{*/
+
+#include "db_utilities.h"
+
+/*!
+Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf
+using only upper half of JtJ, destroying lower half below diagonal in the process
+dimension is n and d should point to n allocated doubles of scratch memory
+*/
+inline void db_Compute_dx(double *dx,double **JtJ,double *min_Jtf,double lambda,double *d,int n)
+{
+ int i;
+ double opl;
+
+ opl=1.0+lambda;
+ for(i=0;i<n;i++) d[i]=JtJ[i][i]*opl;
+
+ db_CholeskyDecompSeparateDiagonal(JtJ,d,n);
+ db_CholeskyBacksub(dx,JtJ,d,n,min_Jtf);
+}
+
+/*!
+Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf
+using only upper half of JtJ, destroying lower half below diagonal in the process
+*/
+inline void db_Compute_dx_3x3(double dx[3],double JtJ[9],const double min_Jtf[3],double lambda)
+{
+ double d[3],opl;
+
+ opl=1.0+lambda;
+ d[0]=JtJ[0]*opl;
+ d[1]=JtJ[4]*opl;
+ d[2]=JtJ[8]*opl;
+ db_CholeskyDecomp3x3SeparateDiagonal(JtJ,d);
+ db_CholeskyBacksub3x3(dx,JtJ,d,min_Jtf);
+}
+
+/*\}*/
+
+#endif /* DB_BUNDLE_H */
diff --git a/jni/feature_stab/db_vlvm/db_feature_detection.cpp b/jni/feature_stab/db_vlvm/db_feature_detection.cpp
new file mode 100644
index 000000000..28cb4a781
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_detection.cpp
@@ -0,0 +1,1770 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_utilities.h"
+#include "db_feature_detection.h"
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+#include <float.h>
+
+#define DB_SUB_PIXEL
+
+#define BORDER 10 // 5
+
+float** db_AllocStrengthImage_f(float **im,int w,int h)
+{
+ int i,n,aw;
+ long c,size;
+ float **img,*aim,*p;
+
+ /*Determine number of 124 element chunks needed*/
+ n=(db_maxi(1,w-6)+123)/124;
+ /*Determine the total allocation width aw*/
+ aw=n*124+8;
+ /*Allocate*/
+ size=aw*h+16;
+ *im=new float [size];
+ /*Clean up*/
+ p=(*im);
+ for(c=0;c<size;c++) p[c]=0.0;
+ /*Get a 16 byte aligned pointer*/
+ aim=db_AlignPointer_f(*im,16);
+ /*Allocate pointer table*/
+ img=new float* [h];
+ /*Initialize the pointer table*/
+ for(i=0;i<h;i++)
+ {
+ img[i]=aim+aw*i+1;
+ }
+
+ return(img);
+}
+
/*Release a strength image created with db_AllocStrengthImage_f.
im is the raw buffer, img the row-pointer table; h is unused*/
void db_FreeStrengthImage_f(float *im,float **img,int h)
{
    delete [] img;
    delete [] im;
}
+
/*Compute central-difference derivatives Ix,Iy for a subrow of img with upper
left (i,j) and width chunk_width. Pixels one step outside the subrow are read*/
inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
{
    const float *row_above=img[i-1]+j;
    const float *row      =img[i]+j;
    const float *row_below=img[i+1]+j;

    for(int c=0;c<chunk_width;c++)
    {
        /*Horizontal gradient: left neighbor minus right neighbor*/
        Ix[c]=row[c-1]-row[c+1];
        /*Vertical gradient: upper neighbor minus lower neighbor*/
        Iy[c]=row_above[c]-row_below[c];
    }
}
+
/*Compute derivative products for a subrow of img with upper left (i,j) and
width nc. The halved central differences Ix,Iy are formed and their products
are stored as integers: Ix*Ix in dxx[0..nc-1], Ix*Iy in dxx[128..128+nc-1]
and Iy*Iy in dxx[256..256+nc-1].
Memory references occur one pixel outside the subrow.
NOTE(review): the MMX path processes a fixed 128-pixel row (16 iterations of
8 bytes) with hard-coded output offsets 512/1024 and ignores nc - confirm
nc==128 whenever DB_USE_MMX is defined*/
inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
{
#ifdef DB_USE_MMX
    const unsigned char *r1,*r2,*r3;

    /*Rows above, at and below the target subrow*/
    r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;

    _asm
    {
        mov esi,16
        mov eax,r1
        mov ebx,r2
        mov ecx,r3
        mov edx,dxx

        /*Get bitmask into mm7*/
        mov edi,7F7F7F7Fh
        movd mm7,edi
        punpckldq mm7,mm7

loopstart:
        /***************dx part 1-12*********************************/
        movq mm0,[eax]      /*1 Get upper*/
        pxor mm6,mm6        /*2 Set to zero*/
        movq mm1,[ecx]      /*3 Get lower*/
        psrlq mm0,1         /*4 Shift*/
        psrlq mm1,1         /*5 Shift*/
        pand mm0,mm7        /*6 And*/
        movq mm2,[ebx-1]    /*13 Get left*/
        pand mm1,mm7        /*7 And*/
        psubb mm0,mm1       /*8 Subtract*/
        pxor mm5,mm5        /*14 Set to zero*/
        movq mm1,mm0        /*9 Copy*/
        pcmpgtb mm6,mm0     /*10 Create unpack mask*/
        movq mm3,[ebx+1]    /*15 Get right*/
        punpcklbw mm0,mm6   /*11 Unpack low*/
        punpckhbw mm1,mm6   /*12 Unpack high*/
        /***************dy part 13-24*********************************/
        movq mm4,mm0        /*25 Copy dx*/
        psrlq mm2,1         /*16 Shift*/
        pmullw mm0,mm0      /*26 Multiply dx*dx*/
        psrlq mm3,1         /*17 Shift*/
        pand mm2,mm7        /*18 And*/
        pand mm3,mm7        /*19 And*/
        /*Stall*/
        psubb mm2,mm3       /*20 Subtract*/
        /*Stall*/
        movq mm3,mm2        /*21 Copy*/
        pcmpgtb mm5,mm2     /*22 Create unpack mask*/
        punpcklbw mm2,mm5   /*23 Unpack low*/
        /*Stall*/
        punpckhbw mm3,mm5   /*24 Unpack high*/
        /***************dxx dxy dyy low part 25-49*********************************/
        pmullw mm4,mm2      /*27 Multiply dx*dy*/
        pmullw mm2,mm2      /*28 Multiply dy*dy*/
        pxor mm6,mm6        /*29 Set to zero*/
        movq mm5,mm0        /*30 Copy dx*dx*/
        pcmpgtw mm6,mm0     /*31 Create unpack mask for dx*dx*/
        punpcklwd mm0,mm6   /*32 Unpack dx*dx lows*/
        /*Stall*/
        punpckhwd mm5,mm6   /*33 Unpack dx*dx highs*/
        pxor mm6,mm6        /*36 Set to zero*/
        movq [edx],mm0      /*34 Store dx*dx lows*/
        movq mm0,mm4        /*37 Copy dx*dy*/
        movq [edx+8],mm5    /*35 Store dx*dx highs*/
        pcmpgtw mm6,mm4     /*38 Create unpack mask for dx*dy*/
        punpcklwd mm4,mm6   /*39 Unpack dx*dy lows*/
        /*Stall*/
        punpckhwd mm0,mm6   /*40 Unpack dx*dy highs*/
        pxor mm6,mm6        /*43 Set to zero*/
        movq [edx+512],mm4  /*41 Store dx*dy lows*/
        movq mm5,mm2        /*44 Copy dy*dy*/
        movq [edx+520],mm0  /*42 Store dx*dy highs*/
        pcmpgtw mm6,mm2     /*45 Create unpack mask for dy*dy*/
        punpcklwd mm2,mm6   /*46 Unpack dy*dy lows*/
        movq mm4,mm1        /*50 Copy dx*/
        punpckhwd mm5,mm6   /*47 Unpack dy*dy highs*/
        pmullw mm1,mm1      /*51 Multiply dx*dx*/
        movq [edx+1024],mm2 /*48 Store dy*dy lows*/
        pmullw mm4,mm3      /*52 Multiply dx*dy*/
        movq [edx+1032],mm5 /*49 Store dy*dy highs*/
        /***************dxx dxy dyy high part 50-79*********************************/
        pmullw mm3,mm3      /*53 Multiply dy*dy*/
        pxor mm6,mm6        /*54 Set to zero*/
        movq mm5,mm1        /*55 Copy dx*dx*/
        pcmpgtw mm6,mm1     /*56 Create unpack mask for dx*dx*/
        pxor mm2,mm2        /*61 Set to zero*/
        punpcklwd mm1,mm6   /*57 Unpack dx*dx lows*/
        movq mm0,mm4        /*62 Copy dx*dy*/
        punpckhwd mm5,mm6   /*58 Unpack dx*dx highs*/
        pcmpgtw mm2,mm4     /*63 Create unpack mask for dx*dy*/
        movq [edx+16],mm1   /*59 Store dx*dx lows*/
        punpcklwd mm4,mm2   /*64 Unpack dx*dy lows*/
        movq [edx+24],mm5   /*60 Store dx*dx highs*/
        punpckhwd mm0,mm2   /*65 Unpack dx*dy highs*/
        movq [edx+528],mm4  /*66 Store dx*dy lows*/
        pxor mm6,mm6        /*68 Set to zero*/
        movq [edx+536],mm0  /*67 Store dx*dy highs*/
        movq mm5,mm3        /*69 Copy dy*dy*/
        pcmpgtw mm6,mm3     /*70 Create unpack mask for dy*dy*/
        add eax,8           /*75*/
        punpcklwd mm3,mm6   /*71 Unpack dy*dy lows*/
        add ebx,8           /*76*/
        punpckhwd mm5,mm6   /*72 Unpack dy*dy highs*/
        add ecx,8           /*77*/
        movq [edx+1040],mm3 /*73 Store dy*dy lows*/
        /*Stall*/
        movq [edx+1048],mm5 /*74 Store dy*dy highs*/
        /*Stall*/
        add edx,32          /*78*/
        dec esi             /*79*/
        jnz loopstart

        emms
    }

#else
    int c;
    int Ix,Iy;

    for(c=0;c<nc;c++)
    {
        /*Halved central differences (the >>1 matches the MMX path's shift-by-one)*/
        Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
        Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
        dxx[c]=Ix*Ix;
        dxx[c+128]=Ix*Iy;
        dxx[c+256]=Iy*Iy;
    }
#endif /*DB_USE_MMX*/
}
+
/*Vertically filter five rows of derivatives (each of length chunk_width) with
the binomial kernel 1-4-6-4-1, accumulating the squared and cross products
Ix*Ix, Ix*Iy and Iy*Iy into gxx, gxy and gyy*/
inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
                              float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
                              float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
{
    float *Ix[5]={Ix0,Ix1,Ix2,Ix3,Ix4};
    float *Iy[5]={Iy0,Iy1,Iy2,Iy3,Iy4};
    /*Binomial filter weights*/
    const float w[5]={1.0f,4.0f,6.0f,4.0f,1.0f};

    for(int c=0;c<chunk_width;c++)
    {
        float sxx=0.0f,sxy=0.0f,syy=0.0f;

        /*Accumulate weighted products row by row (same order and grouping
        as summing the per-row products explicitly)*/
        for(int r=0;r<5;r++)
        {
            const float dx=Ix[r][c];
            const float dy=Iy[r][c];
            sxx+=w[r]*(dx*dx);
            sxy+=w[r]*(dx*dy);
            syy+=w[r]*(dy*dy);
        }

        gxx[c]=sxx;
        gxy[c]=sxy;
        gyy[c]=syy;
    }
}
+
/*Vertically filter five rows d0..d4 of derivative products (each row stores
Ix*Ix at offset 0, Ix*Iy at offset 128 and Iy*Iy at offset 256) with the
binomial kernel 1-4-6-4-1, writing the sums to g with the same layout.
NOTE(review): the MMX path assumes a full row of 128 entries (64 iterations
of 2 double-words) and ignores nc - confirm nc==128 when DB_USE_MMX is defined*/
inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
{
#ifdef DB_USE_MMX
    int c;

    _asm
    {
        mov c,64
        mov eax,d0
        mov ebx,d1
        mov ecx,d2
        mov edx,d3
        mov edi,d4
        mov esi,g

loopstart:
        /***************dxx part 1-14*********************************/
        movq mm0,[eax]          /*1 Get dxx0*/
        /*Stall*/
        movq mm1,[ebx]          /*2 Get dxx1*/
        /*Stall*/
        movq mm2,[ecx]          /*5 Get dxx2*/
        pslld mm1,2             /*3 Shift dxx1*/
        movq mm3,[edx]          /*10 Get dxx3*/
        paddd mm0,mm1           /*4 Accumulate dxx1*/
        movq mm4,[eax+512]      /*15 Get dxy0*/
        pslld mm2,1             /*6 Shift dxx2 1*/
        paddd mm0,mm2           /*7 Accumulate dxx2 1*/
        pslld mm2,1             /*8 Shift dxx2 2*/
        movq mm5,[ebx+512]      /*16 Get dxy1*/
        paddd mm0,mm2           /*9 Accumulate dxx2 2*/
        pslld mm3,2             /*11 Shift dxx3*/
        /*Stall*/
        paddd mm0,mm3           /*12 Accumulate dxx3*/
        pslld mm5,2             /*17 Shift dxy1*/
        paddd mm0,[edi]         /*13 Accumulate dxx4*/
        paddd mm4,mm5           /*18 Accumulate dxy1*/
        movq mm6,[ecx+512]      /*19 Get dxy2*/
        /*Stall*/
        movq [esi],mm0          /*14 Store dxx sums*/
        /***************dxy part 15-28*********************************/
        pslld mm6,1             /*20 Shift dxy2 1*/
        paddd mm4,mm6           /*21 Accumulate dxy2 1*/
        pslld mm6,1             /*22 Shift dxy2 2*/
        movq mm0,[eax+1024]     /*29 Get dyy0*/
        paddd mm4,mm6           /*23 Accumulate dxy2 2*/
        movq mm7,[edx+512]      /*24 Get dxy3*/
        pslld mm7,2             /*25 Shift dxy3*/
        movq mm1,[ebx+1024]     /*30 Get dyy1*/
        paddd mm4,mm7           /*26 Accumulate dxy3*/
        paddd mm4,[edi+512]     /*27 Accumulate dxy4*/
        pslld mm1,2             /*31 Shift dyy1*/
        movq mm2,[ecx+1024]     /*33 Get dyy2*/
        paddd mm0,mm1           /*32 Accumulate dyy1*/
        movq [esi+512],mm4      /*28 Store dxy sums*/
        pslld mm2,1             /*34 Shift dyy2 1*/
        /***************dyy part 29-49*********************************/


        movq mm3,[edx+1024]     /*38 Get dyy3*/
        paddd mm0,mm2           /*35 Accumulate dyy2 1*/
        paddd mm0,[edi+1024]    /*41 Accumulate dyy4*/
        pslld mm2,1             /*36 Shift dyy2 2*/
        paddd mm0,mm2           /*37 Accumulate dyy2 2*/
        pslld mm3,2             /*39 Shift dyy3*/
        paddd mm0,mm3           /*40 Accumulate dyy3*/
        add eax,8               /*43*/
        add ebx,8               /*44*/
        add ecx,8               /*45*/
        movq [esi+1024],mm0     /*42 Store dyy sums*/
        /*Stall*/
        add edx,8               /*46*/
        add edi,8               /*47*/
        add esi,8               /*48*/
        dec c                   /*49*/
        jnz loopstart

        emms
    }

#else
    int c,dd;

    for(c=0;c<nc;c++)
    {
        /*Filter vertically: 1*d0 + 4*d1 + 6*d2 + 4*d3 + 1*d4
        (shifts build the 4x and 6x=4x+2x weights)*/
        dd=d2[c];
        g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];

        dd=d2[c+128];
        g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];

        dd=d2[c+256];
        g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
    }
#endif /*DB_USE_MMX*/
}
+
/*Horizontally filter the rows gxx,gxy,gyy with the binomial kernel 1-4-6-4-1
and write the Harris corner response into the strength subrow starting at
s[i][j] with width chunk_width. gxx,gxy and gyy are assumed to be four samples
wider than chunk_width and to start at (i,j-2)*/
inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
{
    float *out=s[i]+j;

    for(int c=0;c<chunk_width;c++)
    {
        /*1-4-6-4-1 horizontal smoothing of the three second-moment sums*/
        const float Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
        const float Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
        const float Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];

        /*Harris response det(G)-k*trace(G)^2 with k=0.06*/
        const float det=Gxx*Gyy-Gxy*Gxy;
        const float trc=Gxx+Gyy;
        out[c]=det-0.06f*trc*trc;
    }
}
+
/*Filter g in place with the binomial kernel 1-4-6-4-1. The output is shifted
two steps left, so g[c] receives g[c]+4*g[c+1]+6*g[c+2]+4*g[c+3]+g[c+4] and
the valid output length is nc-4 (124 for a full 128 row).
NOTE(review): the MMX path assumes the full length of 128 and ignores nc*/
inline void db_Filter14641_128_i(int *g,int nc)
{
#ifdef DB_USE_MMX
    int mask;

    mask=0xFFFFFFFF;
    _asm
    {
        mov esi,31
        mov eax,g

        /*Get bitmask 00000000FFFFFFFF into mm7*/
        movd mm7,mask

        /*Warming iteration one 1-16********************/
        movq mm6,[eax]      /*1 Load new data*/
        paddd mm0,mm6       /*2 Add 1* behind two steps*/
        movq mm2,mm6        /*3 Start with 1* in front two steps*/
        pslld mm6,1         /*4*/
        paddd mm1,mm6       /*5 Add 2* same place*/
        pslld mm6,1         /*6*/
        paddd mm1,mm6       /*7 Add 4* same place*/
        pshufw mm6,mm6,4Eh  /*8 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm6       /*9 Add 4* swapped*/
        movq mm5,mm6        /*10 Copy*/
        pand mm6,mm7        /*11 Get low double-word only*/
        paddd mm2,mm6       /*12 Add 4* in front one step*/
        pxor mm6,mm5        /*13 Get high double-word only*/
        paddd mm0,mm6       /*14 Add 4* behind one step*/
        movq mm0,mm1        /*15 Shift along*/
        movq mm1,mm2        /*16 Shift along*/
        /*Warming iteration two 17-32********************/
        movq mm4,[eax+8]    /*17 Load new data*/
        paddd mm0,mm4       /*18 Add 1* behind two steps*/
        movq mm2,mm4        /*19 Start with 1* in front two steps*/
        pslld mm4,1         /*20*/
        paddd mm1,mm4       /*21 Add 2* same place*/
        pslld mm4,1         /*22*/
        paddd mm1,mm4       /*23 Add 4* same place*/
        pshufw mm4,mm4,4Eh  /*24 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm4       /*25 Add 4* swapped*/
        movq mm3,mm4        /*26 Copy*/
        pand mm4,mm7        /*27 Get low double-word only*/
        paddd mm2,mm4       /*28 Add 4* in front one step*/
        pxor mm4,mm3        /*29 Get high double-word only*/
        paddd mm0,mm4       /*30 Add 4* behind one step*/
        movq mm0,mm1        /*31 Shift along*/
        movq mm1,mm2        /*32 Shift along*/

        /*Loop********************/
loopstart:
        /*First part of loop 33-47********/
        movq mm6,[eax+16]   /*33 Load new data*/
        /*Stall*/
        paddd mm0,mm6       /*34 Add 1* behind two steps*/
        movq mm2,mm6        /*35 Start with 1* in front two steps*/
        movq mm4,[eax+24]   /*48 Load new data*/
        pslld mm6,1         /*36*/
        paddd mm1,mm6       /*37 Add 2* same place*/
        pslld mm6,1         /*38*/
        paddd mm1,mm6       /*39 Add 4* same place*/
        pshufw mm6,mm6,4Eh  /*40 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm1,mm4       /*49 Add 1* behind two steps*/
        movq mm5,mm6        /*41 Copy*/
        paddd mm1,mm6       /*42 Add 4* swapped*/
        pand mm6,mm7        /*43 Get low double-word only*/
        paddd mm2,mm6       /*44 Add 4* in front one step*/
        pxor mm6,mm5        /*45 Get high double-word only*/
        paddd mm0,mm6       /*46 Add 4* behind one step*/
        movq mm6,mm4        /*50a Copy*/
        pslld mm4,1         /*51*/
        /*Stall*/
        movq [eax],mm0      /*47 Store result two steps behind*/
        /*Second part of loop 48-66********/
        movq mm0,mm6        /*50b Start with 1* in front two steps*/
        paddd mm2,mm4       /*52 Add 2* same place*/
        pslld mm4,1         /*53*/
        paddd mm2,mm4       /*54 Add 4* same place*/
        pshufw mm4,mm4,4Eh  /*55 Swap the two double-words using bitmask 01001110=4Eh*/
        paddd mm2,mm4       /*56 Add 4* swapped*/
        movq mm3,mm4        /*57 Copy*/
        pand mm4,mm7        /*58 Get low double-word only*/
        /*Stall*/
        paddd mm0,mm4       /*59 Add 4* in front one step*/
        pxor mm4,mm3        /*60 Get high double-word only*/
        paddd mm1,mm4       /*61 Add 4* behind one step*/
        add eax,16          /*65*/
        dec esi             /*66*/
        /*Stall*/
        movq [eax-8],mm1    /*62 Store result two steps behind*/
        movq mm1,mm0        /*63 Shift along*/
        movq mm0,mm2        /*64 Shift along*/
        jnz loopstart

        emms
    }

#else
    int c;

    for(c=0;c<nc-4;c++)
    {
        /*g[c] = g[c] + 4*g[c+1] + 6*g[c+2] + 4*g[c+3] + g[c+4] via shifts*/
        g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
    }
#endif /*DB_USE_MMX*/
}
+
/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
s should be 16 byte aligned. gxx,gxy,gyy are filtered in place as a side effect.
NOTE(review): the SIMD path assumes full rows of 128/124 entries and ignores nc*/
inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
{
    float k;

    /*Harris corner-response constant*/
    k=0.06f;

    /*In-place 1-4-6-4-1 horizontal filtering of the three second-moment rows*/
    db_Filter14641_128_i(gxx,nc);
    db_Filter14641_128_i(gxy,nc);
    db_Filter14641_128_i(gyy,nc);

#ifdef DB_USE_SIMD


    _asm
    {
        mov esi,15
        mov eax,gxx
        mov ebx,gxy
        mov ecx,gyy
        mov edx,s

        /*broadcast k to all positions of xmm7*/
        movss xmm7,k
        shufps xmm7,xmm7,0

        /*****Warm up 1-10**************************************/
        cvtpi2ps xmm0,[eax+8]   /*1 Convert two integers into floating point of low double-word*/
        /*Stall*/
        cvtpi2ps xmm1,[ebx+8]   /*4 Convert two integers into floating point of low double-word*/
        movlhps xmm0,xmm0       /*2 Move them to the high double-word*/
        cvtpi2ps xmm2,[ecx+8]   /*7 Convert two integers into floating point of low double-word*/
        movlhps xmm1,xmm1       /*5 Move them to the high double-word*/
        cvtpi2ps xmm0,[eax]     /*3 Convert two integers into floating point of low double-word*/
        movlhps xmm2,xmm2       /*8 Move them to the high double-word*/
        cvtpi2ps xmm1,[ebx]     /*6 Convert two integers into floating point of low double-word*/
        movaps xmm3,xmm0        /*10 Copy Cxx*/
        cvtpi2ps xmm2,[ecx]     /*9 Convert two integers into floating point of low double-word*/
        /*Stall*/
loopstart:
        /*****First part of loop 11-18***********************/
        mulps xmm0,xmm2         /*11 Multiply to get Gxx*Gyy*/
        addps xmm2,xmm3         /*12 Add to get Gxx+Gyy*/
        cvtpi2ps xmm4,[eax+24]  /*19 Convert two integers into floating point of low double-word*/
        mulps xmm1,xmm1         /*13 Multiply to get Gxy*Gxy*/
        mulps xmm2,xmm2         /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
        movlhps xmm4,xmm4       /*20 Move them to the high double-word*/
        cvtpi2ps xmm4,[eax+16]  /*21 Convert two integers into floating point of low double-word*/
        /*Stall*/
        subps xmm0,xmm1         /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
        mulps xmm2,xmm7         /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
        cvtpi2ps xmm5,[ebx+24]  /*22 Convert two integers into floating point of low double-word*/
        /*Stall*/
        movlhps xmm5,xmm5       /*23 Move them to the high double-word*/
        /*Stall*/
        cvtpi2ps xmm5,[ebx+16]  /*24 Convert two integers into floating point of low double-word*/
        subps xmm0,xmm2         /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
        cvtpi2ps xmm6,[ecx+24]  /*25 Convert two integers into floating point of low double-word*/
        /*Stall*/
        movaps [edx],xmm0       /*18 Store*/
        /*****Second part of loop 26-40***********************/
        movlhps xmm6,xmm6       /*26 Move them to the high double-word*/
        cvtpi2ps xmm6,[ecx+16]  /*27 Convert two integers into floating point of low double-word*/
        movaps xmm3,xmm4        /*28 Copy Cxx*/
        mulps xmm4,xmm6         /*29 Multiply to get Gxx*Gyy*/
        addps xmm6,xmm3         /*30 Add to get Gxx+Gyy*/
        cvtpi2ps xmm0,[eax+40]  /*(1 Next) Convert two integers into floating point of low double-word*/
        mulps xmm5,xmm5         /*31 Multiply to get Gxy*Gxy*/
        cvtpi2ps xmm1,[ebx+40]  /*(4 Next) Convert two integers into floating point of low double-word*/
        mulps xmm6,xmm6         /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
        cvtpi2ps xmm2,[ecx+40]  /*(7 Next) Convert two integers into floating point of low double-word*/
        movlhps xmm0,xmm0       /*(2 Next) Move them to the high double-word*/
        subps xmm4,xmm5         /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
        movlhps xmm1,xmm1       /*(5 Next) Move them to the high double-word*/
        cvtpi2ps xmm0,[eax+32]  /*(3 Next)Convert two integers into floating point of low double-word*/
        mulps xmm6,xmm7         /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
        cvtpi2ps xmm1,[ebx+32]  /*(6 Next) Convert two integers into floating point of low double-word*/
        movlhps xmm2,xmm2       /*(8 Next) Move them to the high double-word*/
        movaps xmm3,xmm0        /*(10 Next) Copy Cxx*/
        add eax,32              /*37*/
        subps xmm4,xmm6         /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
        add ebx,32              /*38*/
        cvtpi2ps xmm2,[ecx+32]  /*(9 Next) Convert two integers into floating point of low double-word*/
        /*Stall*/
        movaps [edx+16],xmm4    /*36 Store*/
        /*Stall*/
        add ecx,32              /*39*/
        add edx,32              /*40*/
        dec esi                 /*41*/
        jnz loopstart

        /****Cool down***************/
        mulps xmm0,xmm2         /*Multiply to get Gxx*Gyy*/
        addps xmm2,xmm3         /*Add to get Gxx+Gyy*/
        mulps xmm1,xmm1         /*Multiply to get Gxy*Gxy*/
        mulps xmm2,xmm2         /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
        subps xmm0,xmm1         /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
        mulps xmm2,xmm7         /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
        subps xmm0,xmm2         /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
        movaps [edx],xmm0       /*Store*/
    }

#else
    float Gxx,Gxy,Gyy,det,trc;
    int c;

    //for(c=0;c<124;c++)
    for(c=0;c<nc-4;c++)
    {
        Gxx=(float)gxx[c];
        Gxy=(float)gxy[c];
        Gyy=(float)gyy[c];

        /*Harris response det(G)-k*trace(G)^2*/
        det=Gxx*Gyy-Gxy*Gxy;
        trc=Gxx+Gyy;
        s[c]=det-k*trc*trc;
    }
#endif /*DB_USE_SIMD*/
}
+
/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
store it into the corresponding region of s. left and top have to be at least 3 and
right and bottom have to be at most width-4,height-4*/
inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
                                   /*temp should point to at least
                                   13*(right-left+5) of allocated memory*/
                                   float *temp)
{
    float *Ix[5],*Iy[5];
    float *gxx,*gxy,*gyy;
    int i,chunk_width,chunk_width_p4;

    /*Carve the scratch buffer temp into three filter rows (gxx,gxy,gyy) and
    five pairs of derivative rows (Ix[0..4],Iy[0..4]), each chunk_width+4 wide*/
    chunk_width=right-left+1;
    chunk_width_p4=chunk_width+4;
    gxx=temp;
    gxy=gxx+chunk_width_p4;
    gyy=gxy+chunk_width_p4;
    for(i=0;i<5;i++)
    {
        Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
        Iy[i]=Ix[i]+chunk_width_p4;
    }

    /*Fill four rows of the wrap-around derivative buffers
    (image row r lives in slot r%5; top>=3 keeps the indices nonnegative)*/
    for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);

    /*For each output row*/
    for(i=top;i<=bottom;i++)
    {
        /*Step the derivative buffers: compute row i+2, overwriting the
        no-longer-needed slot (i+2)%5*/
        db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);

        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
            Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
            Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);

        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
        db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
    }
}
+
/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
store it into the corresponding region of s. left and top have to be at least 3 and
right and bottom have to be at most width-4,height-4. The left of the region in s should
be 16 byte aligned*/
inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
                                   /*temp should point to at least
                                   18*128 of allocated memory*/
                                   int *temp, int nc)
{
    int *Ixx[5],*Ixy[5],*Iyy[5];
    int *gxx,*gxy,*gyy;
    int i;

    /*Carve temp into the three contiguous 128-entry filter rows and five
    contiguous 3*128-entry slabs of derivative products (Ixx,Ixy,Iyy per image row)*/
    gxx=temp;
    gxy=gxx+128;
    gyy=gxy+128;
    for(i=0;i<5;i++)
    {
        Ixx[i]=gyy+(3*i+1)*128;
        Ixy[i]=gyy+(3*i+2)*128;
        Iyy[i]=gyy+(3*i+3)*128;
    }

    /*Fill four rows of the wrap-around derivative buffers
    (image row r lives in slot r%5; top>=3 keeps the indices nonnegative)*/
    for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);

    /*For each output row*/
    for(i=top;i<=bottom;i++)
    {
        /*Step the derivative buffers: compute row i+2 into the stale slot*/
        db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);

        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy
        (Ixx[k] is the base of a 3*128 slab, so one call covers all three products;
        likewise gxx is the base of the contiguous gxx/gxy/gyy output)*/
        db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);

        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
        db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
    }

}
+
+/*Compute Harris corner strength of img. Strength is returned for the region
+with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
+same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
+for a meaningful result*/
+void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
+ /*temp should point to at least
+ 13*(chunk_width+4) of allocated memory*/
+ float *temp,
+ int chunk_width)
+{
+ int x,next_x,last,right;
+
+ last=w-4;
+ for(x=3;x<=last;x=next_x)
+ {
+ next_x=x+chunk_width;
+ right=next_x-1;
+ if(right>last) right=last;
+ /*Compute the Harris strength of a chunk*/
+ db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
+ }
+}
+
+/*Compute Harris corner strength of img. Strength is returned for the region
+with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
+same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
+for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
+s[i][3] should by 16 byte aligned for any i*/
+void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
+ /*temp should point to at least
+ 18*128 of allocated memory*/
+ int *temp)
+{
+ int x,next_x,last;
+ int nc;
+
+ last=w-4;
+ for(x=3;x<=last;x=next_x)
+ {
+ next_x=x+124;
+
+ // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
+ nc = db_mini(128,last-x+1);
+ //nc = 128;
+
+ /*Compute the Harris strength of a chunk*/
+ db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
+ }
+}
+
/*Return the maximum of the 128 floats at v. v must be 16 byte aligned
(required by the movaps loads in the SIMD path)*/
inline float db_Max_128Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Chunk3*/
        maxps xmm0,[eax+256]
        maxps xmm1,[eax+272]
        maxps xmm2,[eax+288]
        maxps xmm3,[eax+304]
        maxps xmm4,[eax+320]
        maxps xmm5,[eax+336]
        maxps xmm6,[eax+352]
        maxps xmm7,[eax+368]

        /*Chunk4*/
        maxps xmm0,[eax+384]
        maxps xmm1,[eax+400]
        maxps xmm2,[eax+416]
        maxps xmm3,[eax+432]
        maxps xmm4,[eax+448]
        maxps xmm5,[eax+464]
        maxps xmm6,[eax+480]
        maxps xmm7,[eax+496]

        /*Collect: reduce the eight accumulators to a single scalar maximum*/
        maxps xmm0,xmm1
        maxps xmm2,xmm3
        maxps xmm4,xmm5
        maxps xmm6,xmm7
        maxps xmm0,xmm2
        maxps xmm4,xmm6
        maxps xmm0,xmm4
        movhlps xmm1,xmm0
        maxps xmm0,xmm1
        shufps xmm1,xmm0,1
        maxps xmm0,xmm1
        movss back,xmm0
    }

    return(back);
#else
    float val,max_val;
    float *p,*stop_p;
    max_val=v[0];
    for(p=v+1,stop_p=v+128;p!=stop_p;)
    {
        val= *p++;
        if(val>max_val) max_val=val;
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
+
/*Return the maximum of the 64 floats at v. v must be 16 byte aligned*/
inline float db_Max_64Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Chunk2*/
        maxps xmm0,[eax+128]
        maxps xmm1,[eax+144]
        maxps xmm2,[eax+160]
        maxps xmm3,[eax+176]
        maxps xmm4,[eax+192]
        maxps xmm5,[eax+208]
        maxps xmm6,[eax+224]
        maxps xmm7,[eax+240]

        /*Collect: reduce the eight accumulators to a single scalar maximum*/
        maxps xmm0,xmm1
        maxps xmm2,xmm3
        maxps xmm4,xmm5
        maxps xmm6,xmm7
        maxps xmm0,xmm2
        maxps xmm4,xmm6
        maxps xmm0,xmm4
        movhlps xmm1,xmm0
        maxps xmm0,xmm1
        shufps xmm1,xmm0,1
        maxps xmm0,xmm1
        movss back,xmm0
    }

    return(back);
#else
    float val,max_val;
    float *p,*stop_p;
    max_val=v[0];
    for(p=v+1,stop_p=v+64;p!=stop_p;)
    {
        val= *p++;
        if(val>max_val) max_val=val;
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
+
/*Return the maximum of the 32 floats at v. v must be 16 byte aligned*/
inline float db_Max_32Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]

        /*Collect: reduce the eight registers to a single scalar maximum*/
        maxps xmm0,xmm1
        maxps xmm2,xmm3
        maxps xmm4,xmm5
        maxps xmm6,xmm7
        maxps xmm0,xmm2
        maxps xmm4,xmm6
        maxps xmm0,xmm4
        movhlps xmm1,xmm0
        maxps xmm0,xmm1
        shufps xmm1,xmm0,1
        maxps xmm0,xmm1
        movss back,xmm0
    }

    return(back);
#else
    float val,max_val;
    float *p,*stop_p;
    max_val=v[0];
    for(p=v+1,stop_p=v+32;p!=stop_p;)
    {
        val= *p++;
        if(val>max_val) max_val=val;
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
+
/*Return the maximum of the 16 floats at v. v must be 16 byte aligned*/
inline float db_Max_16Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]

        /*Collect: reduce the four registers to a single scalar maximum*/
        maxps xmm0,xmm1
        maxps xmm2,xmm3
        maxps xmm0,xmm2
        movhlps xmm1,xmm0
        maxps xmm0,xmm1
        shufps xmm1,xmm0,1
        maxps xmm0,xmm1
        movss back,xmm0
    }

    return(back);
#else
    float val,max_val;
    float *p,*stop_p;
    max_val=v[0];
    for(p=v+1,stop_p=v+16;p!=stop_p;)
    {
        val= *p++;
        if(val>max_val) max_val=val;
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
+
/*Return the maximum of the 8 floats at v. v must be 16 byte aligned*/
inline float db_Max_8Aligned16_f(float *v)
{
#ifdef DB_USE_SIMD
    float back;

    _asm
    {
        mov eax,v

        /*Chunk1*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]

        /*Collect: reduce the two registers to a single scalar maximum*/
        maxps xmm0,xmm1
        movhlps xmm1,xmm0
        maxps xmm0,xmm1
        shufps xmm1,xmm0,1
        maxps xmm0,xmm1
        movss back,xmm0
    }

    return(back);
#else
    float val,max_val;
    float *p,*stop_p;
    max_val=v[0];
    for(p=v+1,stop_p=v+8;p!=stop_p;)
    {
        val= *p++;
        if(val>max_val) max_val=val;
    }
    return(max_val);
#endif /*DB_USE_SIMD*/
}
+
+inline float db_Max_Aligned16_f(float *v,int size)
+{
+ float val,max_val;
+ float *stop_v;
+
+ max_val=v[0];
+ for(;size>=128;size-=128)
+ {
+ val=db_Max_128Aligned16_f(v);
+ v+=128;
+ if(val>max_val) max_val=val;
+ }
+ if(size&64)
+ {
+ val=db_Max_64Aligned16_f(v);
+ v+=64;
+ if(val>max_val) max_val=val;
+ }
+ if(size&32)
+ {
+ val=db_Max_32Aligned16_f(v);
+ v+=32;
+ if(val>max_val) max_val=val;
+ }
+ if(size&16)
+ {
+ val=db_Max_16Aligned16_f(v);
+ v+=16;
+ if(val>max_val) max_val=val;
+ }
+ if(size&8)
+ {
+ val=db_Max_8Aligned16_f(v);
+ v+=8;
+ if(val>max_val) max_val=val;
+ }
+ if(size&7)
+ {
+ for(stop_v=v+(size&7);v!=stop_v;)
+ {
+ val= *v++;
+ if(val>max_val) max_val=val;
+ }
+ }
+
+ return(max_val);
+}
+
+/*Find maximum value of img in the region starting at (left,top)
+and with width w and height h. img[left] should be 16 byte aligned*/
+float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
+{
+ float val,max_val;
+ int i,stop_i;
+
+ if(w && h)
+ {
+ stop_i=top+h;
+ max_val=img[top][left];
+
+ for(i=top;i<stop_i;i++)
+ {
+ val=db_Max_Aligned16_f(img[i]+left,w);
+ if(val>max_val) max_val=val;
+ }
+ return(max_val);
+ }
+ return(0.0);
+}
+
/*Elementwise maximum m[i]=max(v1[i],v2[i]) for i=0..127.
All of m, v1 and v2 must be 16 byte aligned: the SSE path uses aligned
loads (movaps/maxps with memory operand) and aligned stores throughout,
processing 32 floats per chunk in four chunks*/
inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
{
#ifdef DB_USE_SIMD
    _asm
    {
        mov eax,v1
        mov ebx,v2
        mov ecx,m

        /*Chunk1: elements 0-31*/
        movaps xmm0,[eax]
        movaps xmm1,[eax+16]
        movaps xmm2,[eax+32]
        movaps xmm3,[eax+48]
        movaps xmm4,[eax+64]
        movaps xmm5,[eax+80]
        movaps xmm6,[eax+96]
        movaps xmm7,[eax+112]
        maxps xmm0,[ebx]
        maxps xmm1,[ebx+16]
        maxps xmm2,[ebx+32]
        maxps xmm3,[ebx+48]
        maxps xmm4,[ebx+64]
        maxps xmm5,[ebx+80]
        maxps xmm6,[ebx+96]
        maxps xmm7,[ebx+112]
        movaps [ecx],xmm0
        movaps [ecx+16],xmm1
        movaps [ecx+32],xmm2
        movaps [ecx+48],xmm3
        movaps [ecx+64],xmm4
        movaps [ecx+80],xmm5
        movaps [ecx+96],xmm6
        movaps [ecx+112],xmm7

        /*Chunk2: elements 32-63*/
        movaps xmm0,[eax+128]
        movaps xmm1,[eax+144]
        movaps xmm2,[eax+160]
        movaps xmm3,[eax+176]
        movaps xmm4,[eax+192]
        movaps xmm5,[eax+208]
        movaps xmm6,[eax+224]
        movaps xmm7,[eax+240]
        maxps xmm0,[ebx+128]
        maxps xmm1,[ebx+144]
        maxps xmm2,[ebx+160]
        maxps xmm3,[ebx+176]
        maxps xmm4,[ebx+192]
        maxps xmm5,[ebx+208]
        maxps xmm6,[ebx+224]
        maxps xmm7,[ebx+240]
        movaps [ecx+128],xmm0
        movaps [ecx+144],xmm1
        movaps [ecx+160],xmm2
        movaps [ecx+176],xmm3
        movaps [ecx+192],xmm4
        movaps [ecx+208],xmm5
        movaps [ecx+224],xmm6
        movaps [ecx+240],xmm7

        /*Chunk3: elements 64-95*/
        movaps xmm0,[eax+256]
        movaps xmm1,[eax+272]
        movaps xmm2,[eax+288]
        movaps xmm3,[eax+304]
        movaps xmm4,[eax+320]
        movaps xmm5,[eax+336]
        movaps xmm6,[eax+352]
        movaps xmm7,[eax+368]
        maxps xmm0,[ebx+256]
        maxps xmm1,[ebx+272]
        maxps xmm2,[ebx+288]
        maxps xmm3,[ebx+304]
        maxps xmm4,[ebx+320]
        maxps xmm5,[ebx+336]
        maxps xmm6,[ebx+352]
        maxps xmm7,[ebx+368]
        movaps [ecx+256],xmm0
        movaps [ecx+272],xmm1
        movaps [ecx+288],xmm2
        movaps [ecx+304],xmm3
        movaps [ecx+320],xmm4
        movaps [ecx+336],xmm5
        movaps [ecx+352],xmm6
        movaps [ecx+368],xmm7

        /*Chunk4: elements 96-127*/
        movaps xmm0,[eax+384]
        movaps xmm1,[eax+400]
        movaps xmm2,[eax+416]
        movaps xmm3,[eax+432]
        movaps xmm4,[eax+448]
        movaps xmm5,[eax+464]
        movaps xmm6,[eax+480]
        movaps xmm7,[eax+496]
        maxps xmm0,[ebx+384]
        maxps xmm1,[ebx+400]
        maxps xmm2,[ebx+416]
        maxps xmm3,[ebx+432]
        maxps xmm4,[ebx+448]
        maxps xmm5,[ebx+464]
        maxps xmm6,[ebx+480]
        maxps xmm7,[ebx+496]
        movaps [ecx+384],xmm0
        movaps [ecx+400],xmm1
        movaps [ecx+416],xmm2
        movaps [ecx+432],xmm3
        movaps [ecx+448],xmm4
        movaps [ecx+464],xmm5
        movaps [ecx+480],xmm6
        movaps [ecx+496],xmm7
    }
#else
    /*Scalar fallback*/
    int i;
    float a,b;
    for(i=0;i<128;i++)
    {
        a=v1[i];
        b=v2[i];
        if(a>=b) m[i]=a;
        else m[i]=b;
    }
#endif /*DB_USE_SIMD*/
}
+
/*Elementwise maximum m[i]=max(v1[i],v2[i]) for i=0..127.
Unlike db_MaxVector_128_Aligned16_f, only the SECOND source v2 and the
destination m need to be 16 byte aligned (hence the name): v1 is loaded
with unaligned movups, which lets the caller pass shifted pointers such
as five+1 and five+3 in the 5x5 suppression filter*/
inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
{
#ifdef DB_USE_SIMD
    _asm
    {
        mov eax,v1
        mov ebx,v2
        mov ecx,m

        /*Chunk1: elements 0-31*/
        movups xmm0,[eax]
        movups xmm1,[eax+16]
        movups xmm2,[eax+32]
        movups xmm3,[eax+48]
        movups xmm4,[eax+64]
        movups xmm5,[eax+80]
        movups xmm6,[eax+96]
        movups xmm7,[eax+112]
        maxps xmm0,[ebx]
        maxps xmm1,[ebx+16]
        maxps xmm2,[ebx+32]
        maxps xmm3,[ebx+48]
        maxps xmm4,[ebx+64]
        maxps xmm5,[ebx+80]
        maxps xmm6,[ebx+96]
        maxps xmm7,[ebx+112]
        movaps [ecx],xmm0
        movaps [ecx+16],xmm1
        movaps [ecx+32],xmm2
        movaps [ecx+48],xmm3
        movaps [ecx+64],xmm4
        movaps [ecx+80],xmm5
        movaps [ecx+96],xmm6
        movaps [ecx+112],xmm7

        /*Chunk2: elements 32-63*/
        movups xmm0,[eax+128]
        movups xmm1,[eax+144]
        movups xmm2,[eax+160]
        movups xmm3,[eax+176]
        movups xmm4,[eax+192]
        movups xmm5,[eax+208]
        movups xmm6,[eax+224]
        movups xmm7,[eax+240]
        maxps xmm0,[ebx+128]
        maxps xmm1,[ebx+144]
        maxps xmm2,[ebx+160]
        maxps xmm3,[ebx+176]
        maxps xmm4,[ebx+192]
        maxps xmm5,[ebx+208]
        maxps xmm6,[ebx+224]
        maxps xmm7,[ebx+240]
        movaps [ecx+128],xmm0
        movaps [ecx+144],xmm1
        movaps [ecx+160],xmm2
        movaps [ecx+176],xmm3
        movaps [ecx+192],xmm4
        movaps [ecx+208],xmm5
        movaps [ecx+224],xmm6
        movaps [ecx+240],xmm7

        /*Chunk3: elements 64-95*/
        movups xmm0,[eax+256]
        movups xmm1,[eax+272]
        movups xmm2,[eax+288]
        movups xmm3,[eax+304]
        movups xmm4,[eax+320]
        movups xmm5,[eax+336]
        movups xmm6,[eax+352]
        movups xmm7,[eax+368]
        maxps xmm0,[ebx+256]
        maxps xmm1,[ebx+272]
        maxps xmm2,[ebx+288]
        maxps xmm3,[ebx+304]
        maxps xmm4,[ebx+320]
        maxps xmm5,[ebx+336]
        maxps xmm6,[ebx+352]
        maxps xmm7,[ebx+368]
        movaps [ecx+256],xmm0
        movaps [ecx+272],xmm1
        movaps [ecx+288],xmm2
        movaps [ecx+304],xmm3
        movaps [ecx+320],xmm4
        movaps [ecx+336],xmm5
        movaps [ecx+352],xmm6
        movaps [ecx+368],xmm7

        /*Chunk4: elements 96-127*/
        movups xmm0,[eax+384]
        movups xmm1,[eax+400]
        movups xmm2,[eax+416]
        movups xmm3,[eax+432]
        movups xmm4,[eax+448]
        movups xmm5,[eax+464]
        movups xmm6,[eax+480]
        movups xmm7,[eax+496]
        maxps xmm0,[ebx+384]
        maxps xmm1,[ebx+400]
        maxps xmm2,[ebx+416]
        maxps xmm3,[ebx+432]
        maxps xmm4,[ebx+448]
        maxps xmm5,[ebx+464]
        maxps xmm6,[ebx+480]
        maxps xmm7,[ebx+496]
        movaps [ecx+384],xmm0
        movaps [ecx+400],xmm1
        movaps [ecx+416],xmm2
        movaps [ecx+432],xmm3
        movaps [ecx+448],xmm4
        movaps [ecx+464],xmm5
        movaps [ecx+480],xmm6
        movaps [ecx+496],xmm7
    }
#else
    /*Scalar fallback*/
    int i;
    float a,b;
    for(i=0;i<128;i++)
    {
        a=v1[i];
        b=v2[i];
        if(a>=b) m[i]=a;
        else m[i]=b;
    }
#endif /*DB_USE_SIMD*/
}
+
/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3.
NOTE(review): the two branches appear to encode the output differently -- the SIMD branch writes
the maximum of the 5x5 neighborhood excluding the center, while the scalar branch writes the
strength itself (zeroed at strict local maxima). Confirm which convention downstream code expects
when DB_USE_SIMD is toggled.*/
inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
                                   /*temp should point to at least
                                   6*132 floats of 16-byte-aligned allocated memory*/
                                   float *temp)
{
#ifdef DB_USE_SIMD
    int i,lm2;
    float *two[4];       /*wrap-around ring of 4 row buffers holding pairwise row maxima*/
    float *four,*five;   /*scratch rows for 4-row and 5-row column maxima*/

    lm2=left-2;

    /*Set pointers to pre-allocated memory*/
    four=temp;
    five=four+132;
    for(i=0;i<4;i++)
    {
        two[i]=five+(i+1)*132;
    }

    /*Set rests of four and five to zero to avoid
    floating point exceptions*/
    for(i=129;i<132;i++)
    {
        four[i]=0.0;
        five[i]=0.0;
    }

    /*Fill three rows of the wrap-around max buffers*/
    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);

    /*For each output row*/
    for(;i<=bottom;i++)
    {
        /*Compute max of the lowest pair of rows in the five row window*/
        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
        /*Compute max of the lowest and highest pair of rows in the five row window*/
        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
        /*Compute max of all rows*/
        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
        /*Compute max of 2x5 chunks*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
        /*Compute max of pairs of 2x5 chunks*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
        /*Compute max of pairs of 5x5 except middle*/
        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
    }

#else
    /*Scalar path: suppress (zero) strict 5x5 local maxima, copy all
    other strengths. Note it covers 128 columns even though the chunk
    stride is 124; the last 4 columns are rewritten by the next chunk.*/
    int i,j,right;
    float sv;

    right=left+128;
    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
    {
        sv=s[i][j];

        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
            sv>s[ i][j-2] && sv>s[ i][j-1] && sv>s[ i][j+1] && sv>s[ i][j+2] &&
            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
        {
            sf[i][j-2]=0.0;
        }
        else sf[i][j-2]=sv;
    }
#endif /*DB_USE_SIMD*/
}
+
+/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
+stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
+outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
+Top must be at least 3. Reading and writing from and to the input and output images is done
+as if the region had a width equal to a multiple of 124. If this is not the case, the images
+should be over-allocated and the input cleared for a sufficient region*/
+void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
+ /*temp should point to at least
+ 6*132 floats of 16-byte-aligned allocated memory*/
+ float *temp)
+{
+ int x,next_x;
+
+ for(x=left;x<=right;x=next_x)
+ {
+ next_x=x+124;
+ db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
+ }
+}
+
/*Extract corners from the chunk (left,top) to (right,bottom): a corner is
a pixel whose strength is at least threshold and strictly greater than all
24 of its neighbors in the 5x5 window. Coordinates and strengths are stored
in x_temp, y_temp and s_temp, which should point to space of at least as
many positions as there are pixels in the chunk. Returns the corner count*/
inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
{
    int row,col,di,dj,count,is_max;
    float c;

    count=0;
    for(row=top;row<=bottom;row++)
    {
        for(col=left;col<=right;col++)
        {
            c=strength[row][col];
            if(!(c>=threshold)) continue;

            /*Strict greater-than test against every neighbor in the
            5x5 window (center excluded)*/
            is_max=1;
            for(di=-2;di<=2 && is_max;di++)
            {
                for(dj=-2;dj<=2;dj++)
                {
                    if(di==0 && dj==0) continue;
                    if(!(c>strength[row+di][col+dj]))
                    {
                        is_max=0;
                        break;
                    }
                }
            }
            if(is_max)
            {
                x_temp[count]=(double) col;
                y_temp[count]=(double) row;
                s_temp[count]=(double) c;
                count++;
            }
        }
    }
    return(count);
}
+
+
+//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
+inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
+{
+ int x = (int) xd;
+ int y = (int) yd;
+
+ float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
+ float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
+ float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
+
+ float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
+
+ xs = xd;
+ ys = yd;
+
+ if ( db_absf(denom) <= FLT_EPSILON )
+ {
+ return;
+ }
+ else
+ {
+ float fx = strength[y][x+1] - strength[y][x-1];
+ float fy = strength[y+1][x] - strength[y-1][x];
+
+ float dx = (fyy * fx - fxy * fy) / denom;
+ float dy = (fxx * fy - fxy * fx) / denom;
+
+ if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
+ {
+ return;
+ }
+ else
+ {
+ xs -= dx;
+ ys -= dy;
+ }
+ }
+
+ return;
+}
+
/*Extract corners from the image part from (left,top) to (right,bottom).
Store in x_coord and y_coord, extracting at most "saturation" corners in each
block of size (bw,bh), where saturation is derived from area_factor: the
maximum number of corners to extract per 10000 pixels. The pointer temp_d
should point to at least 5*bw*bh positions. When a block yields more corners
than its saturation limit, the locally weakest ones are discarded by raising
the threshold with a rank selection (db_LeanQuickSelect).
NOTE(review): for very small blocks, (area*area_factor)/10000 can truncate to
zero, in which case no corners at all are kept from that block -- confirm
this is the intended saturation behavior.*/
void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
                            int bw,int bh,unsigned long area_factor,
                            float threshold,double *temp_d,
                            double *x_coord,double *y_coord,int *nr_corners)
{
    double *x_temp,*y_temp,*s_temp,*select_temp;
    double loc_thresh;
    unsigned long bwbh,area,saturation;
    int x,next_x,last_x;
    int y,next_y,last_y;
    int nr,nr_points,i,stop;

    /*Partition temp_d into four bwbh-sized scratch arrays*/
    bwbh=bw*bh;
    x_temp=temp_d;
    y_temp=x_temp+bwbh;
    s_temp=y_temp+bwbh;
    select_temp=s_temp+bwbh;

#ifdef DB_SUB_PIXEL
    // subpixel processing may sometimes push the corner ourside the real border
    // increasing border size:
    left++;
    top++;
    bottom--;
    right--;
#endif /*DB_SUB_PIXEL*/

    nr_points=0;
    /*Walk the region block by block, clamping the last block to the border*/
    for(y=top;y<=bottom;y=next_y)
    {
        next_y=y+bh;
        last_y=next_y-1;
        if(last_y>bottom) last_y=bottom;
        for(x=left;x<=right;x=next_x)
        {
            next_x=x+bw;
            last_x=next_x-1;
            if(last_x>right) last_x=right;

            /*Per-block corner budget*/
            area=(last_x-x+1)*(last_y-y+1);
            saturation=(area*area_factor)/10000;
            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
            if(nr)
            {
                /*If over budget, raise the local threshold to the value that
                keeps only the strongest "saturation" corners*/
                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
                else loc_thresh=threshold;

                stop=nr_points+saturation;
                for(i=0;(i<nr)&&(nr_points<stop);i++)
                {
                    if(s_temp[i]>=loc_thresh)
                    {
                        #ifdef DB_SUB_PIXEL
                        db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
                        #else
                        x_coord[nr_points]=x_temp[i];
                        y_coord[nr_points]=y_temp[i];
                        #endif

                        nr_points++;
                    }
                }
            }
        }
    }
    *nr_corners=nr_points;
}
+
+db_CornerDetector_f::db_CornerDetector_f()
+{
+ m_w=0; m_h=0;
+}
+
db_CornerDetector_f::~db_CornerDetector_f()
{
    /*Release any buffers allocated by Start()*/
    Clean();
}
+
+void db_CornerDetector_f::Clean()
+{
+ if(m_w!=0)
+ {
+ delete [] m_temp_f;
+ delete [] m_temp_d;
+ db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
+ }
+ m_w=0; m_h=0;
+}
+
+unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
+ int nr_horizontal_blocks,int nr_vertical_blocks,
+ double absolute_threshold,double relative_threshold)
+{
+ int chunkwidth=208;
+ int block_width,block_height;
+ unsigned long area_factor;
+ int active_width,active_height;
+
+ active_width=db_maxi(1,im_width-10);
+ active_height=db_maxi(1,im_height-10);
+ block_width=db_maxi(1,active_width/nr_horizontal_blocks);
+ block_height=db_maxi(1,active_height/nr_vertical_blocks);
+
+ area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
+ (((double)active_width)*((double)active_height)))));
+
+ return(Start(im_width,im_height,block_width,block_height,area_factor,
+ absolute_threshold,relative_threshold,chunkwidth));
+}
+
/*Store the configuration and allocate the working buffers. Returns the
upper bound on the number of corners DetectCorners() will output, which
is also the size callers must allocate for x_coord/y_coord*/
unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
                             int block_width,int block_height,unsigned long area_factor,
                             double absolute_threshold,double relative_threshold,int chunkwidth)
{
    /*Release any previous allocation before re-configuring*/
    Clean();

    m_w=im_width;
    m_h=im_height;
    m_cw=chunkwidth;
    m_bw=block_width;
    m_bh=block_height;
    m_area_factor=area_factor;
    m_r_thresh=relative_threshold;
    m_a_thresh=absolute_threshold;
    /*Upper bound: area_factor corners per 10000 pixels, at least 1*/
    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);

    /*13*(chunkwidth+4) floats of scratch for db_HarrisStrength_f*/
    m_temp_f=new float[13*(m_cw+4)];
    /*5 doubles per block pixel for db_ExtractCornersSaturated*/
    m_temp_d=new double[5*m_bw*m_bh];
    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);

    return(m_max_nr);
}
+
/*Compute the Harris strength image and extract up to m_max_nr corners.
x_coord and y_coord must hold at least the count returned by Init()/Start();
*nr_corners receives the number actually found*/
void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
{
    float max_val,threshold;

    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);

    /*Relative thresholding: a fraction of the strongest response in the
    interior (3-pixel frame excluded), never below the absolute threshold*/
    if(m_r_thresh)
    {
        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
    }
    else threshold= (float) m_a_thresh;

    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
        m_temp_d,x_coord,y_coord,nr_corners);
}
+
db_CornerDetector_u::db_CornerDetector_u()
{
    /*m_w==0 marks the unallocated state; buffers are created by Init()/Start()*/
    m_w=0; m_h=0;
}
+
db_CornerDetector_u::~db_CornerDetector_u()
{
    /*Release any buffers allocated by Start()*/
    Clean();
}
+
+db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
+{
+ Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
+ cd.m_a_thresh, cd.m_r_thresh);
+}
+
+db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
+{
+ if ( this == &cd ) return *this;
+
+ Clean();
+
+ Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
+ cd.m_a_thresh, cd.m_r_thresh);
+
+ return *this;
+}
+
+void db_CornerDetector_u::Clean()
+{
+ if(m_w!=0)
+ {
+ delete [] m_temp_i;
+ delete [] m_temp_d;
+ db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
+ }
+ m_w=0; m_h=0;
+}
+
/*Derive block size and corner density from the requested corner count,
then configure and pre-allocate via Start(). Returns an upper bound on the
number of corners reported per frame. The absolute threshold is multiplied
by 16.0 because Start() expects 16.0 times the normal threshold for byte
images (see the comment on Start() in db_feature_detection.h)*/
unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
                            int nr_horizontal_blocks,int nr_vertical_blocks,
                            double absolute_threshold,double relative_threshold)
{
    int block_width,block_height;
    unsigned long area_factor;
    int active_width,active_height;

    /*Exclude a 10-pixel margin when computing block sizes and density*/
    active_width=db_maxi(1,im_width-10);
    active_height=db_maxi(1,im_height-10);
    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
    block_height=db_maxi(1,active_height/nr_vertical_blocks);

    /*Requested corners per 10000 pixels, clamped to [1,1000]*/
    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
        (((double)active_width)*((double)active_height)))));

    return(Start(im_width,im_height,block_width,block_height,area_factor,
        16.0*absolute_threshold,relative_threshold));
}
+
/*Store the configuration and allocate the working buffers. The absolute
threshold passed here should be 16.0 times the normal one (see header).
Returns the upper bound on the number of corners DetectCorners() outputs.
NOTE(review): m_w*m_h*m_area_factor is computed before the division and
may overflow on 32-bit unsigned long for very large images -- confirm
the expected image sizes*/
unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
                             int block_width,int block_height,unsigned long area_factor,
                             double absolute_threshold,double relative_threshold)
{
    /*Release any previous allocation before re-configuring*/
    Clean();

    m_w=im_width;
    m_h=im_height;
    m_bw=block_width;
    m_bh=block_height;
    m_area_factor=area_factor;
    m_r_thresh=relative_threshold;
    m_a_thresh=absolute_threshold;
    /*Upper bound: area_factor corners per 10000 pixels, at least 1*/
    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);

    /*18*128 ints of scratch for db_HarrisStrength_u*/
    m_temp_i=new int[18*128];
    /*5 doubles per block pixel for db_ExtractCornersSaturated*/
    m_temp_d=new double[5*m_bw*m_bh];
    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);

    return(m_max_nr);
}
+
/*Compute the Harris strength image and extract up to m_max_nr corners.
x_coord and y_coord must hold at least the count returned by Init()/Start().
If msk is given, only corners whose rounded location has mask value fgnd
are kept (assumes the rounded coordinates stay inside the mask image,
which holds as long as corners lie within the BORDER frame -- confirm
when DB_SUB_PIXEL is enabled)*/
void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
                                        const unsigned char * const *msk, unsigned char fgnd) const
{
    float max_val,threshold;

    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);


    /*Relative thresholding: a fraction of the strongest response in the
    interior (3-pixel frame excluded), never below the absolute threshold*/
    if(m_r_thresh)
    {
        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
    }
    else threshold= (float) m_a_thresh;

    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
        m_temp_d,x_coord,y_coord,nr_corners);


    if ( msk )
    {
        /*Compact the corner lists in place, keeping only foreground corners*/
        int nr_corners_mask=0;

        for ( int i = 0; i < *nr_corners; ++i)
        {
            int cor_x = db_roundi(*(x_coord+i));
            int cor_y = db_roundi(*(y_coord+i));
            if ( msk[cor_y][cor_x] == fgnd )
            {
                x_coord[nr_corners_mask] = x_coord[i];
                y_coord[nr_corners_mask] = y_coord[i];
                nr_corners_mask++;
            }
        }
        *nr_corners = nr_corners_mask;
    }
}
+
+void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
+ if ( m_w!=0 )
+ db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
+ m_temp_d,x_coord,y_coord,nr_corners);
+}
+
diff --git a/jni/feature_stab/db_vlvm/db_feature_detection.h b/jni/feature_stab/db_vlvm/db_feature_detection.h
new file mode 100644
index 000000000..68ffcc9ad
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_detection.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_detection.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/
+
+#ifndef DB_FEATURE_DETECTION_H
+#define DB_FEATURE_DETECTION_H
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup FeatureDetection Feature Detection
+ */
+#include "db_utilities.h"
+#include "db_utilities_constants.h"
+#include <stdlib.h> //for NULL
+
/*!
 * \class db_CornerDetector_f
 * \ingroup FeatureDetection
 * \brief Harris corner detector for float images.
 *
 * This class performs Harris corner extraction on *float* images managed
 * with functions in \ref LMImageBasicUtilities.
 */
class DB_API db_CornerDetector_f
{
public:
    db_CornerDetector_f();
    ~db_CornerDetector_f();

    /*!
     * Set parameters and pre-allocate memory. Return an upper bound
     * on the number of corners detected in one frame.
     * \param im_width              width
     * \param im_height             height
     * \param target_nr_corners     requested number of corners per frame
     * \param nr_horizontal_blocks  blocks across, for even corner distribution
     * \param nr_vertical_blocks    blocks down, for even corner distribution
     * \param absolute_threshold    minimum Harris strength for a corner
     * \param relative_threshold    minimum strength as a fraction of the frame maximum
     */
    unsigned long Init(int im_width,int im_height,
                       int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
                       int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
                       int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
                       double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD,
                       double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD);

    /*!
     * Detect the corners.
     * x_coord and y_coord should be pre-allocated arrays of length returned by Init().
     * \param img row array pointer
     * \param x_coord corner locations
     * \param y_coord corner locations
     * \param nr_corners actual number of corners computed
     */
    void DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const;
    /*! Set absolute feature threshold */
    void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; };
    /*! Set relative feature threshold */
    void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; };
protected:
    void Clean();
    unsigned long Start(int im_width,int im_height,
                        int block_width,int block_height,unsigned long area_factor,
                        double absolute_threshold,double relative_threshold,int chunkwidth);

    /*Image width/height, chunk width and block width/height*/
    int m_w,m_h,m_cw,m_bw,m_bh;
    /*Area factor holds the maximum number of corners to detect
    per 10000 pixels*/
    unsigned long m_area_factor,m_max_nr;
    double m_a_thresh,m_r_thresh;
    /*Scratch buffers and the Harris strength image, allocated by Start()*/
    float *m_temp_f;
    double *m_temp_d;
    float **m_strength,*m_strength_mem;
};
/*!
 * \class db_CornerDetector_u
 * \ingroup FeatureDetection
 * \brief Harris corner detector for byte images.
 *
 * This class performs Harris corner extraction on *byte* images managed
 * with functions in \ref LMImageBasicUtilities.
 */
class DB_API db_CornerDetector_u
{
public:
    db_CornerDetector_u();
    virtual ~db_CornerDetector_u();

    /*!
    Copy constructor duplicates settings.
    Memory is not copied.
    */
    db_CornerDetector_u(const db_CornerDetector_u& cd);
    /*!
    Assignment operator duplicates settings.
    Memory not copied.
    */
    db_CornerDetector_u& operator=(const db_CornerDetector_u& cd);

    /*!
     * Set parameters and pre-allocate memory. Return an upper bound
     * on the number of corners detected in one frame
     */
    virtual unsigned long Init(int im_width,int im_height,
                       int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
                       int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
                       int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
                       double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD,
                       double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD);

    /*!
     * Detect the corners.
     * Observe that the image should be overallocated by at least 256 bytes
     * at the end.
     * x_coord and y_coord should be pre-allocated arrays of length returned by Init().
     * Specifying image mask will restrict corner output to foreground regions.
     * Foreground value can be specified using fgnd. By default any >0 mask value
     * is considered to be foreground
     * \param img row array pointer
     * \param x_coord corner locations
     * \param y_coord corner locations
     * \param nr_corners actual number of corners computed
     * \param msk row array pointer to mask image
     * \param fgnd foreground value in the mask
     */
    virtual void DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
        const unsigned char * const * msk=NULL, unsigned char fgnd=255) const;

    /*!
    Set absolute feature threshold
    */
    virtual void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; };
    /*!
    Set relative feature threshold
    */
    virtual void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; };

    /*!
    Extract corners from a pre-computed strength image.
    \param strength Harris strength image
    \param x_coord corner locations
    \param y_coord corner locations
    \param nr_corners actual number of corners computed
    */
    virtual void ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners);
protected:
    virtual void Clean();
    /*The absolute threshold to this function should be 16.0 times
    normal*/
    unsigned long Start(int im_width,int im_height,
                        int block_width,int block_height,unsigned long area_factor,
                        double absolute_threshold,double relative_threshold);

    /*Image width/height and block width/height*/
    int m_w,m_h,m_bw,m_bh;
    /*Area factor holds the maximum number of corners to detect
    per 10000 pixels*/
    unsigned long m_area_factor,m_max_nr;
    double m_a_thresh,m_r_thresh;
    /*Scratch buffers and the Harris strength image, allocated by Start()*/
    int *m_temp_i;
    double *m_temp_d;
    float **m_strength,*m_strength_mem;
};
+
+#endif /*DB_FEATURE_DETECTION_H*/
diff --git a/jni/feature_stab/db_vlvm/db_feature_matching.cpp b/jni/feature_stab/db_vlvm/db_feature_matching.cpp
new file mode 100644
index 000000000..d278d0cf6
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_matching.cpp
@@ -0,0 +1,3410 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_matching.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_utilities.h"
+#include "db_feature_matching.h"
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+
+
+/* Global lookup tables, 11x11 = one entry per offset of an 11x11 patch.
+   Judging by the names, NN caches nearest-neighbor warped coordinates (ints)
+   and BL caches bilinear-interpolation warped coordinates (floats) for an
+   affine patch warp.
+   NOTE(review): defined here zero-initialized; presumably filled elsewhere in
+   this file before use -- confirm initialization precedes any lookup. */
+int AffineWarpPoint_NN_LUT_x[11][11];
+int AffineWarpPoint_NN_LUT_y[11][11];
+
+float AffineWarpPoint_BL_LUT_x[11][11];
+float AffineWarpPoint_BL_LUT_y[11][11];
+
+
/* Signed squared normalized cross-correlation between the 7x7 patch centered
   at (x_f,y_f) in f_img and the 7x7 patch centered at (x_g,y_g) in g_img.
   Returns sign(ncc)*ncc^2, i.e. a value in [-1,1]; returns 0 when either
   patch has zero variance (den==0).  Images are row-pointer arrays
   (img[y][x]); the caller must keep both centers at least 3 pixels inside
   the image borders -- no bounds checking is performed here. */
inline float db_SignedSquareNormCorr7x7_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;

    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
    /* Accumulate the five raw moments row by row, left to right
       (same accumulation order as an unrolled scan). */
    for(int r=-3;r<=3;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+(x_f-3);
        const unsigned char *pg=g_img[y_g+r]+(x_g-3);
        for(int c=0;c<7;c++)
        {
            f=(float)(*pf++); g=(float)(*pg++);
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /* 49 = pixel count: fg_corr is the (scaled) covariance and den the
       product of the two (scaled) variances. */
    fg_corr=49.0f*fgsum-fsum*gsum;
    den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
/* Signed squared normalized cross-correlation between the 9x9 patch centered
   at (x_f,y_f) in f_img and the 9x9 patch centered at (x_g,y_g) in g_img.
   Returns sign(ncc)*ncc^2 in [-1,1]; 0 when either patch has zero variance.
   Images are row-pointer arrays (img[y][x]); both centers must lie at least
   4 pixels inside the image borders -- no bounds checking here. */
inline float db_SignedSquareNormCorr9x9_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;

    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
    /* Row-by-row, left-to-right accumulation of the five raw moments. */
    for(int r=-4;r<=4;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+(x_f-4);
        const unsigned char *pg=g_img[y_g+r]+(x_g-4);
        for(int c=0;c<9;c++)
        {
            f=(float)(*pf++); g=(float)(*pg++);
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /* 81 = pixel count of the 9x9 window. */
    fg_corr=81.0f*fgsum-fsum*gsum;
    den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
/* Signed squared normalized cross-correlation between the 11x11 patch
   centered at (x_f,y_f) in f_img and the 11x11 patch centered at (x_g,y_g)
   in g_img.  Returns sign(ncc)*ncc^2 in [-1,1]; 0 when either patch has zero
   variance.  Images are row-pointer arrays (img[y][x]); both centers must
   lie at least 5 pixels inside the image borders -- no bounds checking. */
inline float db_SignedSquareNormCorr11x11_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;

    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
    /* Row-by-row, left-to-right accumulation of the five raw moments. */
    for(int r=-5;r<=5;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+(x_f-5);
        const unsigned char *pg=g_img[y_g+r]+(x_g-5);
        for(int c=0;c<11;c++)
        {
            f=(float)(*pf++); g=(float)(*pg++);
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /* 121 = pixel count of the 11x11 window. */
    fg_corr=121.0f*fgsum-fsum*gsum;
    den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
/* Precompute the normalization terms of the 11x11 signed NCC for the patch
   centered at (x_f,y_f) in f_img:
     *sum   = sum of the 121 pixel values
     *recip = 1/(121*sum(f^2) - sum(f)^2), or 0 when the patch has zero
              variance (so the correlation can later be normalized by a
              single multiply).
   The center must lie at least 5 pixels inside the image borders. */
inline void db_SignedSquareNormCorr11x11_Pre_u(unsigned char **f_img,int x_f,int y_f,float *sum,float *recip)
{
    float den;
    int f,f2sum,fsum;

    f2sum=0; fsum=0;
    /* Integer accumulation of sum(f) and sum(f^2) over the 11x11 window;
       both fit comfortably in an int (max 121*255^2 < 2^23). */
    for(int r=-5;r<=5;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+(x_f-5);
        for(int c=0;c<11;c++)
        {
            f= *pf++;
            f2sum+=f*f; fsum+=f;
        }
    }

    *sum= (float) fsum;
    den=(121.0f*f2sum-fsum*fsum);
    *recip=(float)(((den!=0.0)?1.0/den:0.0));
}
+
/* Copy the 5x5 patch centered at (x_f,y_f) of f_img into *patch as shorts,
   zero-padding to 32 entries (a SIMD-friendly length), and precompute the
   NCC normalization terms:
     *sum   = sum of the 25 pixel values
     *recip = 1/(25*sum(f^2) - sum(f)^2), or 0 for a zero-variance patch.
   The center must lie at least 2 pixels inside the image borders.
   NOTE(review): when DB_USE_SSE2 is defined no scalar path runs here -- the
   original read f2sum/fsum uninitialized (UB) in that configuration; they
   are now zero-initialized so the outputs are at least deterministic.
   Confirm whether an SSE2 fill path was meant to exist. */
inline void db_SignedSquareNormCorr5x5_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    float den;
    int f2sum=0,fsum=0;   /* zero-init: avoids UB when DB_USE_SSE2 skips the fill */
    int xm_f=x_f-2;

#ifndef DB_USE_SSE2
    const unsigned char *pf;
    short f;

    /* Row-by-row copy of the 5x5 window, accumulating sum(f) and sum(f^2). */
    for(int r=-2;r<=2;r++)
    {
        pf=f_img[y_f+r]+xm_f;
        for(int c=0;c<5;c++)
        {
            f=(short)(*pf++);
            f2sum+=f*f; fsum+=f;
            (*patch++)=f;
        }
    }
    /* Pad entries 25..31 so the buffer holds exactly 32 shorts. */
    for(int i=25;i<32;i++)
        (*patch++)=0;
#endif /* DB_USE_SSE2 */

    *sum= (float) fsum;
    den=(25.0f*f2sum-fsum*fsum);
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
+
/* Copy the 21x21 patch centered at (x_f,y_f) of f_img into *patch as shorts,
   zero-padding to 512 entries (a SIMD-friendly length), and precompute the
   NCC normalization terms:
     *sum   = sum of the 441 pixel values
     *recip = 1/(441*sum(f^2) - sum(f)^2), or 0 for a zero-variance patch.
   The center must lie at least 10 pixels inside the image borders.
   Fixes vs. original:
     - the zero-pad loop started at 442 and left patch[511] uninitialized;
       it now pads all of 441..511.
     - fsum*fsum was an int product that overflows (UB) for bright patches
       (max fsum = 441*255 = 112455, squared > INT_MAX); the product is now
       computed in float, which is bit-identical where the int product was
       defined. */
inline void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    float den;
    int f2sum=0,fsum=0;
    short f;

    /* Row-by-row copy of the 21x21 window, accumulating sum(f) and sum(f^2). */
    for (int r=-10;r<=10;r++){
        const unsigned char *pf=f_img[y_f+r]+(x_f-10);
        for (int c=0;c<21;c++){
            f=(short)(*pf++);
            f2sum+=f*f;
            fsum+=f;
            (*patch++)=f;
        }
    }

    /* Pad entries 441..511 so the buffer holds exactly 512 shorts. */
    for(int i=441; i<512; i++)
        (*patch++)=0;

    *sum= (float) fsum;
    den=(441.0f*f2sum-(float)fsum*(float)fsum);
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
+
/* Copy the 11x11 window of f_img centered on (x_f,y_f) into the 128-short
   buffer patch (121 samples followed by 7 zero-pad shorts so that the
   aligned 128-element scalar-product routines can consume it).  On the way,
   accumulate the window's pixel sum into *sum and 1/(121*sum(f^2)-sum(f)^2)
   into *recip (0 when the denominator vanishes), which are the per-patch
   terms of the signed-square normalized correlation. */
inline void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    float den;
    int f2sum,fsum;
    int xm_f=x_f-5;

#ifndef DB_USE_SSE2
    /* Scalar path: walk the 11 rows of the window, accumulating the sum and
       the sum of squares while laying the samples out contiguously. */
    f2sum=0;
    fsum=0;
    for(int r=-5;r<=5;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+xm_f;
        for(int c=0;c<11;c++)
        {
            short f=(short)pf[c];
            f2sum+=f*f;
            fsum+=f;
            (*patch++)=f;
        }
    }

    /* zero-pad samples 121..127 so patch holds exactly 128 shorts */
    for(int i=0;i<7;i++) (*patch++)=0;
#else
    const unsigned char *pf0 =f_img[y_f-5]+xm_f;
    const unsigned char *pf1 =f_img[y_f-4]+xm_f;
    const unsigned char *pf2 =f_img[y_f-3]+xm_f;
    const unsigned char *pf3 =f_img[y_f-2]+xm_f;
    const unsigned char *pf4 =f_img[y_f-1]+xm_f;
    const unsigned char *pf5 =f_img[y_f  ]+xm_f;
    const unsigned char *pf6 =f_img[y_f+1]+xm_f;
    const unsigned char *pf7 =f_img[y_f+2]+xm_f;
    const unsigned char *pf8 =f_img[y_f+3]+xm_f;
    const unsigned char *pf9 =f_img[y_f+4]+xm_f;
    const unsigned char *pf10=f_img[y_f+5]+xm_f;

    /* pixel mask */
    const unsigned char pm[16] = {
        0xFF,0xFF,
        0xFF,0xFF,
        0xFF,0xFF,
        0,0,0,0,0,
        0,0,0,0,0};
    const unsigned char * pm_p = pm;

    _asm
    {
        mov ecx,patch           /* load patch pointer */
        mov ebx, pm_p           /* load pixel mask pointer */
        movdqu xmm1,[ebx]       /* load pixel mask */

        pxor xmm5,xmm5          /* set xmm5 to 0 accumulator for sum squares */
        pxor xmm4,xmm4          /* set xmm4 to 0 accumulator for sum */
        pxor xmm0,xmm0          /* set xmm0 to 0 */

        /* row 0 */
        mov eax,pf0             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqa [ecx+0*22],xmm7      /* move short values to patch */
        movdqa [ecx+0*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 1 */
        mov eax,pf1             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+1*22],xmm7      /* move short values to patch */
        movdqu [ecx+1*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 2 */
        mov eax,pf2             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+2*22],xmm7      /* move short values to patch */
        movdqu [ecx+2*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 3 */
        mov eax,pf3             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+3*22],xmm7      /* move short values to patch */
        movdqu [ecx+3*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 4 */
        mov eax,pf4             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+4*22],xmm7      /* move short values to patch */
        movdqu [ecx+4*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 5 */
        mov eax,pf5             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+5*22],xmm7      /* move short values to patch */
        movdqu [ecx+5*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 6 */
        mov eax,pf6             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+6*22],xmm7      /* move short values to patch */
        movdqu [ecx+6*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 7 */
        mov eax,pf7             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+7*22],xmm7      /* move short values to patch */
        movdqu [ecx+7*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 8 */
        mov eax,pf8             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqa [ecx+8*22],xmm7      /* move short values to patch */
        movdqa [ecx+8*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 9 */
        mov eax,pf9             /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+9*22],xmm7      /* move short values to patch */
        movdqu [ecx+9*22+16],xmm6   /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit uints into 16 bit uints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* row 10 */
        mov eax,pf10            /* load image pointer */
        movdqu xmm7,[eax]       /* load 16 pixels */
        movdqa xmm6,xmm7

        punpcklbw xmm7,xmm0     /* unpack low pixels (first 8)*/
        punpckhbw xmm6,xmm0     /* unpack high pixels (last 8)*/

        pand xmm6,xmm1          /* mask out pixels 12-16 */

        movdqu [ecx+10*22],xmm7     /* move short values to patch */
        movdqu [ecx+10*22+16],xmm6  /* move short values to patch */

        paddusw xmm4,xmm7       /* accumulate sums */
        pmaddwd xmm7,xmm7       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm7         /* accumulate sum squares */

        paddw xmm4,xmm6         /* accumulate sums */
        pmaddwd xmm6,xmm6       /* multiply 16 bit ints and add into 32 bit ints */
        paddd xmm5,xmm6         /* accumulate sum squares */

        /* add up the sum squares */
        movhlps xmm0,xmm5       /* high half to low half */
        paddd xmm5,xmm0         /* add high to low */
        pshuflw xmm0,xmm5, 0xE  /* reshuffle */
        paddd xmm5,xmm0         /* add remaining */
        movd f2sum,xmm5

        /* add up the sum */
        movhlps xmm0,xmm4
        paddw xmm4,xmm0         /* halves added */
        pshuflw xmm0,xmm4,0xE
        paddw xmm4,xmm0         /* quarters added */
        pshuflw xmm0,xmm4,0x1
        paddw xmm4,xmm0         /* eighth added */
        movd fsum, xmm4

        emms
    }

    fsum = fsum & 0xFFFF;

    patch[126] = 0;
    patch[127] = 0;
#endif /* DB_USE_SSE2 */

    *sum= (float) fsum;
    den=(121.0f*f2sum-fsum*fsum);
    *recip= (float)((den!=0.0)?1.0/den:0.0);
}
+
/* Map the integer displacement (r,c) through the linear (affine) part of the
   3x3 row-major matrix Hinv, writing the warped row/column offsets to
   r_w/c_w.  The translation and projective rows of Hinv are intentionally
   ignored: for a pure displacement only the upper-left 2x2 block matters. */
void AffineWarpPointOffset(float &r_w,float &c_w,double Hinv[9],int r,int c)
{
    const double warped_row=Hinv[3]*c+Hinv[4]*r;
    const double warped_col=Hinv[0]*c+Hinv[1]*r;
    r_w=(float)warped_row;
    c_w=(float)warped_col;
}
+
+
+
+/*!
+Prewarp the patches with given affine transform. For a given homogeneous point "x", "H*x" is
+the warped point and for any displacement "d" in the warped image resulting in point "y", the
+corresponding point in the original image is given by "Hinv*y", which can be simplified for affine H.
+If "affine" is 1, then nearest neighbor method is used, else if it is 2, then
+bilinear method is used.
+ */
+inline void db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(short *patch,const unsigned char * const *f_img,
+ int xi,int yi,float *sum,float *recip,
+ const double Hinv[9],int affine)
+{
+ float den;
+ short f;
+ int f2sum,fsum;
+
+ f2sum=0;
+ fsum=0;
+
+ if (affine==1)
+ {
+ for (int r=0;r<11;r++){
+ for (int c=0;c<11;c++){
+ f=f_img[yi+AffineWarpPoint_NN_LUT_y[r][c]][xi+AffineWarpPoint_NN_LUT_x[r][c]];
+ f2sum+=f*f;
+ fsum+=f;
+ (*patch++)=f;
+ }
+ }
+ }
+ else if (affine==2)
+ {
+ for (int r=0;r<11;r++){
+ for (int c=0;c<11;c++){
+ f=db_BilinearInterpolation(yi+AffineWarpPoint_BL_LUT_y[r][c]
+ ,xi+AffineWarpPoint_BL_LUT_x[r][c],f_img);
+ f2sum+=f*f;
+ fsum+=f;
+ (*patch++)=f;
+ }
+ }
+ }
+
+
+
+ (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
+ (*patch++)=0; (*patch++)=0;
+
+ *sum= (float) fsum;
+ den=(121.0f*f2sum-fsum*fsum);
+ *recip= (float)((den!=0.0)?1.0/den:0.0);
+}
+
+
/* Finish the signed-square normalized correlation of the 11x11 windows
   centered on (x_f,y_f) in f_img and (x_g,y_g) in g_img.  fsum_gsum is the
   product of the two window sums and f_recip_g_recip the product of the two
   per-window reciprocal norms, both computed by the PreAlign step.  Returns
   the squared correlation carrying the sign of the correlation itself. */
inline float db_SignedSquareNormCorr11x11_Post_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g,
                                          float fsum_gsum,float f_recip_g_recip)
{
    int fgsum=0;

    /* dot product of the two windows */
    for(int r=-5;r<=5;r++)
    {
        const unsigned char *pf=f_img[y_f+r]+x_f-5;
        const unsigned char *pg=g_img[y_g+r]+x_g-5;
        for(int c=0;c<11;c++) fgsum+=pf[c]*pg[c];
    }

    float fg_corr=121.0f*fgsum-fsum_gsum;
    /* square the correlation but keep its sign */
    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
    return(-fg_corr*fg_corr*f_recip_g_recip);
}
+
+float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+ float fgsum,fg_corr;
+
+ fgsum= (float) db_ScalarProduct512_s(f_patch,g_patch);
+
+ fg_corr=441.0f*fgsum-fsum_gsum;
+ if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+ return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+
+float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+ float fgsum,fg_corr;
+
+ fgsum= (float) db_ScalarProduct128_s(f_patch,g_patch);
+
+ fg_corr=121.0f*fgsum-fsum_gsum;
+ if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+ return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+float db_SignedSquareNormCorr5x5Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+ float fgsum,fg_corr;
+
+ fgsum= (float) db_ScalarProduct32_s(f_patch,g_patch);
+
+ fg_corr=25.0f*fgsum-fsum_gsum;
+ if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+ return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+
+inline float db_SignedSquareNormCorr15x15_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+ unsigned char *pf,*pg;
+ float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+ int xm_f,xm_g;
+
+ xm_f=x_f-7;
+ xm_g=x_g-7;
+ fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+ pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+ f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+ fg_corr=225.0f*fgsum-fsum*gsum;
+ den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
+ if(den!=0.0)
+ {
+ if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+ return(-fg_corr*fg_corr/den);
+ }
+ return(0.0);
+}
+
inline float db_SignedSquareNormCorr7x7_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    /*Signed square of the normalized cross-correlation between the 7x7
    patches centered at (x_f,y_f) in f_img and (x_g,y_g) in g_img.
    Returns corr^2 carrying the sign of corr, or 0.0 when either patch
    has zero variance (the denominator vanishes). No bounds checking is
    performed: both 7x7 neighborhoods must lie inside their images*/
    float fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    int row,col;

    fgsum=0.0f; f2sum=0.0f; g2sum=0.0f; fsum=0.0f; gsum=0.0f;

    /*Accumulate the five raw sums row by row, left to right, i.e. in the
    exact order a fully unrolled scan would use, which keeps the float
    results identical to such an implementation*/
    for(row= -3;row<=3;row++)
    {
        const float *pf=f_img[y_f+row]+(x_f-3);
        const float *pg=g_img[y_g+row]+(x_g-3);
        for(col=0;col<7;col++)
        {
            float f=pf[col];
            float g=pg[col];
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /*49 = number of pixels in the 7x7 patch*/
    fg_corr=49.0f*fgsum-fsum*gsum;
    den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
inline float db_SignedSquareNormCorr9x9_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    /*Signed square of the normalized cross-correlation between the 9x9
    patches centered at (x_f,y_f) in f_img and (x_g,y_g) in g_img.
    Returns corr^2 carrying the sign of corr, or 0.0 when either patch
    has zero variance (the denominator vanishes). No bounds checking is
    performed: both 9x9 neighborhoods must lie inside their images*/
    float fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    int row,col;

    fgsum=0.0f; f2sum=0.0f; g2sum=0.0f; fsum=0.0f; gsum=0.0f;

    /*Accumulate the five raw sums row by row, left to right, i.e. in the
    exact order a fully unrolled scan would use, which keeps the float
    results identical to such an implementation*/
    for(row= -4;row<=4;row++)
    {
        const float *pf=f_img[y_f+row]+(x_f-4);
        const float *pg=g_img[y_g+row]+(x_g-4);
        for(col=0;col<9;col++)
        {
            float f=pf[col];
            float g=pg[col];
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /*81 = number of pixels in the 9x9 patch*/
    fg_corr=81.0f*fgsum-fsum*gsum;
    den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
inline float db_SignedSquareNormCorr11x11_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    /*Signed square of the normalized cross-correlation between the 11x11
    patches centered at (x_f,y_f) in f_img and (x_g,y_g) in g_img.
    Returns corr^2 carrying the sign of corr, or 0.0 when either patch
    has zero variance (the denominator vanishes). No bounds checking is
    performed: both 11x11 neighborhoods must lie inside their images*/
    float fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    int row,col;

    fgsum=0.0f; f2sum=0.0f; g2sum=0.0f; fsum=0.0f; gsum=0.0f;

    /*Accumulate the five raw sums row by row, left to right, i.e. in the
    exact order a fully unrolled scan would use, which keeps the float
    results identical to such an implementation*/
    for(row= -5;row<=5;row++)
    {
        const float *pf=f_img[y_f+row]+(x_f-5);
        const float *pg=g_img[y_g+row]+(x_g-5);
        for(col=0;col<11;col++)
        {
            float f=pf[col];
            float g=pg[col];
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /*121 = number of pixels in the 11x11 patch*/
    fg_corr=121.0f*fgsum-fsum*gsum;
    den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
inline void db_SignedSquareNormCorr11x11_Pre_f(float **f_img,int x_f,int y_f,float *sum,float *recip)
{
    /*Precompute, for the 11x11 patch centered at (x_f,y_f) in f_img, the
    pixel sum (*sum) and the reciprocal of the variance-style denominator
    121*sum(f^2)-sum(f)^2 (*recip). *recip is set to 0.0 when that
    denominator is zero (constant patch). No bounds checking is performed:
    the 11x11 neighborhood must lie inside the image*/
    float f2sum,fsum,den;
    int row,col;

    f2sum=0.0f; fsum=0.0f;

    /*Accumulate the two raw sums row by row, left to right, i.e. in the
    exact order a fully unrolled scan would use; starting from 0.0f and
    adding the first pixel is exact, so results match bit for bit*/
    for(row= -5;row<=5;row++)
    {
        const float *pf=f_img[y_f+row]+(x_f-5);
        for(col=0;col<11;col++)
        {
            float f=pf[col];
            f2sum+=f*f;
            fsum+=f;
        }
    }

    *sum=fsum;
    /*121 = number of pixels in the 11x11 patch*/
    den=(121.0f*f2sum-fsum*fsum);
    *recip= (float) ((den!=0.0)?1.0/den:0.0);
}
+
inline void db_SignedSquareNormCorr11x11_PreAlign_f(float *patch,const float * const *f_img,int x_f,int y_f,float *sum,float *recip)
{
    /* Copy the 11x11 window of f_img centered on (x_f,y_f) into patch in
       row-major order while accumulating the pixel sum and the sum of
       squared pixels. Accumulation runs left-to-right, top-to-bottom,
       i.e. in the same sequential order as the fully unrolled variant.
       On return, *sum holds the pixel sum and *recip holds
       1/(121*sum(f^2)-sum(f)^2), or 0 when that denominator vanishes
       (constant patch). */
    const float *pf;
    float f,den,f2sum,fsum;
    int row,col,k,xm_f;

    xm_f=x_f-5;
    fsum=0.0f;
    f2sum=0.0f;

    for(row= -5;row<=5;row++)
    {
        pf=f_img[y_f+row]+xm_f;
        for(col=0;col<11;col++)
        {
            f=pf[col];
            f2sum+=f*f;
            fsum+=f;
            (*patch++)=f;
        }
    }

    /* Zero-pad the 121 samples out to 128 floats so the patch can be fed
       to the 128-wide aligned scalar product. */
    for(k=0;k<7;k++) (*patch++)=0.0f;

    *sum=fsum;
    den=(121.0f*f2sum-fsum*fsum);
    *recip= (float) ((den!=0.0)?1.0/den:0.0);
}
+
inline float db_SignedSquareNormCorr11x11_Post_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g,
                                                 float fsum_gsum,float f_recip_g_recip)
{
    /* Finish the signed squared normalized correlation between the 11x11
       windows of f_img at (x_f,y_f) and g_img at (x_g,y_g). The caller
       supplies the precomputed fsum*gsum product and the product of the
       two normalization reciprocals. The products are accumulated
       left-to-right, top-to-bottom, matching the unrolled variant. The
       result keeps the sign of the correlation while squaring its
       magnitude. */
    const float *pf,*pg;
    float fgsum,fg_corr,mag;
    int row,col,xm_f,xm_g;

    xm_f=x_f-5;
    xm_g=x_g-5;
    fgsum=0.0f;

    for(row= -5;row<=5;row++)
    {
        pf=f_img[y_f+row]+xm_f;
        pg=g_img[y_g+row]+xm_g;
        for(col=0;col<11;col++) fgsum+=pf[col]*pg[col];
    }

    fg_corr=121.0f*fgsum-fsum_gsum;
    mag=fg_corr*fg_corr*f_recip_g_recip;
    if(fg_corr>=0.0) return(mag);
    return(-mag);
}
+
inline float db_SignedSquareNormCorr11x11Aligned_Post_f(const float *f_patch,const float *g_patch,float fsum_gsum,float f_recip_g_recip)
{
    /* Same as db_SignedSquareNormCorr11x11_Post_f but for prealigned,
       zero-padded 128-float patches, using the aligned scalar product.
       Returns the signed square of the normalized correlation. */
    float dot,fg_corr,mag;

    dot=db_ScalarProduct128Aligned16_f(f_patch,g_patch);

    fg_corr=121.0f*dot-fsum_gsum;
    mag=fg_corr*fg_corr*f_recip_g_recip;
    if(fg_corr>=0.0) return(mag);
    return(-mag);
}
+
inline float db_SignedSquareNormCorr15x15_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    /* Fully self-contained signed squared normalized correlation between
       the 15x15 windows of f_img at (x_f,y_f) and g_img at (x_g,y_g).
       All five sums are accumulated per pixel, left-to-right and
       top-to-bottom, in the same sequential order as the unrolled
       variant. Returns sign(corr)*corr^2/den, or 0 when either window
       has zero variance. */
    const float *pf,*pg;
    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    int row,col,xm_f,xm_g;

    xm_f=x_f-7;
    xm_g=x_g-7;
    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;

    for(row= -7;row<=7;row++)
    {
        pf=f_img[y_f+row]+xm_f;
        pg=g_img[y_g+row]+xm_g;
        for(col=0;col<15;col++)
        {
            f=pf[col]; g=pg[col];
            fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
        }
    }

    /* 225 = 15*15 samples in the window. */
    fg_corr=225.0f*fgsum-fsum*gsum;
    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);
}
+
+db_Bucket_f** db_AllocBuckets_f(int nr_h,int nr_v,int bd)
+{
+ int i,j;
+ db_Bucket_f **bp,*b;
+
+ b=new db_Bucket_f [(nr_h+2)*(nr_v+2)];
+ bp=new db_Bucket_f* [(nr_v+2)];
+ bp=bp+1;
+ for(i= -1;i<=nr_v;i++)
+ {
+ bp[i]=b+1+(nr_h+2)*(i+1);
+ for(j= -1;j<=nr_h;j++)
+ {
+ bp[i][j].ptr=new db_PointInfo_f [bd];
+ }
+ }
+
+ return(bp);
+}
+
+db_Bucket_u** db_AllocBuckets_u(int nr_h,int nr_v,int bd)
+{
+ int i,j;
+ db_Bucket_u **bp,*b;
+
+ b=new db_Bucket_u [(nr_h+2)*(nr_v+2)];
+ bp=new db_Bucket_u* [(nr_v+2)];
+ bp=bp+1;
+ for(i= -1;i<=nr_v;i++)
+ {
+ bp[i]=b+1+(nr_h+2)*(i+1);
+ for(j= -1;j<=nr_h;j++)
+ {
+ bp[i][j].ptr=new db_PointInfo_u [bd];
+ }
+ }
+
+ return(bp);
+}
+
+void db_FreeBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
+{
+ int i,j;
+
+ for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
+ {
+ delete [] bp[i][j].ptr;
+ }
+ delete [] (bp[-1]-1);
+ delete [] (bp-1);
+}
+
+void db_FreeBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
+{
+ int i,j;
+
+ for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
+ {
+ delete [] bp[i][j].ptr;
+ }
+ delete [] (bp[-1]-1);
+ delete [] (bp-1);
+}
+
+void db_EmptyBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
+{
+ int i,j;
+ for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
+}
+
+void db_EmptyBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
+{
+ int i,j;
+ for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
+}
+
+float* db_FillBuckets_f(float *patch_space,const float * const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners)
+{
+ int i,xi,yi,xpos,ypos,nr;
+ db_Bucket_f *br;
+ db_PointInfo_f *pir;
+
+ db_EmptyBuckets_f(bp,nr_h,nr_v);
+ for(i=0;i<nr_corners;i++)
+ {
+ xi=(int) x[i];
+ yi=(int) y[i];
+ xpos=xi/bw;
+ ypos=yi/bh;
+ if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
+ {
+ br=&bp[ypos][xpos];
+ nr=br->nr;
+ if(nr<bd)
+ {
+ pir=&(br->ptr[nr]);
+ pir->x=xi;
+ pir->y=yi;
+ pir->id=i;
+ pir->pir=0;
+ pir->patch=patch_space;
+ br->nr=nr+1;
+
+ db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+ patch_space+=128;
+ }
+ }
+ }
+ return(patch_space);
+}
+
+short* db_FillBuckets_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,int use_smaller_matching_window, int use_21)
+{
+ int i,xi,yi,xpos,ypos,nr;
+ db_Bucket_u *br;
+ db_PointInfo_u *pir;
+
+ db_EmptyBuckets_u(bp,nr_h,nr_v);
+ for(i=0;i<nr_corners;i++)
+ {
+ xi=(int)db_roundi(x[i]);
+ yi=(int)db_roundi(y[i]);
+ xpos=xi/bw;
+ ypos=yi/bh;
+ if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
+ {
+ br=&bp[ypos][xpos];
+ nr=br->nr;
+ if(nr<bd)
+ {
+ pir=&(br->ptr[nr]);
+ pir->x=xi;
+ pir->y=yi;
+ pir->id=i;
+ pir->pir=0;
+ pir->patch=patch_space;
+ br->nr=nr+1;
+
+ if(use_21)
+ {
+ db_SignedSquareNormCorr21x21_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+ patch_space+=512;
+ }
+ else
+ {
+ if(!use_smaller_matching_window)
+ {
+ db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+ patch_space+=128;
+ }
+ else
+ {
+ db_SignedSquareNormCorr5x5_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+ patch_space+=32;
+ }
+ }
+ }
+ }
+ }
+ return(patch_space);
+}
+
+
+
/*Variant of db_FillBuckets_f for a known prewarp: each corner is
mapped through the homography H, and the warped position selects the
bucket and is stored as the point position, while the 11x11 patch is
still sampled at the original (unwarped) image location. The biased
integer divide lets warped points just outside the grid land in the
border ring (index -1 or nr_h/nr_v) instead of being discarded.*/
float* db_FillBucketsPrewarped_f(float *patch_space,const float *const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,const double H[9])
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_f *br;
    db_PointInfo_f *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_f(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) xd[0];
        yi=(int) xd[1];
        /*Warp the corner into the other image's frame*/
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        /*Bucket index, biased by one bucket so truncation maps
        coordinates in [-bw,0) to -1 rather than 0*/
        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;
        /*Border buckets (-1 and nr_h/nr_v) are valid storage here*/
        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)
            {
                pir=&(br->ptr[nr]);
                /*Store the warped position; disparity gating compares
                it against unwarped left-image points*/
                pir->x=wxi;
                pir->y=wyi;
                pir->id=i;
                pir->pir=0;
                pir->patch=patch_space;
                br->nr=nr+1;

                /*Patch is extracted at the original location*/
                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                patch_space+=128;
            }
        }
    }
    return(patch_space);
}
+
/*Variant of db_FillBuckets_u for a known prewarp: each corner is
mapped through the homography H, and the warped position selects the
bucket and is stored as the point position, while the 11x11 patch is
still sampled at the original (rounded, unwarped) image location.
The biased integer divide lets warped points just outside the grid
land in the border ring (index -1 or nr_h/nr_v).*/
short* db_FillBucketsPrewarped_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
                                 int nr_corners,const double H[9])
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_u *br;
    db_PointInfo_u *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_u(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) db_roundi(xd[0]);
        yi=(int) db_roundi(xd[1]);
        /*Warp the corner into the other image's frame*/
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        /*Bucket index, biased by one bucket so truncation maps
        coordinates in [-bw,0) to -1 rather than 0*/
        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;
        /*Border buckets (-1 and nr_h/nr_v) are valid storage here*/
        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)
            {
                pir=&(br->ptr[nr]);
                /*Store the warped position; disparity gating compares
                it against unwarped left-image points*/
                pir->x=wxi;
                pir->y=wyi;
                pir->id=i;
                pir->pir=0;
                pir->patch=patch_space;
                br->nr=nr+1;

                /*Patch is extracted at the original location*/
                db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                patch_space+=128;
            }
        }
    }
    return(patch_space);
}
+
+
+
/*Like db_FillBucketsPrewarped_u, but the correlation patch itself is
warped by the inverse affine transform Hinv (nearest-neighbor or
bilinear per 'affine') before pre-normalization. warpboundsp gives
the {xmin,xmax,ymin,ymax} region inside which the warped 11x11
sampling stays within the image; corners outside it are skipped.*/
short* db_FillBucketsPrewarpedAffine_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
                                       int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
                                       int nr_corners,const double H[9],const double Hinv[9],const int warpboundsp[4],
                                       int affine)
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_u *br;
    db_PointInfo_u *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_u(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) db_roundi(xd[0]);
        yi=(int) db_roundi(xd[1]);
        /*Warp the corner into the other image's frame*/
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        /*Bucket index, biased so truncation maps [-bw,0) to -1*/
        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;


        if (xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            /*Reject corners whose warped patch would read outside
            the image*/
            if( xi>warpboundsp[0] && xi<warpboundsp[1] && yi>warpboundsp[2] && yi<warpboundsp[3])
            {

                br=&bp[ypos][xpos];
                nr=br->nr;
                if(nr<bd)
                {
                    pir=&(br->ptr[nr]);
                    /*Store the warped position for disparity gating*/
                    pir->x=wxi;
                    pir->y=wyi;
                    pir->id=i;
                    pir->pir=0;
                    pir->patch=patch_space;
                    br->nr=nr+1;

                    db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip),Hinv,affine);
                    patch_space+=128;
                }
            }
        }
    }
    return(patch_space);
}
+
+
+
/*Consider (pir_l,pir_r) as a match candidate. If their disparity lies
inside the search ellipse defined by kA and kB, correlate their
prealigned 11x11 patches and let each point adopt the other as its
best match when the score improves on its current best (pir==0 means
no candidate scored yet).*/
inline void db_MatchPointPair_f(db_PointInfo_f *pir_l,db_PointInfo_f *pir_r,
                                unsigned long kA,unsigned long kB)
{
    int x_l,y_l,x_r,y_r,xm,ym;
    double score;

    x_l=pir_l->x;
    y_l=pir_l->y;
    x_r=pir_r->x;
    y_r=pir_r->y;
    xm=x_l-x_r;
    ym=y_l-y_r;
    /*Check if disparity is within the maximum disparity
    with the formula xm^2*256+ym^2*kA<kB
    where kA=256*w^2/h^2
    and kB=256*max_disp^2*w^2*/
    if(((xm*xm)<<8)+ym*ym*kA<kB)
    {
        /*Correlate*/
        score=db_SignedSquareNormCorr11x11Aligned_Post_f(pir_l->patch,pir_r->patch,
                                                         (pir_l->sum)*(pir_r->sum),
                                                         (pir_l->recip)*(pir_r->recip));

        if((!(pir_l->pir)) || (score>pir_l->s))
        {
            /*Update left corner*/
            pir_l->s=score;
            pir_l->pir=pir_r;
        }
        if((!(pir_r->pir)) || (score>pir_r->s))
        {
            /*Update right corner*/
            pir_r->s=score;
            pir_r->pir=pir_l;
        }
    }
}
+
/*Consider (pir_l,pir_r) as a match candidate. The disparity gate is
either a kA x kB rectangle (rect_window nonzero) or the ellipse
xm^2*256+ym^2*kA<kB. If the pair passes, correlate the prealigned
patches (21x21, 11x11 or 5x5 per the flags) and let each point adopt
the other as its best match when the score improves on its current
best (pir==0 means no candidate scored yet).*/
inline void db_MatchPointPair_u(db_PointInfo_u *pir_l,db_PointInfo_u *pir_r,
                                unsigned long kA,unsigned long kB, unsigned int rect_window,bool use_smaller_matching_window, int use_21)
{
    int xm,ym;
    double score;
    bool compute_score;


    if( rect_window )
        /*Rectangular gate: |dx|<kA pixels and |dy|<kB pixels*/
        compute_score = ((unsigned)db_absi(pir_l->x - pir_r->x)<kA && (unsigned)db_absi(pir_l->y - pir_r->y)<kB);
    else
    {   /*Check if disparity is within the maximum disparity
        with the formula xm^2*256+ym^2*kA<kB
        where kA=256*w^2/h^2
        and kB=256*max_disp^2*w^2*/
        xm= pir_l->x - pir_r->x;
        ym= pir_l->y - pir_r->y;
        compute_score = ((xm*xm)<<8)+ym*ym*kA < kB;
    }

    if ( compute_score )
    {
        if(use_21)
        {
            score=db_SignedSquareNormCorr21x21Aligned_Post_s(pir_l->patch,pir_r->patch,
                                                             (pir_l->sum)*(pir_r->sum),
                                                             (pir_l->recip)*(pir_r->recip));
        }
        else
        {
            /*Correlate*/
            if(!use_smaller_matching_window)
            {
                score=db_SignedSquareNormCorr11x11Aligned_Post_s(pir_l->patch,pir_r->patch,
                                                                 (pir_l->sum)*(pir_r->sum),
                                                                 (pir_l->recip)*(pir_r->recip));
            }
            else
            {
                score=db_SignedSquareNormCorr5x5Aligned_Post_s(pir_l->patch,pir_r->patch,
                                                               (pir_l->sum)*(pir_r->sum),
                                                               (pir_l->recip)*(pir_r->recip));
            }
        }

        if((!(pir_l->pir)) || (score>pir_l->s))
        {
            /*Update left corner*/
            pir_l->s=score;
            pir_l->pir=pir_r;
        }
        if((!(pir_r->pir)) || (score>pir_r->s))
        {
            /*Update right corner*/
            pir_r->s=score;
            pir_r->pir=pir_l;
        }
    }
}
+
+inline void db_MatchPointAgainstBucket_f(db_PointInfo_f *pir_l,db_Bucket_f *b_r,
+ unsigned long kA,unsigned long kB)
+{
+ int p_r,nr;
+ db_PointInfo_f *pir_r;
+
+ nr=b_r->nr;
+ pir_r=b_r->ptr;
+ for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_f(pir_l,pir_r+p_r,kA,kB);
+}
+
+inline void db_MatchPointAgainstBucket_u(db_PointInfo_u *pir_l,db_Bucket_u *b_r,
+ unsigned long kA,unsigned long kB,int rect_window, bool use_smaller_matching_window, int use_21)
+{
+ int p_r,nr;
+ db_PointInfo_u *pir_r;
+
+ nr=b_r->nr;
+ pir_r=b_r->ptr;
+
+ for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_u(pir_l,pir_r+p_r,kA,kB, rect_window, use_smaller_matching_window, use_21);
+
+}
+
+void db_MatchBuckets_f(db_Bucket_f **bp_l,db_Bucket_f **bp_r,int nr_h,int nr_v,
+ unsigned long kA,unsigned long kB)
+{
+ int i,j,k,a,b,br_nr;
+ db_Bucket_f *br;
+ db_PointInfo_f *pir_l;
+
+ /*For all buckets*/
+ for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+ {
+ br=&bp_l[i][j];
+ br_nr=br->nr;
+ /*For all points in bucket*/
+ for(k=0;k<br_nr;k++)
+ {
+ pir_l=br->ptr+k;
+ for(a=i-1;a<=i+1;a++)
+ {
+ for(b=j-1;b<=j+1;b++)
+ {
+ db_MatchPointAgainstBucket_f(pir_l,&bp_r[a][b],kA,kB);
+ }
+ }
+ }
+ }
+}
+
+void db_MatchBuckets_u(db_Bucket_u **bp_l,db_Bucket_u **bp_r,int nr_h,int nr_v,
+ unsigned long kA,unsigned long kB,int rect_window,bool use_smaller_matching_window, int use_21)
+{
+ int i,j,k,a,b,br_nr;
+ db_Bucket_u *br;
+ db_PointInfo_u *pir_l;
+
+ /*For all buckets*/
+ for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+ {
+ br=&bp_l[i][j];
+ br_nr=br->nr;
+ /*For all points in bucket*/
+ for(k=0;k<br_nr;k++)
+ {
+ pir_l=br->ptr+k;
+ for(a=i-1;a<=i+1;a++)
+ {
+ for(b=j-1;b<=j+1;b++)
+ {
+ db_MatchPointAgainstBucket_u(pir_l,&bp_r[a][b],kA,kB,rect_window,use_smaller_matching_window, use_21);
+ }
+ }
+ }
+ }
+}
+
+void db_CollectMatches_f(db_Bucket_f **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
+{
+ int i,j,k,br_nr;
+ unsigned long count;
+ db_Bucket_f *br;
+ db_PointInfo_f *pir,*pir2;
+
+ count=0;
+ /*For all buckets*/
+ for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+ {
+ br=&bp_l[i][j];
+ br_nr=br->nr;
+ /*For all points in bucket*/
+ for(k=0;k<br_nr;k++)
+ {
+ pir=br->ptr+k;
+ pir2=pir->pir;
+ if(pir2)
+ {
+ /*This point has a best match*/
+ if((pir2->pir)==pir)
+ {
+ /*We have a mutually consistent match*/
+ if(count<target)
+ {
+ id_l[count]=pir->id;
+ id_r[count]=pir2->id;
+ count++;
+ }
+ }
+ }
+ }
+ }
+ *nr_matches=count;
+}
+
+void db_CollectMatches_u(db_Bucket_u **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
+{
+ int i,j,k,br_nr;
+ unsigned long count;
+ db_Bucket_u *br;
+ db_PointInfo_u *pir,*pir2;
+
+ count=0;
+ /*For all buckets*/
+ for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+ {
+ br=&bp_l[i][j];
+ br_nr=br->nr;
+ /*For all points in bucket*/
+ for(k=0;k<br_nr;k++)
+ {
+ pir=br->ptr+k;
+ pir2=pir->pir;
+ if(pir2)
+ {
+ /*This point has a best match*/
+ if((pir2->pir)==pir)
+ {
+ /*We have a mutually consistent match*/
+ if(count<target)
+ {
+ id_l[count]=pir->id;
+ id_r[count]=pir2->id;
+ count++;
+ }
+ }
+ }
+ }
+ }
+ *nr_matches=count;
+}
+
+db_Matcher_f::db_Matcher_f()
+{
+ m_w=0; m_h=0;
+}
+
/*Destructor: release bucket grids and patch memory via Clean().*/
db_Matcher_f::~db_Matcher_f()
{
    Clean();
}
+
+void db_Matcher_f::Clean()
+{
+ if(m_w)
+ {
+ /*Free buckets*/
+ db_FreeBuckets_f(m_bp_l,m_nr_h,m_nr_v);
+ db_FreeBuckets_f(m_bp_r,m_nr_h,m_nr_v);
+ /*Free space for patch layouts*/
+ delete [] m_patch_space;
+ }
+ m_w=0; m_h=0;
+}
+
/*Configure the matcher for im_width x im_height images and allocate
the bucket grid and patch memory. Returns the maximum number of
matches (target_nr_corners). Buckets span one maximum-disparity
distance, so the 3x3 bucket neighborhood covers the search range.*/
unsigned long db_Matcher_f::Init(int im_width,int im_height,double max_disparity,int target_nr_corners)
{
    Clean();
    m_w=im_width;
    m_h=im_height;
    /*Bucket dimensions: one maximum-disparity span each, at least 1*/
    m_bw=db_maxi(1,(int) (max_disparity*((double)im_width)));
    m_bh=db_maxi(1,(int) (max_disparity*((double)im_height)));
    m_nr_h=1+(im_width-1)/m_bw;
    m_nr_v=1+(im_height-1)/m_bh;
    /*Per-bucket capacity scaled by the bucket's image fraction*/
    m_bd=db_maxi(1,(int)(((double)target_nr_corners)*
                         max_disparity*max_disparity));
    m_target=target_nr_corners;
    /*Ellipse-gate constants used by db_MatchPointPair_f as
    xm^2*256+ym^2*kA<kB, with kA=256*w^2/h^2 and kB=256*max_disp^2*w^2*/
    m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
    m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));

    /*Alloc bucket structure*/
    m_bp_l=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
    m_bp_r=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);

    /*Alloc 16byte-aligned space for patch layouts (128 floats per
    point, two grids)*/
    m_patch_space=new float [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
    m_aligned_patch_space=db_AlignPointer_f(m_patch_space,16);

    return(m_target);
}
+
/*Match two float-image feature sets: fill the left and right bucket
grids (applying the optional prewarp H to the right-hand features),
score all candidate pairs, then report the mutually consistent
matches in id_l/id_r (at most m_target of them).*/
void db_Matcher_f::Match(const float * const *l_img,const float * const *r_img,
                         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
                         int *id_l,int *id_r,int *nr_matches,const double H[9])
{
    float *ps;

    /*Insert the corners into bucket structure; ps is the first free
    patch slot after the left grid has been filled*/
    ps=db_FillBuckets_f(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l);
    if(H==0) db_FillBuckets_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r);
    else db_FillBucketsPrewarped_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);

    /*Compute all the necessary match scores*/
    db_MatchBuckets_f(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB);

    /*Collect the correspondences*/
    db_CollectMatches_f(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
}
+
+db_Matcher_u::db_Matcher_u()
+{
+ m_w=0; m_h=0;
+ m_rect_window = 0;
+ m_bw=m_bh=m_nr_h=m_nr_v=m_bd=m_target=0;
+ m_bp_l=m_bp_r=0;
+ m_patch_space=m_aligned_patch_space=0;
+}
+
+db_Matcher_u::db_Matcher_u(const db_Matcher_u& cm)
+{
+ Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
+}
+
+db_Matcher_u& db_Matcher_u::operator= (const db_Matcher_u& cm)
+{
+ if ( this == &cm ) return *this;
+ Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
+ return *this;
+}
+
+
/*Destructor: release bucket grids and patch memory via Clean().*/
db_Matcher_u::~db_Matcher_u()
{
    Clean();
}
+
+void db_Matcher_u::Clean()
+{
+ if(m_w)
+ {
+ /*Free buckets*/
+ db_FreeBuckets_u(m_bp_l,m_nr_h,m_nr_v);
+ db_FreeBuckets_u(m_bp_r,m_nr_h,m_nr_v);
+ /*Free space for patch layouts*/
+ delete [] m_patch_space;
+ }
+ m_w=0; m_h=0;
+}
+
+
/*Configure the matcher and allocate bucket and patch memory; returns
the maximum number of matches (target_nr_corners). With an explicit
vertical disparity the search region is a rectangle, otherwise an
ellipse; m_kA/m_kB hold the corresponding gate constants consumed by
db_MatchPointPair_u. m_w doubles as the "allocated" flag tested by
Clean() and IsAllocated().*/
unsigned long db_Matcher_u::Init(int im_width,int im_height,double max_disparity,int target_nr_corners,
                                 double max_disparity_v, bool use_smaller_matching_window, int use_21)
{
    Clean();
    m_w=im_width;
    m_h=im_height;
    m_max_disparity=max_disparity;
    m_max_disparity_v=max_disparity_v;

    if ( max_disparity_v != DB_DEFAULT_NO_DISPARITY )
    {
        /*Rectangular search region*/
        m_rect_window = 1;

        m_bw=db_maxi(1,(int)(max_disparity*((double)im_width)));
        m_bh=db_maxi(1,(int)(max_disparity_v*((double)im_height)));

        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity_v));

        /*Gate used as |dx|<kA && |dy|<kB (pixels)*/
        m_kA=(int)(max_disparity*m_w);
        m_kB=(int)(max_disparity_v*m_h);

    } else
    {
        /*Elliptic search region*/
        m_bw=(int)db_maxi(1,(int)(max_disparity*((double)im_width)));
        m_bh=(int)db_maxi(1,(int)(max_disparity*((double)im_height)));

        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity));

        /*Gate used as xm^2*256+ym^2*kA<kB, with kA=256*w^2/h^2 and
        kB=256*max_disp^2*w^2*/
        m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
        m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
    }

    m_nr_h=1+(im_width-1)/m_bw;
    m_nr_v=1+(im_height-1)/m_bh;

    m_target=target_nr_corners;

    /*Alloc bucket structure*/
    m_bp_l=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
    m_bp_r=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);

    m_use_smaller_matching_window = use_smaller_matching_window;
    m_use_21 = use_21;

    /*Patch layouts are 512 shorts (21x21), 128 (11x11) or 32 (5x5)
    per point, for both grids; the alignment presumably matches the
    correlators' requirements - TODO confirm against the PreAlign
    implementations*/
    if(m_use_21)
    {
        /*Alloc 64byte-aligned space for patch layouts*/
        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*512+64];
        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,64);
    }
    else
    {
        if(!m_use_smaller_matching_window)
        {
            /*Alloc 16byte-aligned space for patch layouts*/
            m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
            m_aligned_patch_space=db_AlignPointer_s(m_patch_space,16);
        }
        else
        {
            /*Alloc 4byte-aligned space for patch layouts*/
            m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*32+4];
            m_aligned_patch_space=db_AlignPointer_s(m_patch_space,4);
        }
    }

    return(m_target);
}
+
/*Match two byte-image feature sets: fill the left and right bucket
grids (applying the optional prewarp H to the right-hand features),
score all candidate pairs, then report mutually consistent matches in
id_l/id_r (at most m_target). If affine is nonzero, H is treated as
affine and the right-hand correlation patches are warped by its
inverse before matching (per the header: 1=nearest neighbor,
2=bilinear).*/
void db_Matcher_u::Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
                         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
                         int *id_l,int *id_r,int *nr_matches,const double H[9],int affine)
{
    short *ps;

    /*Insert the corners into bucket structure; ps is the first free
    patch slot after the left grid has been filled*/
    ps=db_FillBuckets_u(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l,m_use_smaller_matching_window,m_use_21);
    if(H==0)
        db_FillBuckets_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,m_use_smaller_matching_window,m_use_21);
    else
    {
        if (affine)
        {
            double Hinv[9];
            db_InvertAffineTransform(Hinv,H);
            float r_w, c_w;
            float stretch_x[2];
            float stretch_y[2];
            /*Estimate how far the warped 11x11 window can reach past
            its center, to keep warped patch sampling inside the image*/
            AffineWarpPointOffset(r_w,c_w,Hinv, 5,5);
            stretch_x[0]=db_absf(c_w);stretch_y[0]=db_absf(r_w);
            AffineWarpPointOffset(r_w,c_w,Hinv, 5,-5);
            stretch_x[1]=db_absf(c_w);stretch_y[1]=db_absf(r_w);
            int max_stretxh_x=(int) (db_maxd(stretch_x[0],stretch_x[1]));
            int max_stretxh_y=(int) (db_maxd(stretch_y[0],stretch_y[1]));
            int warpbounds[4]={max_stretxh_x,m_w-1-max_stretxh_x,max_stretxh_y,m_h-1-max_stretxh_y};

            /*Precompute per-offset warp look-up tables for the 11x11
            window (bilinear and nearest-neighbor variants). These are
            globals written here and presumably read by the affine
            patch extraction - NOTE(review): not safe for concurrent
            Match() calls, confirm single-threaded use*/
            for (int r=-5;r<=5;r++){
                for (int c=-5;c<=5;c++){
                    AffineWarpPointOffset(r_w,c_w,Hinv,r,c);
                    AffineWarpPoint_BL_LUT_y[r+5][c+5]=r_w;
                    AffineWarpPoint_BL_LUT_x[r+5][c+5]=c_w;

                    AffineWarpPoint_NN_LUT_y[r+5][c+5]=db_roundi(r_w);
                    AffineWarpPoint_NN_LUT_x[r+5][c+5]=db_roundi(c_w);

                }
            }

            db_FillBucketsPrewarpedAffine_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,
                                            x_r,y_r,nr_r,H,Hinv,warpbounds,affine);
        }
        else
            db_FillBucketsPrewarped_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
    }


    /*Compute all the necessary match scores*/
    db_MatchBuckets_u(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB, m_rect_window,m_use_smaller_matching_window,m_use_21);

    /*Collect the correspondences*/
    db_CollectMatches_u(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
}
+
+int db_Matcher_u::IsAllocated()
+{
+ return (int)(m_w != 0);
+}
diff --git a/jni/feature_stab/db_vlvm/db_feature_matching.h b/jni/feature_stab/db_vlvm/db_feature_matching.h
new file mode 100644
index 000000000..6c056b9a3
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_matching.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_matching.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/
+
+#ifndef DB_FEATURE_MATCHING_H
+#define DB_FEATURE_MATCHING_H
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup FeatureMatching Feature Matching
+ */
+#include "db_utilities.h"
+#include "db_utilities_constants.h"
+
+DB_API void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip);
+DB_API void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip);
+float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip);
+float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip);
+
/*!
 * \ingroup FeatureMatching
 * Per-feature bookkeeping used while matching float images: position,
 * original feature index, current best match and the precomputed
 * normalization coefficients of the feature's correlation patch.
 */
class db_PointInfo_f
{
public:
    /*Coordinates of point*/
    int x;
    int y;
    /*Id nr of point*/
    int id;
    /*Best match score*/
    double s;
    /*Best match candidate; 0 until a candidate has been scored*/
    db_PointInfo_f *pir;
    /*Precomputed coefficients
    of image patch*/
    float sum;
    float recip;
    /*Pointer to patch layout*/
    const float *patch;
};
+
/*!
 * \ingroup FeatureMatching
 * A cell of the spatial bucket grid: a fixed-capacity array of point
 * records and the count of entries currently in use.
 */
class db_Bucket_f
{
public:
    db_PointInfo_f *ptr;
    int nr;
};
+
/*!
 * \ingroup FeatureMatching
 * Per-feature bookkeeping used while matching byte images: position,
 * original feature index, current best match and the precomputed
 * normalization coefficients of the feature's correlation patch.
 */
class db_PointInfo_u
{
public:
    /*Coordinates of point*/
    int x;
    int y;
    /*Id nr of point*/
    int id;
    /*Best match score*/
    double s;
    /*Best match candidate; 0 until a candidate has been scored*/
    db_PointInfo_u *pir;
    /*Precomputed coefficients
    of image patch*/
    float sum;
    float recip;
    /*Pointer to patch layout*/
    const short *patch;
};
+
/*!
 * \ingroup FeatureMatching
 * A cell of the spatial bucket grid: a fixed-capacity array of point
 * records and the count of entries currently in use.
 */
class db_Bucket_u
{
public:
    db_PointInfo_u *ptr;
    int nr;
};
/*!
 * \class db_Matcher_f
 * \ingroup FeatureMatching
 * \brief Feature matcher for float images.
 *
 * Normalized correlation feature matcher for <b>float</b> images.
 * Correlation window size is constant and set to 11x11.
 * See \ref FeatureDetection to detect Harris corners.
 * Images are managed with functions in \ref LMImageBasicUtilities.
 */
class DB_API db_Matcher_f
{
public:
    db_Matcher_f();
    ~db_Matcher_f();

    /*!
     * Set parameters and pre-allocate memory. Return an upper bound
     * on the number of matches.
     * \param im_width width
     * \param im_height height
     * \param max_disparity maximum distance (as fraction of image size) between matches
     * \param target_nr_corners maximum number of matches
     * \return maximum number of matches
     */
    unsigned long Init(int im_width,int im_height,
                       double max_disparity=DB_DEFAULT_MAX_DISPARITY,
                       int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS);

    /*!
     * Match two sets of features.
     * If the prewarp H is not NULL it will be applied to the features
     * in the right image before matching.
     * Parameters id_l and id_r must point to arrays of size target_nr_corners
     * (returned by Init()).
     * The results of matching are in id_l and id_r.
     * Interpretation of results: if id_l[i] = m and id_r[i] = n,
     * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
     * \param l_img left image
     * \param r_img right image
     * \param x_l left x coordinates of features
     * \param y_l left y coordinates of features
     * \param nr_l number of features in left image
     * \param x_r right x coordinates of features
     * \param y_r right y coordinates of features
     * \param nr_r number of features in right image
     * \param id_l indices of left features that matched
     * \param id_r indices of right features that matched
     * \param nr_matches number of features actually matched
     * \param H image homography (prewarp) to be applied to right image features
     */
    void Match(const float * const *l_img,const float * const *r_img,
               const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
               int *id_l,int *id_r,int *nr_matches,const double H[9]=0);

protected:
    void Clean();

    /*Image size, bucket size/counts, per-bucket capacity, match cap*/
    int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target;
    /*Precomputed disparity-gate constants (see Init)*/
    unsigned long m_kA,m_kB;
    db_Bucket_f **m_bp_l;
    db_Bucket_f **m_bp_r;
    /*Raw allocation and its aligned alias used for patch layouts*/
    float *m_patch_space,*m_aligned_patch_space;
};
/*!
 * \class db_Matcher_u
 * \ingroup FeatureMatching
 * \brief Feature matcher for byte images.
 *
 * Normalized correlation feature matcher for <b>byte</b> images.
 * Correlation window size is constant and set to 11x11.
 * See \ref FeatureDetection to detect Harris corners.
 * Images are managed with functions in \ref LMImageBasicUtilities.
 *
 * If the prewarp matrix H is supplied, the feature coordinates are warped by H before being placed in
 * appropriate buckets. If H is an affine transform and the "affine" parameter is set to 1 or 2,
 * then the correlation patches themselves are warped before being placed in the patch space.
 */
class DB_API db_Matcher_u
{
public:
    db_Matcher_u();

    /*NOTE(review): always reports the default 11x11 window, even when
    Init() selected the 5x5 or 21x21 window - confirm callers expect
    the default here*/
    int GetPatchSize(){return 11;};

    virtual ~db_Matcher_u();

    /*!
    Copy ctor duplicates settings.
    Memory not copied.
    */
    db_Matcher_u(const db_Matcher_u& cm);

    /*!
    Assignment optor duplicates settings
    Memory not copied.
    */
    db_Matcher_u& operator= (const db_Matcher_u& cm);

    /*!
     * Set parameters and pre-allocate memory. Return an upper bound
     * on the number of matches.
     * If max_disparity_v is DB_DEFAULT_NO_DISPARITY, look for matches
     * in a ellipse around a feature of radius max_disparity*im_width by max_disparity*im_height.
     * If max_disparity_v is specified, use a rectangle max_disparity*im_width by max_disparity_v*im_height.
     * \param im_width width
     * \param im_height height
     * \param max_disparity maximum distance (as fraction of image size) between matches
     * \param target_nr_corners maximum number of matches
     * \param max_disparity_v maximum vertical disparity (distance between matches)
     * \param use_smaller_matching_window if set to true, uses a correlation window of 5x5 instead of the default 11x11
     * \return maximum number of matches
     */
    virtual unsigned long Init(int im_width,int im_height,
                               double max_disparity=DB_DEFAULT_MAX_DISPARITY,
                               int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
                               double max_disparity_v=DB_DEFAULT_NO_DISPARITY,
                               bool use_smaller_matching_window=false, int use_21=0);

    /*!
     * Match two sets of features.
     * If the prewarp H is not NULL it will be applied to the features
     * in the right image before matching.
     * Parameters id_l and id_r must point to arrays of size target_nr_corners
     * (returned by Init()).
     * The results of matching are in id_l and id_r.
     * Interpretation of results: if id_l[i] = m and id_r[i] = n,
     * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
     * \param l_img left image
     * \param r_img right image
     * \param x_l left x coordinates of features
     * \param y_l left y coordinates of features
     * \param nr_l number of features in left image
     * \param x_r right x coordinates of features
     * \param y_r right y coordinates of features
     * \param nr_r number of features in right image
     * \param id_l indices of left features that matched
     * \param id_r indices of right features that matched
     * \param nr_matches number of features actually matched
     * \param H image homography (prewarp) to be applied to right image features
     * \param affine prewarp the 11x11 patches by given affine transform. 0 means no warping,
     1 means nearest neighbor, 2 means bilinear warping.
     */
    virtual void Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
                       const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
                       int *id_l,int *id_r,int *nr_matches,const double H[9]=0,int affine=0);

    /*!
     * Checks if Init() was called.
     * \return 1 if Init() was called, 0 otherwise.
     */
    int IsAllocated();

protected:
    virtual void Clean();


    /*Image size, bucket size/counts, per-bucket capacity, match cap*/
    int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target;
    /*Precomputed disparity-gate constants (see Init)*/
    unsigned long m_kA,m_kB;
    db_Bucket_u **m_bp_l;
    db_Bucket_u **m_bp_r;
    /*Raw allocation and its aligned alias used for patch layouts*/
    short *m_patch_space,*m_aligned_patch_space;

    double m_max_disparity, m_max_disparity_v;
    /*Nonzero when a rectangular (rather than elliptic) gate is used*/
    int m_rect_window;
    bool m_use_smaller_matching_window;
    int m_use_21;
};
+
+
+
+#endif /*DB_FEATURE_MATCHING_H*/
diff --git a/jni/feature_stab/db_vlvm/db_framestitching.cpp b/jni/feature_stab/db_vlvm/db_framestitching.cpp
new file mode 100644
index 000000000..b574f7a04
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_framestitching.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_framestitching.cpp,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_framestitching.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
/*Recover the rotation R that best aligns two centered point sets from
their 3x3 sum-of-outer-products matrix M (quaternion method, per
Horn's absolute-orientation formulation referenced in the header).
Builds the symmetric 4x4 matrix N from M, takes the eigenvector of
its most positive eigenvalue as the rotation quaternion, and converts
it to R. If score is non-null it receives q'*N*q, used by the caller
to compare orientation-preserving and reversing solutions.*/
inline void db_RotationFromMOuterProductSum(double R[9],double *score,double M[9])
{
    double N[16],q[4],lambda[4],lambda_max;
    double y[4];
    int nr_roots;

    /*Assemble the symmetric 4x4 matrix N from the entries of M*/
    N[0]= M[0]+M[4]+M[8];
    N[5]= M[0]-M[4]-M[8];
    N[10]= -M[0]+M[4]-M[8];
    N[15]= -M[0]-M[4]+M[8];
    N[1] =N[4] =M[5]-M[7];
    N[2] =N[8] =M[6]-M[2];
    N[3] =N[12]=M[1]-M[3];
    N[6] =N[9] =M[1]+M[3];
    N[7] =N[13]=M[6]+M[2];
    N[11]=N[14]=M[5]+M[7];

    /*get the quaternion representing the rotation
    by finding the eigenvector corresponding to the most
    positive eigenvalue. Force eigenvalue solutions, since the matrix
    is symmetric and solutions might otherwise be lost
    when the data is planar*/
    db_RealEigenvalues4x4(lambda,&nr_roots,N,1);
    if(nr_roots)
    {
        /*Pick the largest of the returned eigenvalues*/
        lambda_max=lambda[0];
        if(nr_roots>=2)
        {
            if(lambda[1]>lambda_max) lambda_max=lambda[1];
            if(nr_roots>=3)
            {
                if(lambda[2]>lambda_max) lambda_max=lambda[2];
                /*NOTE(review): the braces below are redundant but
                harmless; the nr_roots>=4 case is still reached*/
                {
                    if(nr_roots>=4) if(lambda[3]>lambda_max) lambda_max=lambda[3];
                }
            }
        }
    }
    else lambda_max=1.0;
    db_EigenVector4x4(q,lambda_max,N);

    /*Compute the rotation matrix*/
    db_QuaternionToRotation(R,q);

    if(score)
    {
        /*Compute score=transpose(q)*N*q */
        db_Multiply4x4_4x1(y,N,q);
        *score=db_ScalarProduct4(q,y);
    }
}
+
/*Estimate the similarity (scale, R, t) taking the nr_points 3D points
X to Xp, per the contract documented in db_framestitching.h. Each of
the three components can be individually disabled via the allow_*
flags (disabled components are returned as 1, identity and 0). When
orientation_preserving is false, both the det(R)>0 and det(R)<0
solutions are scored and the better one is kept.*/
void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3],
                              double **Xp,double **X,int nr_points,int orientation_preserving,
                              int allow_scaling,int allow_rotation,int allow_translation)
{
    int i;
    double c[3],cp[3],r[3],rp[3],M[9],s,sp,sc;
    double Rr[9],score_p,score_r;
    double *temp,*temp_p;

    /*Centroids of both point sets (zero when translation is fixed)*/
    if(allow_translation)
    {
        db_PointCentroid3D(c,X,nr_points);
        db_PointCentroid3D(cp,Xp,nr_points);
    }
    else
    {
        db_Zero3(c);
        db_Zero3(cp);
    }

    /*Accumulate the outer-product sum M of the centered points and
    the squared norms s, sp used for the scale estimate*/
    db_Zero9(M);
    s=sp=0;
    for(i=0;i<nr_points;i++)
    {
        temp= *X++;
        temp_p= *Xp++;
        r[0]=(*temp++)-c[0];
        r[1]=(*temp++)-c[1];
        r[2]=(*temp++)-c[2];
        rp[0]=(*temp_p++)-cp[0];
        rp[1]=(*temp_p++)-cp[1];
        rp[2]=(*temp_p++)-cp[2];

        M[0]+=r[0]*rp[0];
        M[1]+=r[0]*rp[1];
        M[2]+=r[0]*rp[2];
        M[3]+=r[1]*rp[0];
        M[4]+=r[1]*rp[1];
        M[5]+=r[1]*rp[2];
        M[6]+=r[2]*rp[0];
        M[7]+=r[2]*rp[1];
        M[8]+=r[2]*rp[2];

        s+=db_sqr(r[0])+db_sqr(r[1])+db_sqr(r[2]);
        sp+=db_sqr(rp[0])+db_sqr(rp[1])+db_sqr(rp[2]);
    }

    /*Compute scale*/
    if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s));
    else sc=1.0;
    *scale=sc;

    /*Compute rotation*/
    if(allow_rotation)
    {
        if(orientation_preserving)
        {
            db_RotationFromMOuterProductSum(R,0,M);
        }
        else
        {
            /*Try preserving*/
            db_RotationFromMOuterProductSum(R,&score_p,M);
            /*Try reversing: negate the third row of M, solve again and
            fold the reflection back into the result if it scores better*/
            M[6]= -M[6];
            M[7]= -M[7];
            M[8]= -M[8];
            db_RotationFromMOuterProductSum(Rr,&score_r,M);
            if(score_r>score_p)
            {
                /*Reverse is better*/
                R[0]=Rr[0]; R[1]=Rr[1]; R[2]= -Rr[2];
                R[3]=Rr[3]; R[4]=Rr[4]; R[5]= -Rr[5];
                R[6]=Rr[6]; R[7]=Rr[7]; R[8]= -Rr[8];
            }
        }
    }
    else db_Identity3x3(R);

    /*Compute translation t=cp-sc*R*c so centroids correspond*/
    if(allow_translation)
    {
        t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]+R[2]*c[2]);
        t[1]=cp[1]-sc*(R[3]*c[0]+R[4]*c[1]+R[5]*c[2]);
        t[2]=cp[2]-sc*(R[6]*c[0]+R[7]*c[1]+R[8]*c[2]);
    }
    else db_Zero3(t);
}
+
+
diff --git a/jni/feature_stab/db_vlvm/db_framestitching.h b/jni/feature_stab/db_vlvm/db_framestitching.h
new file mode 100644
index 000000000..5fef5f37e
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_framestitching.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_framestitching.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_FRAMESTITCHING_H
+#define DB_FRAMESTITCHING_H
+/*!
+ * \defgroup FrameStitching Frame Stitching (2D and 3D homography estimation)
+ */
+/*\{*/
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMFrameStitching (LM) Frame Stitching (2D and 3D homography estimation)
+ */
+/*\{*/
+
+/*!
+Find scale, rotation and translation of the similarity that
takes the nr_points inhomogeneous 3D points X to Xp
+(left to right according to Horn), i.e. for the homogenous equivalents
+Xp and X we would have
+\code
+ Xp~
+ [sR t]*X
+ [0 1]
+\endcode
+If orientation_preserving is true, R is restricted such that det(R)>0.
+allow_scaling, allow_rotation and allow_translation allow s,R and t
+to differ from 1,Identity and 0
+
+Full similarity takes the following on 550MHz:
+\code
+4.5 microseconds with 3 points
+4.7 microseconds with 4 points
+5.0 microseconds with 5 points
+5.2 microseconds with 6 points
+5.8 microseconds with 10 points
+20 microseconds with 100 points
+205 microseconds with 1000 points
+2.9 milliseconds with 10000 points
+50 milliseconds with 100000 points
+0.5 seconds with 1000000 points
+\endcode
+Without orientation_preserving:
+\code
+4 points is minimal for (s,R,t) (R,t)
+3 points is minimal for (s,R) (R)
+2 points is minimal for (s,t)
+1 point is minimal for (s) (t)
+\endcode
+With orientation_preserving:
+\code
+3 points is minimal for (s,R,t) (R,t)
+2 points is minimal for (s,R) (s,t) (R)
+1 point is minimal for (s) (t)
+\endcode
+
+\param scale scale
+\param R rotation
+\param t translation
\param Xp inhomogeneous 3D points in first coordinate system
\param X inhomogeneous 3D points in second coordinate system
+\param nr_points number of points
+\param orientation_preserving if true, R is restricted such that det(R)>0.
+\param allow_scaling estimate scale
+\param allow_rotation estimate rotation
+\param allow_translation estimate translation
+*/
+DB_API void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3],
+ double **Xp,double **X,int nr_points,int orientation_preserving=1,
+ int allow_scaling=1,int allow_rotation=1,int allow_translation=1);
+
+
+/*\}*/
+
+#endif /* DB_FRAMESTITCHING_H */
diff --git a/jni/feature_stab/db_vlvm/db_image_homography.cpp b/jni/feature_stab/db_vlvm/db_image_homography.cpp
new file mode 100644
index 000000000..aaad7f85e
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_image_homography.cpp
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_image_homography.h"
+#include "db_framestitching.h"
+#include "db_metrics.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+/*Compute the linear constraint on H obtained by requiring that the
+ratio between coordinate i_num and i_den of xp is equal to the ratio
+between coordinate i_num and i_den of Hx. i_zero should be set to
+the coordinate not equal to i_num or i_den. No normalization is used*/
+inline void db_SProjImagePointPointConstraint(double c[9],int i_num,int i_den,int i_zero,
+                                              double xp[3],double x[3])
+{
+    /*One linear constraint row on H: the triple at i_zero is zeroed,
+    while the i_num and i_den triples hold x scaled by minus/plus the
+    opposing coordinate of xp (the three writes touch disjoint ranges)*/
+    db_Zero3(c+3*i_zero);
+    db_MultiplyScalarCopy3(c+3*i_num,x,-xp[i_den]);
+    db_MultiplyScalarCopy3(c+3*i_den,x,xp[i_num]);
+}
+
+/*Compute two constraints on H generated by the correspondence (Xp,X),
+assuming that Xp ~= H*X. No normalization is used*/
+inline void db_SProjImagePointPointConstraints(double c1[9],double c2[9],double xp[3],double x[3])
+{
+    int largest;
+
+    /*Use the coordinate of xp with the largest magnitude as the
+    common denominator of both ratio constraints, for numerical
+    stability; the other two coordinates supply the numerators*/
+    largest=db_MaxAbsIndex3(xp);
+
+    if(largest==0)
+    {
+        db_SProjImagePointPointConstraint(c1,1,0,2,xp,x);
+        db_SProjImagePointPointConstraint(c2,2,0,1,xp,x);
+    }
+    else if(largest==1)
+    {
+        db_SProjImagePointPointConstraint(c1,0,1,2,xp,x);
+        db_SProjImagePointPointConstraint(c2,2,1,0,xp,x);
+    }
+    else
+    {
+        db_SProjImagePointPointConstraint(c1,0,2,1,xp,x);
+        db_SProjImagePointPointConstraint(c2,1,2,0,xp,x);
+    }
+}
+
+inline void db_SAffineImagePointPointConstraints(double c1[7],double c2[7],double xp[3],double x[3])
+{
+    /*Affine constraints are the projective ones with the entries
+    corresponding to H[6] and H[7] dropped: keep the first six
+    coefficients of each 9-entry row plus the last one*/
+    double full_a[9],full_b[9];
+    int i;
+
+    db_SProjImagePointPointConstraints(full_a,full_b,xp,x);
+    for(i=0;i<6;i++)
+    {
+        c1[i]=full_a[i];
+        c2[i]=full_b[i];
+    }
+    c1[6]=full_a[8];
+    c2[6]=full_b[8];
+}
+
+void db_StitchProjective2D_4Points(double H[9],
+    double x1[3],double x2[3],double x3[3],double x4[3],
+    double xp1[3],double xp2[3],double xp3[3],double xp4[3])
+{
+    double c[72];
+    double *xs[4],*xps[4];
+    int i;
+
+    xs[0]=x1; xs[1]=x2; xs[2]=x3; xs[3]=x4;
+    xps[0]=xp1; xps[1]=xp2; xps[2]=xp3; xps[3]=xp4;
+    /*Each correspondence contributes two 9-entry constraint rows*/
+    for(i=0;i<4;i++) db_SProjImagePointPointConstraints(c+18*i,c+18*i+9,xps[i],xs[i]);
+    /*H is the nullvector of the stacked 8x9 system*/
+    db_NullVector8x9Destructive(H,c);
+}
+
+void db_StitchAffine2D_3Points(double H[9],
+    double x1[3],double x2[3],double x3[3],
+    double xp1[3],double xp2[3],double xp3[3])
+{
+    double c[42];
+    double *xs[3],*xps[3];
+    int i;
+
+    xs[0]=x1; xs[1]=x2; xs[2]=x3;
+    xps[0]=xp1; xps[1]=xp2; xps[2]=xp3;
+    /*Each correspondence contributes two 7-entry constraint rows*/
+    for(i=0;i<3;i++) db_SAffineImagePointPointConstraints(c+14*i,c+14*i+7,xps[i],xs[i]);
+    /*The nullvector of the 6x7 system gives the affine part up to
+    scale; normalize so the last entry is one and fill in the
+    trivial projective row*/
+    db_NullVector6x7Destructive(H,c);
+    db_MultiplyScalar6(H,db_SafeReciprocal(H[6]));
+    H[6]=0.0; H[7]=0.0; H[8]=1.0;
+}
+
+/*Compute up to three solutions for the focal length given two point correspondences
+generated by a rotation with a common unknown focal length. No specific normalization
+of the input points is required. If signed_disambiguation is true, the points are
+required to be in front of the camera*/
+inline void db_CommonFocalLengthFromRotation_2Point(double fsol[3],int *nr_sols,double x1[3],double x2[3],double xp1[3],double xp2[3],int signed_disambiguation=1)
+{
+    double m,ax,ay,apx,apy,bx,by,bpx,bpy;
+    double p1[2],p2[2],p3[2],p4[2],p5[2],p6[2];
+    double p7[3],p8[4],p9[5],p10[3],p11[4];
+    double roots[3];
+    int nr_roots,i,j;
+
+    /*Solve for focal length using the equation
+    <a,b>^2*<ap,ap><bp,bp>=<ap,bp>^2*<a,a><b,b>
+    where a and ap are the homogenous vectors in the first image
+    after focal length scaling and b,bp are the vectors in the
+    second image*/
+
+    /*Normalize homogenous coordinates so that last coordinate is one*/
+    m=db_SafeReciprocal(x1[2]);
+    ax=x1[0]*m;
+    ay=x1[1]*m;
+    m=db_SafeReciprocal(xp1[2]);
+    apx=xp1[0]*m;
+    apy=xp1[1]*m;
+    m=db_SafeReciprocal(x2[2]);
+    bx=x2[0]*m;
+    by=x2[1]*m;
+    m=db_SafeReciprocal(xp2[2]);
+    bpx=xp2[0]*m;
+    bpy=xp2[1]*m;
+
+    /*Compute cubic in l=1/(f^2)
+    by dividing out the root l=0 from the equation
+    (l(ax*bx+ay*by)+1)^2*(l(apx^2+apy^2)+1)*(l(bpx^2+bpy^2)+1)=
+    (l(apx*bpx+apy*bpy)+1)^2*(l(ax^2+ay^2)+1)*(l(bx^2+by^2)+1)*/
+    /*Each pi is a degree-one polynomial in l stored lowest
+    coefficient first; all constant coefficients are one*/
+    p1[1]=ax*bx+ay*by;
+    p2[1]=db_sqr(apx)+db_sqr(apy);
+    p3[1]=db_sqr(bpx)+db_sqr(bpy);
+    p4[1]=apx*bpx+apy*bpy;
+    p5[1]=db_sqr(ax)+db_sqr(ay);
+    p6[1]=db_sqr(bx)+db_sqr(by);
+    p1[0]=p2[0]=p3[0]=p4[0]=p5[0]=p6[0]=1;
+
+    /*Expand the left hand side into p9=p1*p1*p2*p3, then subtract
+    the right hand side product p4*p4*p5*p6 from it in place*/
+    db_MultiplyPoly1_1(p7,p1,p1);
+    db_MultiplyPoly1_2(p8,p2,p7);
+    db_MultiplyPoly1_3(p9,p3,p8);
+
+    db_MultiplyPoly1_1(p10,p4,p4);
+    db_MultiplyPoly1_2(p11,p5,p10);
+    db_SubtractPolyProduct1_3(p9,p6,p11);
+    /*Cubic starts at p9[1]*/
+    db_SolveCubic(roots,&nr_roots,p9[4],p9[3],p9[2],p9[1]);
+
+    /*Keep only roots l>0 (l=1/f^2 must be positive for a real f);
+    with signed_disambiguation also require the two linear factors
+    p1,p4 evaluated at the root to agree in sign (points in front
+    of the camera, per the note above the function)*/
+    for(j=0,i=0;i<nr_roots;i++)
+    {
+        if(roots[i]>0)
+        {
+            if((!signed_disambiguation) || (db_PolyEval1(p1,roots[i])*db_PolyEval1(p4,roots[i])>0))
+            {
+                /*f=1/sqrt(l)*/
+                fsol[j++]=db_SafeSqrtReciprocal(roots[i]);
+            }
+        }
+    }
+    *nr_sols=j;
+}
+
+int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3],double xp1[3],double xp2[3],double xp3[3],double *f,int signed_disambiguation)
+{
+    double fsol[3];
+    int nr_sols,i,best_sol,done;
+    double cost,best_cost;
+    double m,hyp[27],x1_temp[3],x2_temp[3],xp1_temp[3],xp2_temp[3];
+    double *hyp_point,ft;
+    double y[2];
+
+    /*Get up to three focal length candidates from the first two
+    correspondences*/
+    db_CommonFocalLengthFromRotation_2Point(fsol,&nr_sols,x1,x2,xp1,xp2,signed_disambiguation);
+    if(nr_sols)
+    {
+        /*The third correspondence is used only to score candidates*/
+        db_DeHomogenizeImagePoint(y,xp3);
+        done=0;
+        for(i=0;i<nr_sols;i++)
+        {
+            /*Scale the first two homogenous coordinates by 1/f,
+            i.e. apply diag(1/f,1/f,1) to all four points*/
+            ft=fsol[i];
+            m=db_SafeReciprocal(ft);
+            x1_temp[0]=x1[0]*m;
+            x1_temp[1]=x1[1]*m;
+            x1_temp[2]=x1[2];
+            x2_temp[0]=x2[0]*m;
+            x2_temp[1]=x2[1]*m;
+            x2_temp[2]=x2[2];
+            xp1_temp[0]=xp1[0]*m;
+            xp1_temp[1]=xp1[1]*m;
+            xp1_temp[2]=xp1[2];
+            xp2_temp[0]=xp2[0]*m;
+            xp2_temp[1]=xp2[1]*m;
+            xp2_temp[2]=xp2[2];
+
+            /*Fit a rotation in the scaled frame, then conjugate it
+            back: the scaling below realizes
+            H=diag(f,f,1)*R*diag(1/f,1/f,1)*/
+            hyp_point=hyp+9*i;
+            db_StitchCameraRotation_2Points(hyp_point,x1_temp,x2_temp,xp1_temp,xp2_temp);
+            hyp_point[2]*=ft;
+            hyp_point[5]*=ft;
+            hyp_point[6]*=m;
+            hyp_point[7]*=m;
+            cost=db_SquaredReprojectionErrorHomography(y,hyp_point,x3);
+
+            /*done guards the first comparison, so best_cost and
+            best_sol are always written before they are read*/
+            if(!done || cost<best_cost)
+            {
+                done=1;
+                best_cost=cost;
+                best_sol=i;
+            }
+        }
+
+        if(f) *f=fsol[best_sol];
+        db_Copy9(H,hyp+9*best_sol);
+        return(1);
+    }
+    else
+    {
+        /*No usable focal length candidate: identity and f=1*/
+        db_Identity3x3(H);
+        if(f) *f=1.0;
+        return(0);
+    }
+}
+
+void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2],
+    double **Xp,double **X,int nr_points,int orientation_preserving,
+    int allow_scaling,int allow_rotation,int allow_translation)
+{
+    /*Closed-form 2D similarity fit: center both point sets,
+    accumulate the 2x2 correlation matrix M and the squared point
+    spreads, then read off scale, rotation and translation*/
+    int i;
+    double c[2],cp[2],r[2],rp[2],M[4],s,sp,sc;
+    double *temp,*temp_p;
+    double Aacc,Bacc,Aacc2,Bacc2,divisor,divisor2,m,Am,Bm;
+
+    /*Centroids of X and Xp; when translation is not estimated the
+    points are taken about the origin*/
+    if(allow_translation)
+    {
+        db_PointCentroid2D(c,X,nr_points);
+        db_PointCentroid2D(cp,Xp,nr_points);
+    }
+    else
+    {
+        db_Zero2(c);
+        db_Zero2(cp);
+    }
+
+    /*Accumulate M=sum(r*rp^T) and the spreads s=sum|r|^2,
+    sp=sum|rp|^2 over the centered point pairs (the local copies of
+    the X,Xp pointers are advanced, the caller's arrays are only read)*/
+    db_Zero4(M);
+    s=sp=0;
+    for(i=0;i<nr_points;i++)
+    {
+        temp= *X++;
+        temp_p= *Xp++;
+        r[0]=(*temp++)-c[0];
+        r[1]=(*temp++)-c[1];
+        rp[0]=(*temp_p++)-cp[0];
+        rp[1]=(*temp_p++)-cp[1];
+
+        M[0]+=r[0]*rp[0];
+        M[1]+=r[0]*rp[1];
+        M[2]+=r[1]*rp[0];
+        M[3]+=r[1]*rp[1];
+
+        s+=db_sqr(r[0])+db_sqr(r[1]);
+        sp+=db_sqr(rp[0])+db_sqr(rp[1]);
+    }
+
+    /*Compute scale as the ratio of the point-set spreads*/
+    if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s));
+    else sc=1.0;
+    *scale=sc;
+
+    /*Compute rotation: fit a rotation [A B;-B A] (or, if allowed, a
+    reflection [A B;B -A]) to M; the branch with the larger
+    accumulator norm explains the data better*/
+    if(allow_rotation)
+    {
+        /*orientation preserving*/
+        Aacc=M[0]+M[3];
+        Bacc=M[2]-M[1];
+        /*orientation reversing*/
+        Aacc2=M[0]-M[3];
+        Bacc2=M[2]+M[1];
+        if(Aacc!=0.0 || Bacc!=0.0)
+        {
+            divisor=sqrt(Aacc*Aacc+Bacc*Bacc);
+            m=db_SafeReciprocal(divisor);
+            Am=Aacc*m;
+            Bm=Bacc*m;
+            R[0]= Am;
+            R[1]= Bm;
+            R[2]= -Bm;
+            R[3]= Am;
+        }
+        else
+        {
+            /*Degenerate accumulators: fall back to identity*/
+            db_Identity2x2(R);
+            divisor=0.0;
+        }
+        if(!orientation_preserving && (Aacc2!=0.0 || Bacc2!=0.0))
+        {
+            divisor2=sqrt(Aacc2*Aacc2+Bacc2*Bacc2);
+            if(divisor2>divisor)
+            {
+                m=db_SafeReciprocal(divisor2);
+                Am=Aacc2*m;
+                Bm=Bacc2*m;
+                R[0]= Am;
+                R[1]= Bm;
+                R[2]= Bm;
+                R[3]= -Am;
+            }
+        }
+    }
+    else db_Identity2x2(R);
+
+    /*Compute translation t=cp-sc*R*c, so the centroid of X maps
+    exactly onto the centroid of Xp*/
+    if(allow_translation)
+    {
+        t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]);
+        t[1]=cp[1]-sc*(R[2]*c[0]+R[3]*c[1]);
+    }
+    else db_Zero2(t);
+}
+
+
diff --git a/jni/feature_stab/db_vlvm/db_image_homography.h b/jni/feature_stab/db_vlvm/db_image_homography.h
new file mode 100644
index 000000000..165447dd7
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_image_homography.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_image_homography.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_IMAGE_HOMOGRAPHY
+#define DB_IMAGE_HOMOGRAPHY
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_framestitching.h"
+/*!
+ * \defgroup LMImageHomography (LM) Image Homography Estimation (feature based)
+ */
+/*\{*/
+/*!
+Solve for projective H such that xp~Hx. Prior normalization is not necessary,
+although desirable for numerical conditioning
+\param H image projective (out)
+\param x1 image 1 point 1
+\param x2 image 1 point 2
+\param x3 image 1 point 3
+\param x4 image 1 point 4
+\param xp1 image 2 point 1
+\param xp2 image 2 point 2
+\param xp3 image 2 point 3
+\param xp4 image 2 point 4
+*/
+DB_API void db_StitchProjective2D_4Points(double H[9],
+ double x1[3],double x2[3],double x3[3],double x4[3],
+ double xp1[3],double xp2[3],double xp3[3],double xp4[3]);
+
+/*!
+Solve for affine H such that xp~Hx. Prior normalization is not necessary,
+although desirable for numerical conditioning
+\param H image projective (out)
+\param x1 image 1 point 1
+\param x2 image 1 point 2
+\param x3 image 1 point 3
+\param xp1 image 2 point 1
+\param xp2 image 2 point 2
+\param xp3 image 2 point 3
+*/
+DB_API void db_StitchAffine2D_3Points(double H[9],
+ double x1[3],double x2[3],double x3[3],
+ double xp1[3],double xp2[3],double xp3[3]);
+
+/*!
+Solve for rotation R such that xp~Rx.
+Image points have to be of unit norm for the least squares to be meaningful.
+\param R image rotation (out)
+\param x1 image 1 point 1
+\param x2 image 1 point 2
+\param xp1 image 2 point 1
+\param xp2 image 2 point 2
+*/
+inline void db_StitchCameraRotation_2Points(double R[9],
+                                            /*Image points have to be of unit norm
+                                            for the least squares to be meaningful*/
+                                            double x1[3],double x2[3],
+                                            double xp1[3],double xp2[3])
+{
+    double scale,t[3];
+    double *x[2]={x1,x2};
+    double *xp[2]={xp1,xp2};
+
+    /*Rotation-only fit: delegate to the 3D similarity solver with
+    scaling and translation disabled and orientation preserved*/
+    db_StitchSimilarity3DRaw(&scale,R,t,xp,x,2,1,0,1,0);
+}
+
+/*!
+Solve for a homography H generated by a rotation R with a common unknown focal length f, i.e.
+H=diag(f,f,1)*R*diag(1/f,1/f,1) such that xp~Hx.
+If signed_disambiguation is true, the points are
+required to be in front of the camera. No specific normalization of the homogenous points
+is required, although it could be desirable to keep x1,x2,xp1 and xp2 of reasonable magnitude.
+If a solution is obtained the function returns 1, otherwise 0. If the focal length is desired
+a valid pointer should be passed in f
+*/
+DB_API int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3],
+ double xp1[3],double xp2[3],double xp3[3],double *f=0,int signed_disambiguation=1);
+
+/*!
+Find scale, rotation and translation of the similarity that
+takes the nr_points inhomogenous 2D points X to Xp,
+i.e. for the homogenous equivalents
+Xp and X we would have
+\code
+Xp~
+[sR t]*X
+[0 1]
+\endcode
+If orientation_preserving is true, R is restricted such that det(R)>0.
+allow_scaling, allow_rotation and allow_translation allow s,R and t
+to differ from 1,Identity and 0
+
+Full similarity takes the following on 550MHz:
+\code
+0.9 microseconds with 2 points
+1.0 microseconds with 3 points
+1.1 microseconds with 4 points
+1.3 microseconds with 5 points
+1.4 microseconds with 6 points
+1.7 microseconds with 10 points
+9 microseconds with 100 points
+130 microseconds with 1000 points
+1.3 milliseconds with 10000 points
+35 milliseconds with 100000 points
+350 milliseconds with 1000000 points
+\endcode
+
+Without orientation_preserving:
+\code
+3 points is minimal for (s,R,t) (R,t)
+2 points is minimal for (s,t) (s,R) (R)
+1 point is minimal for (s) (t)
+\endcode
+
+With orientation_preserving:
+\code
+2 points is minimal for (s,R,t) (R,t) (s,t)
+1 point is minimal for (s,R) (R) (s) (t)
+\endcode
+\param scale (out)
+\param R 2D rotation (out)
+\param t 2D translation (out)
+\param Xp (nr_points x 2) pointer to array of image points
+\param X (nr_points x 2 ) pointer to array of image points
+\param nr_points number of points
+\param orientation_preserving
+\param allow_scaling compute scale (if 0, scale=1)
+\param allow_rotation compute rotation (if 0, R=[I])
+\param allow_translation compute translation (if 0 t = [0,0]')
+*/
+DB_API void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2],
+ double **Xp,double **X,int nr_points,int orientation_preserving=1,
+ int allow_scaling=1,int allow_rotation=1,int allow_translation=1);
+/*!
+See db_StitchSimilarity2DRaw().
+\param H Image similarity transformation (out)
+\param Xp (nr_points x 2) pointer to array of image points
+\param X (nr_points x 2) pointer to array of image points
+\param nr_points number of points
+\param orientation_preserving
+\param allow_scaling compute scale (if 0, scale=1)
+\param allow_rotation compute rotation (if 0, R=[I])
+\param allow_translation compute translation (if 0 t = [0,0]')
+*/
+inline void db_StitchSimilarity2D(double H[9],double **Xp,double **X,int nr_points,int orientation_preserving=1,
+                                  int allow_scaling=1,int allow_rotation=1,int allow_translation=1)
+{
+    double sc,rot[4],trans[2];
+
+    /*Estimate s,R,t and assemble them into the 3x3 homogenous
+    transform [s*R t; 0 1]*/
+    db_StitchSimilarity2DRaw(&sc,rot,trans,Xp,X,nr_points,orientation_preserving,
+        allow_scaling,allow_rotation,allow_translation);
+
+    H[0]=sc*rot[0]; H[1]=sc*rot[1]; H[2]=trans[0];
+    H[3]=sc*rot[2]; H[4]=sc*rot[3]; H[5]=trans[1];
+    db_Zero2(H+6);
+    H[8]=1.0;
+}
+/*\}*/
+#endif /* DB_IMAGE_HOMOGRAPHY */
diff --git a/jni/feature_stab/db_vlvm/db_metrics.h b/jni/feature_stab/db_vlvm/db_metrics.h
new file mode 100644
index 000000000..6b95458f1
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_metrics.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_metrics.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_METRICS
+#define DB_METRICS
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_utilities.h"
+/*!
+ * \defgroup LMMetrics (LM) Metrics
+ */
+/*\{*/
+
+
+
+
+/*!
+Compute function value fp and Jacobian J of robustifier given input value f*/
+inline void db_CauchyDerivative(double J[4],double fp[2],const double f[2],double one_over_scale2)
+{
+    /*Outputs: fp is the robustified residual and J the 2x2 (row
+    major) Jacobian of the robustifier at the input residual f;
+    one_over_scale2=1/(scale*scale)*/
+    double x2,y2,r2,r2s,one_over_r2,fu,r_fu,one_over_r_fu;
+    double one_plus_r2s,half_dfu_dx,half_dfu_dy,coeff,coeff2,coeff3;
+    int at_zero;
+
+    /*The robustifier takes the input (x,y) and makes a new
+    vector (xp,yp) where
+    xp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*x/sqrt(x^2+y^2)
+    yp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*y/sqrt(x^2+y^2)
+    The new vector has the property
+    xp^2+yp^2=log(1+(x^2+y^2)*one_over_scale2)
+    i.e. when it is square-summed it gives the robust
+    reprojection error
+    Define
+    r2=(x^2+y^2) and
+    r2s=r2*one_over_scale2
+    fu=log(1+r2s)/r2
+    then
+    xp=sqrt(fu)*x
+    yp=sqrt(fu)*y
+    and
+    d(r2)/dx=2x
+    d(r2)/dy=2y
+    and
+    dfu/dx=d(r2)/dx*(r2s/(1+r2s)-log(1+r2s))/(r2*r2)
+    dfu/dy=d(r2)/dy*(r2s/(1+r2s)-log(1+r2s))/(r2*r2)
+    and
+    d(xp)/dx=1/(2sqrt(fu))*(dfu/dx)*x+sqrt(fu)
+    d(xp)/dy=1/(2sqrt(fu))*(dfu/dy)*x
+    d(yp)/dx=1/(2sqrt(fu))*(dfu/dx)*y
+    d(yp)/dy=1/(2sqrt(fu))*(dfu/dy)*y+sqrt(fu)
+    */
+
+    x2=db_sqr(f[0]);
+    y2=db_sqr(f[1]);
+    r2=x2+y2;
+    /*(the radius sqrt(r2) itself is never used below; the dead
+    sqrt the original code computed here has been removed)*/
+
+    if(r2<=0.0) at_zero=1;
+    else
+    {
+        one_over_r2=1.0/r2;
+        r2s=r2*one_over_scale2;
+        one_plus_r2s=1.0+r2s;
+        fu=log(one_plus_r2s)*one_over_r2;
+        r_fu=sqrt(fu);
+        if(r_fu<=0.0) at_zero=1;
+        else
+        {
+            one_over_r_fu=1.0/r_fu;
+            fp[0]=r_fu*f[0];
+            fp[1]=r_fu*f[1];
+            /*r2s is always >= 0*/
+            coeff=(r2s/one_plus_r2s*one_over_r2-fu)*one_over_r2;
+            half_dfu_dx=f[0]*coeff;
+            half_dfu_dy=f[1]*coeff;
+            coeff2=one_over_r_fu*half_dfu_dx;
+            coeff3=one_over_r_fu*half_dfu_dy;
+
+            J[0]=coeff2*f[0]+r_fu;
+            J[1]=coeff3*f[0];
+            J[2]=coeff2*f[1];
+            J[3]=coeff3*f[1]+r_fu;
+            at_zero=0;
+        }
+    }
+    if(at_zero)
+    {
+        /*Close to zero the robustifying mapping
+        becomes identity*sqrt(one_over_scale2)*/
+        fp[0]=0.0;
+        fp[1]=0.0;
+        J[0]=sqrt(one_over_scale2);
+        J[1]=0.0;
+        J[2]=0.0;
+        J[3]=J[0];
+    }
+}
+
+inline double db_SquaredReprojectionErrorHomography(const double y[2],const double H[9],const double x[3])
+{
+    double xh,yh,zh,inv_z;
+
+    /*Map the homogenous point x through H*/
+    xh=H[0]*x[0]+H[1]*x[1]+H[2]*x[2];
+    yh=H[3]*x[0]+H[4]*x[1]+H[5]*x[2];
+    zh=H[6]*x[0]+H[7]*x[1]+H[8]*x[2];
+    /*Dehomogenize, treating a zero third coordinate as one*/
+    inv_z=1.0/((zh!=0.0)?zh:1.0);
+    /*Squared Euclidean distance to the measured point y*/
+    return(db_sqr(y[0]-xh*inv_z)+db_sqr(y[1]-yh*inv_z));
+}
+
+inline double db_SquaredInhomogenousHomographyError(const double y[2],const double H[9],const double x[2])
+{
+    double xh,yh,zh,inv_z;
+
+    /*Warp the inhomogenous point, lifted to (x[0],x[1],1), by H*/
+    xh=H[0]*x[0]+H[1]*x[1]+H[2];
+    yh=H[3]*x[0]+H[4]*x[1]+H[5];
+    zh=H[6]*x[0]+H[7]*x[1]+H[8];
+    /*Dehomogenize, treating a zero third coordinate as one*/
+    inv_z=1.0/((zh!=0.0)?zh:1.0);
+    /*Squared Euclidean distance to the measured point y*/
+    return(db_sqr(y[0]-xh*inv_z)+db_sqr(y[1]-yh*inv_z));
+}
+
+/*!
+Return a constant divided by likelihood of a Cauchy distributed
+reprojection error given the image point y, homography H, image point
+point x and the squared scale coefficient one_over_scale2=1.0/(scale*scale)
+where scale is the half width at half maximum (hWahM) of the
+Cauchy distribution*/
+inline double db_ExpCauchyInhomogenousHomographyError(const double y[2],const double H[9],const double x[2],
+                                                      double one_over_scale2)
+{
+    /*1+e^2/scale^2, where e^2 is the squared reprojection error;
+    a constant divided by the Cauchy likelihood of the residual*/
+    return(1.0+db_SquaredInhomogenousHomographyError(y,H,x)*one_over_scale2);
+}
+
+/*!
+Compute residual vector f between image point y and homography Hx of
+image point x. Also compute Jacobian of f with respect
+to an update dx of H*/
+inline void db_DerivativeInhomHomographyError(double Jf_dx[18],double f[2],const double y[2],const double H[9],
+                                           const double x[2])
+{
+    double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2;
+    /*The Jacobian of the inhomogenous coordinates with respect to
+    the homogenous is
+    [1/zh  0  -xh/(zh*zh)]
+    [ 0  1/zh -yh/(zh*zh)]
+    The Jacobian of the homogenous coordinates with respect to dH is
+    [x0 x1 1  0  0 0 0  0 0]
+    [ 0  0 0 x0 x1 1 0  0 0]
+    [ 0  0 0  0  0 0 x0 x1 1]
+    The output Jacobian is minus their product, i.e.
+    [-x0/zh -x1/zh -1/zh      0      0     0 x0*xh/(zh*zh) x1*xh/(zh*zh) xh/(zh*zh)]
+    [     0      0     0 -x0/zh -x1/zh -1/zh x0*yh/(zh*zh) x1*yh/(zh*zh) yh/(zh*zh)]*/
+
+    /*Compute warped point, which is the same as
+    homogenous coordinates of reprojection*/
+    xh=H[0]*x[0]+H[1]*x[1]+H[2];
+    yh=H[3]*x[0]+H[4]*x[1]+H[5];
+    zh=H[6]*x[0]+H[7]*x[1]+H[8];
+    mult=1.0/((zh!=0.0)?zh:1.0);
+    /*Compute inhomogenous residual*/
+    f[0]=y[0]-xh*mult;
+    f[1]=y[1]-yh*mult;
+    /*Compute Jacobian*/
+    mult2=mult*mult;
+    xh_mult2=xh*mult2;
+    yh_mult2=yh*mult2;
+    /*First row (derivatives of f[0]): entries 3-5 are zero*/
+    Jf_dx[0]= -x[0]*mult;
+    Jf_dx[1]= -x[1]*mult;
+    Jf_dx[2]= -mult;
+    Jf_dx[3]=0;
+    Jf_dx[4]=0;
+    Jf_dx[5]=0;
+    Jf_dx[6]=x[0]*xh_mult2;
+    Jf_dx[7]=x[1]*xh_mult2;
+    Jf_dx[8]=xh_mult2;
+    /*Second row (derivatives of f[1]): entries 9-11 are zero and
+    entries 12-14 duplicate the first three of row one*/
+    Jf_dx[9]=0;
+    Jf_dx[10]=0;
+    Jf_dx[11]=0;
+    Jf_dx[12]=Jf_dx[0];
+    Jf_dx[13]=Jf_dx[1];
+    Jf_dx[14]=Jf_dx[2];
+    Jf_dx[15]=x[0]*yh_mult2;
+    Jf_dx[16]=x[1]*yh_mult2;
+    Jf_dx[17]=yh_mult2;
+}
+
+/*!
+Compute robust residual vector f between image point y and homography Hx of
+image point x. Also compute Jacobian of f with respect
+to an update dH of H*/
+inline void db_DerivativeCauchyInhomHomographyReprojection(double Jf_dx[18],double f[2],const double y[2],const double H[9],
+                                                        const double x[2],double one_over_scale2)
+{
+    double Jf_dx_loc[18],f_loc[2];
+    double J[4],J0,J1,J2,J3;
+
+    /*Compute reprojection Jacobian*/
+    db_DerivativeInhomHomographyError(Jf_dx_loc,f_loc,y,H,x);
+    /*Compute robustifier Jacobian*/
+    db_CauchyDerivative(J,f,f_loc,one_over_scale2);
+
+    /*Multiply the robustifier Jacobian with
+    the reprojection Jacobian*/
+    /*The products below exploit the zero pattern of the 2x9
+    reprojection Jacobian: entries 3-5 and 9-11 are zero and
+    entries 12-14 duplicate entries 0-2, so those terms are
+    folded in directly instead of multiplied out*/
+    J0=J[0];J1=J[1];J2=J[2];J3=J[3];
+    Jf_dx[0]=J0*Jf_dx_loc[0];
+    Jf_dx[1]=J0*Jf_dx_loc[1];
+    Jf_dx[2]=J0*Jf_dx_loc[2];
+    Jf_dx[3]=            J1*Jf_dx_loc[12];
+    Jf_dx[4]=            J1*Jf_dx_loc[13];
+    Jf_dx[5]=            J1*Jf_dx_loc[14];
+    Jf_dx[6]=J0*Jf_dx_loc[6]+J1*Jf_dx_loc[15];
+    Jf_dx[7]=J0*Jf_dx_loc[7]+J1*Jf_dx_loc[16];
+    Jf_dx[8]=J0*Jf_dx_loc[8]+J1*Jf_dx_loc[17];
+    Jf_dx[9]= J2*Jf_dx_loc[0];
+    Jf_dx[10]=J2*Jf_dx_loc[1];
+    Jf_dx[11]=J2*Jf_dx_loc[2];
+    Jf_dx[12]=            J3*Jf_dx_loc[12];
+    Jf_dx[13]=            J3*Jf_dx_loc[13];
+    Jf_dx[14]=            J3*Jf_dx_loc[14];
+    Jf_dx[15]=J2*Jf_dx_loc[6]+J3*Jf_dx_loc[15];
+    Jf_dx[16]=J2*Jf_dx_loc[7]+J3*Jf_dx_loc[16];
+    Jf_dx[17]=J2*Jf_dx_loc[8]+J3*Jf_dx_loc[17];
+}
+/*!
+Compute residual vector f between image point y and rotation of
+image point x by R. Also compute Jacobian of f with respect
+to an update dx of R*/
+inline void db_DerivativeInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9],
+                                                const double x[2])
+{
+    double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2;
+    /*The Jacobian of the inhomogenous coordinates with respect to
+    the homogenous is
+    [1/zh  0  -xh/(zh*zh)]
+    [ 0  1/zh -yh/(zh*zh)]
+    The Jacobian at zero of the homogenous coordinates with respect to
+    [sin(phi) sin(ohm) sin(kap)] is
+    [-rx2    0  rx1 ]
+    [   0  rx2 -rx0 ]
+    [ rx0 -rx1    0 ]
+    The output Jacobian is minus their product, i.e.
+    [1+xh*xh/(zh*zh)    -xh*yh/(zh*zh) -yh/zh]
+    [xh*yh/(zh*zh)   -1-yh*yh/(zh*zh)   xh/zh]*/
+
+    /*Compute rotated point, which is the same as
+    homogenous coordinates of reprojection
+    (x is lifted to the homogenous point (x[0],x[1],1))*/
+    xh=R[0]*x[0]+R[1]*x[1]+R[2];
+    yh=R[3]*x[0]+R[4]*x[1]+R[5];
+    zh=R[6]*x[0]+R[7]*x[1]+R[8];
+    mult=1.0/((zh!=0.0)?zh:1.0);
+    /*Compute inhomogenous residual*/
+    f[0]=y[0]-xh*mult;
+    f[1]=y[1]-yh*mult;
+    /*Compute Jacobian: entries 0-2 are the derivatives of f[0]
+    and entries 3-5 those of f[1] with respect to the three
+    rotation update angles*/
+    mult2=mult*mult;
+    xh_mult2=xh*mult2;
+    yh_mult2=yh*mult2;
+    Jf_dx[0]= 1.0+xh*xh_mult2;
+    Jf_dx[1]= -yh*xh_mult2;
+    Jf_dx[2]= -yh*mult;
+    Jf_dx[3]= -Jf_dx[1];
+    Jf_dx[4]= -1-yh*yh_mult2;
+    Jf_dx[5]= xh*mult;
+}
+
+/*!
+Compute robust residual vector f between image point y and rotation of
+image point x by R. Also compute Jacobian of f with respect
+to an update dx of R*/
+inline void db_DerivativeCauchyInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9],
+                                                         const double x[2],double one_over_scale2)
+{
+    double Jr[6],r[2];
+    double J[4];
+    int col;
+
+    /*Jacobian and residual of the plain (non-robust) reprojection*/
+    db_DerivativeInhomRotationReprojection(Jr,r,y,R,x);
+    /*Value and 2x2 Jacobian of the robustifier at that residual*/
+    db_CauchyDerivative(J,f,r,one_over_scale2);
+
+    /*Chain rule: multiply the 2x2 robustifier Jacobian into the
+    2x3 reprojection Jacobian, column by column*/
+    for(col=0;col<3;col++)
+    {
+        Jf_dx[col]=J[0]*Jr[col]+J[1]*Jr[col+3];
+        Jf_dx[col+3]=J[2]*Jr[col]+J[3]*Jr[col+3];
+    }
+}
+
+
+
+/*!
+// remove the outliers whose projection error is larger than pre-defined
+*/
+inline int db_RemoveOutliers_Homography(const double H[9], double *x_i,double *xp_i, double *wp,double *im, double *im_p, double *im_r, double *im_raw,double *im_raw_p,int point_count,double scale, double thresh=DB_OUTLIER_THRESHOLD)
+{
+    /*Compacts all the parallel correspondence arrays in place,
+    keeping only the points whose homography reprojection error
+    passes the threshold test, and returns the inlier count.
+    Strides per point: x_i/xp_i 2 doubles, im/im_p/im_r/im_raw/
+    im_raw_p 3 doubles, wp 4 doubles.
+    NOTE(review): the scale parameter is not used in this function*/
+    double temp_valueE, t2;
+    int c;
+    int k1=0;
+    int k2=0;
+    int k3=0;
+    int numinliers=0;
+    int ind1;
+    int ind2;
+    int ind3;
+    int isinlier;
+
+    // experimentally determined
+    /*NOTE(review): the squared error is compared against
+    1/thresh^4, i.e. thresh enters to the fourth power - presumably
+    matching the convention of DB_OUTLIER_THRESHOLD; confirm*/
+    t2=1.0/(thresh*thresh*thresh*thresh);
+
+    // count the inliers
+    for(c=0;c<point_count;c++)
+    {
+        /*Read indices into the arrays of stride 2, 4 and 3*/
+        ind1=c<<1;
+        ind2=c<<2;
+        ind3=3*c;
+
+        temp_valueE=db_SquaredInhomogenousHomographyError(im_p+ind3,H,im+ind3);
+
+        isinlier=((temp_valueE<=t2)?1:0);
+
+        // if it is inlier, then copy the 3d and 2d correspondences
+        /*The write cursors never pass the read indices, so the
+        in-place compaction is safe*/
+        if (isinlier)
+        {
+            numinliers++;
+
+            x_i[k1]=x_i[ind1];
+            x_i[k1+1]=x_i[ind1+1];
+
+            xp_i[k1]=xp_i[ind1];
+            xp_i[k1+1]=xp_i[ind1+1];
+
+            k1=k1+2;
+
+            // original normalized pixel coordinates
+            im[k3]=im[ind3];
+            im[k3+1]=im[ind3+1];
+            im[k3+2]=im[ind3+2];
+
+            im_r[k3]=im_r[ind3];
+            im_r[k3+1]=im_r[ind3+1];
+            im_r[k3+2]=im_r[ind3+2];
+
+            im_p[k3]=im_p[ind3];
+            im_p[k3+1]=im_p[ind3+1];
+            im_p[k3+2]=im_p[ind3+2];
+
+            // left and right raw pixel coordinates
+            im_raw[k3] = im_raw[ind3];
+            im_raw[k3+1] = im_raw[ind3+1];
+            im_raw[k3+2] = im_raw[ind3+2]; // the index
+
+            im_raw_p[k3] = im_raw_p[ind3];
+            im_raw_p[k3+1] = im_raw_p[ind3+1];
+            im_raw_p[k3+2] = im_raw_p[ind3+2]; // the index
+
+            k3=k3+3;
+
+            // 3D coordinates
+            wp[k2]=wp[ind2];
+            wp[k2+1]=wp[ind2+1];
+            wp[k2+2]=wp[ind2+2];
+            wp[k2+3]=wp[ind2+3];
+
+            k2=k2+4;
+
+        }
+    }
+
+    return numinliers;
+}
+
+
+
+
+
+/*\}*/
+
+#endif /* DB_METRICS */
diff --git a/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp b/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp
new file mode 100644
index 000000000..82dec0cbe
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_rob_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_rob_image_homography.h"
+#include "db_bundle.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_image_homography.h"
+
+#ifdef _VERBOSE_
+#include <iostream>
+using namespace std;
+#endif /*VERBOSE*/
+
+inline double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+ int c;
+ double back,acc,*x_i_temp,*xp_i_temp;
+
+ for(back=0.0,c=0;c<point_count;)
+ {
+ /*Take log of product of ten reprojection
+ errors to reduce nr of expensive log operations*/
+ if(c+9<point_count)
+ {
+ x_i_temp=x_i+(c<<1);
+ xp_i_temp=xp_i+(c<<1);
+
+ acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,H,x_i_temp,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,H,x_i_temp+2,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,H,x_i_temp+4,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,H,x_i_temp+6,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,H,x_i_temp+8,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,H,x_i_temp+10,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,H,x_i_temp+12,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,H,x_i_temp+14,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,H,x_i_temp+16,one_over_scale2);
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,H,x_i_temp+18,one_over_scale2);
+ c+=10;
+ }
+ else
+ {
+ for(acc=1.0;c<point_count;c++)
+ {
+ acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1),one_over_scale2);
+ }
+ }
+ back+=log(acc);
+ }
+ return(back);
+}
+
/*Compute the fraction of correspondences whose squared reprojection error
under H, rescaled by one_over_scale2, is within thresh^2. If stat is
non-NULL, also fill in the robust-estimation statistics including the
GRIC-style model-selection score.
NOTE(review): stat->nr_parameters is read below but never written here --
the caller is expected to have set it beforehand (as db_RobImageHomography
does); confirm before calling from elsewhere.*/
inline double db_RobImageHomography_Statistics(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2,db_Statistics *stat,double thresh=DB_OUTLIER_THRESHOLD)
{
    int c,i;
    double t2,frac;

    /*Count inliers against the squared threshold*/
    t2=thresh*thresh;
    for(i=0,c=0;c<point_count;c++)
    {
        i+=(db_SquaredInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1))*one_over_scale2<=t2)?1:0;
    }
    /*db_maxi guards against division by zero when point_count==0*/
    frac=((double)i)/((double)(db_maxi(point_count,1)));

#ifdef _VERBOSE_
    std::cout << "Inlier Percentage RobImageHomography: " << frac*100.0 << "% out of " << point_count << " constraints" << std::endl;
#endif /*_VERBOSE_*/

    if(stat)
    {
        stat->nr_points=point_count;
        stat->one_over_scale2=one_over_scale2;
        stat->nr_inliers=i;
        stat->inlier_fraction=frac;

        stat->cost=db_RobImageHomography_Cost(H,point_count,x_i,xp_i,one_over_scale2);
        /*model_dimension is zero, so the lambda1 term of gric vanishes*/
        stat->model_dimension=0;
        /*stat->nr_parameters=;*/

        stat->lambda1=log(4.0);
        stat->lambda2=log(4.0*((double)db_maxi(1,stat->nr_points)));
        stat->lambda3=10.0;
        stat->gric=stat->cost+stat->lambda1*stat->model_dimension*((double)stat->nr_points)+stat->lambda2*((double)stat->nr_parameters);
        stat->inlier_evidence=((double)stat->nr_inliers)-stat->lambda3*((double)stat->nr_parameters);
    }

    return(frac);
}
+
+/*Compute min_Jtf and upper right of JtJ. Return cost.*/
+inline double db_RobImageHomography_Jacobians(double JtJ[81],double min_Jtf[9],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+ double back,Jf_dx[18],f[2],temp,temp2;
+ int i;
+
+ db_Zero(JtJ,81);
+ db_Zero(min_Jtf,9);
+ for(back=0.0,i=0;i<point_count;i++)
+ {
+ /*Compute reprojection error vector and its Jacobian
+ for this point*/
+ db_DerivativeCauchyInhomHomographyReprojection(Jf_dx,f,xp_i+(i<<1),H,x_i+(i<<1),one_over_scale2);
+ /*Perform
+ min_Jtf-=Jf_dx*f[0] and
+ min_Jtf-=(Jf_dx+9)*f[1] to accumulate -Jt%f*/
+ db_RowOperation9(min_Jtf,Jf_dx,f[0]);
+ db_RowOperation9(min_Jtf,Jf_dx+9,f[1]);
+ /*Accumulate upper right of JtJ with outer product*/
+ temp=Jf_dx[0]; temp2=Jf_dx[9];
+ JtJ[0]+=temp*Jf_dx[0]+temp2*Jf_dx[9];
+ JtJ[1]+=temp*Jf_dx[1]+temp2*Jf_dx[10];
+ JtJ[2]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+ JtJ[3]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+ JtJ[4]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+ JtJ[5]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[6]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[7]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[8]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[1]; temp2=Jf_dx[10];
+ JtJ[10]+=temp*Jf_dx[1]+temp2*Jf_dx[10];
+ JtJ[11]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+ JtJ[12]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+ JtJ[13]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+ JtJ[14]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[15]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[16]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[17]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[2]; temp2=Jf_dx[11];
+ JtJ[20]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+ JtJ[21]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+ JtJ[22]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+ JtJ[23]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[24]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[25]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[26]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[3]; temp2=Jf_dx[12];
+ JtJ[30]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+ JtJ[31]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+ JtJ[32]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[33]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[34]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[35]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[4]; temp2=Jf_dx[13];
+ JtJ[40]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+ JtJ[41]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[42]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[43]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[44]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[5]; temp2=Jf_dx[14];
+ JtJ[50]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+ JtJ[51]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[52]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[53]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[6]; temp2=Jf_dx[15];
+ JtJ[60]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+ JtJ[61]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[62]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[7]; temp2=Jf_dx[16];
+ JtJ[70]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+ JtJ[71]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+ temp=Jf_dx[8]; temp2=Jf_dx[17];
+ JtJ[80]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+
+ /*Add square-sum to cost*/
+ back+=db_sqr(f[0])+db_sqr(f[1]);
+ }
+
+ return(back);
+}
+
+/*Compute min_Jtf and upper right of JtJ. Return cost*/
+inline double db_RobCamRotation_Jacobians(double JtJ[9],double min_Jtf[3],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+ double back,Jf_dx[6],f[2];
+ int i,j;
+
+ db_Zero(JtJ,9);
+ db_Zero(min_Jtf,3);
+ for(back=0.0,i=0;i<point_count;i++)
+ {
+ /*Compute reprojection error vector and its Jacobian
+ for this point*/
+ j=(i<<1);
+ db_DerivativeCauchyInhomRotationReprojection(Jf_dx,f,xp_i+j,H,x_i+j,one_over_scale2);
+ /*Perform
+ min_Jtf-=Jf_dx*f[0] and
+ min_Jtf-=(Jf_dx+3)*f[1] to accumulate -Jt%f*/
+ db_RowOperation3(min_Jtf,Jf_dx,f[0]);
+ db_RowOperation3(min_Jtf,Jf_dx+3,f[1]);
+ /*Accumulate upper right of JtJ with outer product*/
+ JtJ[0]+=Jf_dx[0]*Jf_dx[0]+Jf_dx[3]*Jf_dx[3];
+ JtJ[1]+=Jf_dx[0]*Jf_dx[1]+Jf_dx[3]*Jf_dx[4];
+ JtJ[2]+=Jf_dx[0]*Jf_dx[2]+Jf_dx[3]*Jf_dx[5];
+ JtJ[4]+=Jf_dx[1]*Jf_dx[1]+Jf_dx[4]*Jf_dx[4];
+ JtJ[5]+=Jf_dx[1]*Jf_dx[2]+Jf_dx[4]*Jf_dx[5];
+ JtJ[8]+=Jf_dx[2]*Jf_dx[2]+Jf_dx[5]*Jf_dx[5];
+
+ /*Add square-sum to cost*/
+ back+=db_sqr(f[0])+db_sqr(f[1]);
+ }
+
+ return(back);
+}
+
/*Levenberg-Marquardt polishing of a camera-rotation homography H over the
given correspondences. Runs at most max_iterations steps and stops once two
consecutive accepted steps each improve the cost by less than the relative
improvement_requirement. H is re-orthonormalized after every accepted step
so it remains a rotation matrix.*/
void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2,
                              int max_iterations,double improvement_requirement)
{
    int i,update,stop;
    double lambda,cost,current_cost;
    double JtJ[9],min_Jtf[3],dx[3],H_p_dx[9];

    /*Initial LM damping; decreased tenfold on success, increased on failure*/
    lambda=0.001;
    for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++)
    {
        /*if first time since improvement, compute Jacobian and residual
        (update==1 on entry, so current_cost is set before first use)*/
        if(update)
        {
            current_cost=db_RobCamRotation_Jacobians(JtJ,min_Jtf,H,point_count,x_i,xp_i,one_over_scale2);
            update=0;
        }

#ifdef _VERBOSE_
        /*std::cout << "Cost:" << current_cost << " ";*/
#endif /*_VERBOSE_*/

        /*Come up with a hypothesis dx
        based on the current lambda*/
        db_Compute_dx_3x3(dx,JtJ,min_Jtf,lambda);

        /*Compute Cost(x+dx)*/
        db_UpdateRotation(H_p_dx,H,dx);
        cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2);

        /*Is there an improvement?*/
        if(cost<current_cost)
        {
            /*improvement*/
            if(current_cost-cost<current_cost*improvement_requirement) stop++;
            else stop=0;
            lambda*=0.1;
            /*Move to the hypothesised position x+dx*/
            current_cost=cost;
            db_Copy9(H,H_p_dx);
            db_OrthonormalizeRotation(H);
            update=1;

#ifdef _VERBOSE_
            std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl;
#endif /*_VERBOSE_*/
        }
        else
        {
            /*no improvement*/
            lambda*=10.0;
            stop=0;
        }
    }
}
+
/*Gather the reduced n-parameter system from the full 9-parameter one.
For each free parameter, copy the corresponding gradient entry and the
upper-triangular part of the corresponding row of the full system;
fetch_vector[i] gives the index in the full system of reduced parameter i.*/
inline void db_RobImageHomographyFetchJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,int n,int *fetch_vector)
{
    int row,col,src;
    double *dst_row,*src_row;

    for(row=0;row<n;row++)
    {
        src=fetch_vector[row];
        min_Jtf[row]=min_Jtf_temp[src];
        dst_row=JtJ_ref[row];
        src_row=JtJ_temp_ref[src];
        /*Only the upper triangle (col>=row) is needed downstream*/
        for(col=row;col<n;col++)
        {
            dst_row[col]=src_row[fetch_vector[col]];
        }
    }
}
+
/*Project the full 9-parameter system onto the n free parameters of the
chosen parameterization via the chain rule with the 9xn update Jacobian
JE_dx_ref:
    JtJ     = JE_dx^T * JtJ_temp * JE_dx   (upper part only)
    min_Jtf = JE_dx^T * min_Jtf_temp
JtJ_temp_ref holds only the upper triangle on entry and is symmetrized
in place first.*/
inline void db_RobImageHomographyMultiplyJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,double **JE_dx_ref,int n)
{
    /*Scratch 9x8 product JtJ_temp*JE_dx*/
    double JtJ_JE[72],*JtJ_JE_ref[9];

    db_SetupMatrixRefs(JtJ_JE_ref,9,8,JtJ_JE);

    db_SymmetricExtendUpperToLower(JtJ_temp_ref,9,9);
    db_MultiplyMatricesAB(JtJ_JE_ref,JtJ_temp_ref,JE_dx_ref,9,9,n);
    db_UpperMultiplyMatricesAtB(JtJ_ref,JE_dx_ref,JtJ_JE_ref,n,9,n);
    db_MultiplyMatrixVectorAtb(min_Jtf,JE_dx_ref,min_Jtf_temp,n,9);
}
+
/*Fill column j of the update Jacobian JE_dx with the derivative of H with
respect to the scale parameter s.
Update of upper 2x2 is multiplication by
[s 0][ cos(theta) sin(theta)]
[0 s][-sin(theta) cos(theta)]*/
inline void db_RobImageHomographyJH_Js(double **JE_dx_ref,int j,double H[9])
{
    int r;

    /*Zero the whole column, then set the four nonzero entries*/
    for(r=0;r<9;r++) JE_dx_ref[r][j]=0;
    JE_dx_ref[0][j]=H[0];
    JE_dx_ref[1][j]=H[1];
    JE_dx_ref[3][j]=H[2];
    JE_dx_ref[4][j]=H[3];
}
+
/*Fill column j of the update Jacobian JE_dx with the derivative of H with
respect to the rotation angle theta.
Update of upper 2x2 is multiplication by
[s 0][ cos(theta) sin(theta)]
[0 s][-sin(theta) cos(theta)]*/
inline void db_RobImageHomographyJH_JR(double **JE_dx_ref,int j,double H[9])
{
    int r;

    /*Zero the whole column, then set the four nonzero entries*/
    for(r=0;r<9;r++) JE_dx_ref[r][j]=0;
    JE_dx_ref[0][j]= H[3];
    JE_dx_ref[1][j]= H[4];
    JE_dx_ref[3][j]= -H[0];
    JE_dx_ref[4][j]= -H[1];
}
+
/*Fill columns j and k of the update Jacobian JE_dx with the derivatives of
H with respect to the two translation parameters: all zeros except a one at
entry H[2] (x-translation, column j) and at entry H[5] (y-translation,
column k). H itself is not read.*/
inline void db_RobImageHomographyJH_Jt(double **JE_dx_ref,int j,int k,double H[9])
{
    int r;

    for(r=0;r<9;r++)
    {
        JE_dx_ref[r][j]=0;
        JE_dx_ref[r][k]=0;
    }
    JE_dx_ref[2][j]=1.0;
    JE_dx_ref[5][k]=1.0;
}
+
+inline void db_RobImageHomographyJH_dRotFocal(double **JE_dx_ref,int j,int k,int l,int m,double H[9])
+{
+ double f,fi,fi2;
+ double R[9],J[9];
+
+ /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/
+ f=db_FocalAndRotFromCamRotFocalHomography(R,H);
+ fi=db_SafeReciprocal(f);
+ fi2=db_sqr(fi);
+ db_JacobianOfRotatedPointStride(J,R,3);
+ JE_dx_ref[0][j]= J[0];
+ JE_dx_ref[1][j]= J[1];
+ JE_dx_ref[2][j]=f* J[2];
+ JE_dx_ref[3][j]= J[3];
+ JE_dx_ref[4][j]= J[4];
+ JE_dx_ref[5][j]=f* J[5];
+ JE_dx_ref[6][j]=fi*J[6];
+ JE_dx_ref[7][j]=fi*J[7];
+ JE_dx_ref[8][j]= J[8];
+ db_JacobianOfRotatedPointStride(J,R+1,3);
+ JE_dx_ref[0][k]= J[0];
+ JE_dx_ref[1][k]= J[1];
+ JE_dx_ref[2][k]=f* J[2];
+ JE_dx_ref[3][k]= J[3];
+ JE_dx_ref[4][k]= J[4];
+ JE_dx_ref[5][k]=f* J[5];
+ JE_dx_ref[6][k]=fi*J[6];
+ JE_dx_ref[7][k]=fi*J[7];
+ JE_dx_ref[8][k]= J[8];
+ db_JacobianOfRotatedPointStride(J,R+2,3);
+ JE_dx_ref[0][l]= J[0];
+ JE_dx_ref[1][l]= J[1];
+ JE_dx_ref[2][l]=f* J[2];
+ JE_dx_ref[3][l]= J[3];
+ JE_dx_ref[4][l]= J[4];
+ JE_dx_ref[5][l]=f* J[5];
+ JE_dx_ref[6][l]=fi*J[6];
+ JE_dx_ref[7][l]=fi*J[7];
+ JE_dx_ref[8][l]= J[8];
+
+ JE_dx_ref[0][m]=0;
+ JE_dx_ref[1][m]=0;
+ JE_dx_ref[2][m]=H[2];
+ JE_dx_ref[3][m]=0;
+ JE_dx_ref[4][m]=0;
+ JE_dx_ref[5][m]=H[5];
+ JE_dx_ref[6][m]= -fi2*H[6];
+ JE_dx_ref[7][m]= -fi2*H[7];
+ JE_dx_ref[8][m]=0;
+}
+
/*Compute the reduced normal-equation system (upper triangular JtJ_ref and
min_Jtf) for the requested homography_type. The full 9-parameter system is
formed first, then either projected through the update Jacobian JE_dx
(chain rule) for the constrained parameterizations, or simply gathered with
a fetch vector for parameterizations that are a subset of the entries of H.
On return *num_param is the number of free parameters n and, for the
projective case, *frozen_coord is the index of the entry of H held fixed.
Returns the accumulated squared-error cost.*/
inline double db_RobImageHomography_Jacobians_Generic(double *JtJ_ref[8],double min_Jtf[8],int *num_param,int *frozen_coord,double H[9],int point_count,double *x_i,double *xp_i,int homography_type,double one_over_scale2)
{
    double back;
    int i,j,fetch_vector[8],n;
    double JtJ_temp[81],min_Jtf_temp[9],JE_dx[72];
    double *JE_dx_ref[9],*JtJ_temp_ref[9];

    /*Compute cost and JtJ,min_Jtf with respect to H*/
    back=db_RobImageHomography_Jacobians(JtJ_temp,min_Jtf_temp,H,point_count,x_i,xp_i,one_over_scale2);

    /*Compute JtJ,min_Jtf with respect to the right parameters
    The formulas are
    JtJ=transpose(JE_dx)*JtJ*JE_dx and
    min_Jtf=transpose(JE_dx)*min_Jtf,
    where the 9xN matrix JE_dx is the Jacobian of H with respect
    to the update*/
    db_SetupMatrixRefs(JtJ_temp_ref,9,9,JtJ_temp);
    db_SetupMatrixRefs(JE_dx_ref,9,8,JE_dx);
    switch(homography_type)
    {
    case DB_HOMOGRAPHY_TYPE_SIMILARITY:
    case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
        /*Parameters: scale, rotation, two translations*/
        n=4;
        db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
        db_RobImageHomographyJH_JR(JE_dx_ref,1,H);
        db_RobImageHomographyJH_Jt(JE_dx_ref,2,3,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    case DB_HOMOGRAPHY_TYPE_ROTATION:
    case DB_HOMOGRAPHY_TYPE_ROTATION_U:
        n=1;
        db_RobImageHomographyJH_JR(JE_dx_ref,0,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    case DB_HOMOGRAPHY_TYPE_SCALING:
        n=1;
        db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    case DB_HOMOGRAPHY_TYPE_S_T:
        n=3;
        db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
        db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    case DB_HOMOGRAPHY_TYPE_R_T:
        n=3;
        db_RobImageHomographyJH_JR(JE_dx_ref,0,H);
        db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    case DB_HOMOGRAPHY_TYPE_R_S:
        n=2;
        db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
        db_RobImageHomographyJH_JR(JE_dx_ref,1,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;

    case DB_HOMOGRAPHY_TYPE_TRANSLATION:
        /*Free parameters are exactly H[2] and H[5]: gather directly*/
        n=2;
        fetch_vector[0]=2;
        fetch_vector[1]=5;
        db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
        break;
    case DB_HOMOGRAPHY_TYPE_AFFINE:
        /*Free parameters are the top two rows of H*/
        n=6;
        fetch_vector[0]=0;
        fetch_vector[1]=1;
        fetch_vector[2]=2;
        fetch_vector[3]=3;
        fetch_vector[4]=4;
        fetch_vector[5]=5;
        db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
        break;
    case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
        /*Freeze the largest-magnitude entry of H to fix the overall scale,
        leaving eight free parameters*/
        n=8;
        *frozen_coord=db_MaxAbsIndex9(H);
        for(j=0,i=0;i<9;i++) if(i!=(*frozen_coord))
        {
            fetch_vector[j]=i;
            j++;
        }
        db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
        break;
    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
        /*Three rotation parameters plus common focal length*/
        n=4;
        db_RobImageHomographyJH_dRotFocal(JE_dx_ref,0,1,2,3,H);
        db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
        break;
    }
    *num_param=n;

    return(back);
}
+
+inline void db_ImageHomographyUpdateGeneric(double H_p_dx[9],double H[9],double *dx,int homography_type,int frozen_coord)
+{
+ switch(homography_type)
+ {
+ case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+ case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+ db_Copy9(H_p_dx,H);
+ db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+ db_MultiplyRotationOntoImageHomography(H,dx[1]);
+ H_p_dx[2]+=dx[2];
+ H_p_dx[5]+=dx[3];
+ break;
+ case DB_HOMOGRAPHY_TYPE_ROTATION:
+ case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+ db_MultiplyRotationOntoImageHomography(H,dx[0]);
+ break;
+ case DB_HOMOGRAPHY_TYPE_SCALING:
+ db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+ break;
+ case DB_HOMOGRAPHY_TYPE_S_T:
+ db_Copy9(H_p_dx,H);
+ db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+ H_p_dx[2]+=dx[1];
+ H_p_dx[5]+=dx[2];
+ break;
+ case DB_HOMOGRAPHY_TYPE_R_T:
+ db_Copy9(H_p_dx,H);
+ db_MultiplyRotationOntoImageHomography(H,dx[0]);
+ H_p_dx[2]+=dx[1];
+ H_p_dx[5]+=dx[2];
+ break;
+ case DB_HOMOGRAPHY_TYPE_R_S:
+ db_Copy9(H_p_dx,H);
+ db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+ db_MultiplyRotationOntoImageHomography(H,dx[1]);
+ break;
+ case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+ db_Copy9(H_p_dx,H);
+ H_p_dx[2]+=dx[0];
+ H_p_dx[5]+=dx[1];
+ break;
+ case DB_HOMOGRAPHY_TYPE_AFFINE:
+ db_UpdateImageHomographyAffine(H_p_dx,H,dx);
+ break;
+ case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+ db_UpdateImageHomographyProjective(H_p_dx,H,dx,frozen_coord);
+ break;
+ case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+ case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+ db_UpdateRotFocalHomography(H_p_dx,H,dx);
+ break;
+ }
+}
+
/*Levenberg-Marquardt polishing of H for an arbitrary homography
parameterization (homography_type). Same damping scheme as
db_RobCamRotation_Polish, but uses the reduced n-parameter system computed
by db_RobImageHomography_Jacobians_Generic and the type-specific update
db_ImageHomographyUpdateGeneric. Terminates after max_iterations steps or
once two consecutive accepted steps each improve the cost by less than the
relative improvement_requirement.
NOTE(review): the caller in db_RobImageHomography passes only seven
arguments -- improvement_requirement presumably has a default value in the
header declaration; confirm there.*/
void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type,double *x_i,double *xp_i,double one_over_scale2,
                                      int max_iterations,double improvement_requirement)
{
    int i,update,stop,n;
    int frozen_coord = 0;
    double lambda,cost,current_cost;
    /*Maximum reduced system size is 8 parameters (projective)*/
    double JtJ[72],min_Jtf[9],dx[8],H_p_dx[9];
    double *JtJ_ref[9],d[8];

    /*Initial LM damping; decreased tenfold on success, increased on failure*/
    lambda=0.001;
    for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++)
    {
        /*if first time since improvement, compute Jacobian and residual
        (update==1 on entry, so current_cost is set before first use)*/
        if(update)
        {
            db_SetupMatrixRefs(JtJ_ref,9,8,JtJ);
            current_cost=db_RobImageHomography_Jacobians_Generic(JtJ_ref,min_Jtf,&n,&frozen_coord,H,point_count,x_i,xp_i,homography_type,one_over_scale2);
            update=0;
        }

#ifdef _VERBOSE_
        /*std::cout << "Cost:" << current_cost << " ";*/
#endif /*_VERBOSE_*/

        /*Come up with a hypothesis dx
        based on the current lambda*/
        db_Compute_dx(dx,JtJ_ref,min_Jtf,lambda,d,n);

        /*Compute Cost(x+dx)*/
        db_ImageHomographyUpdateGeneric(H_p_dx,H,dx,homography_type,frozen_coord);
        cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2);

        /*Is there an improvement?*/
        if(cost<current_cost)
        {
            /*improvement*/
            if(current_cost-cost<current_cost*improvement_requirement) stop++;
            else stop=0;
            lambda*=0.1;
            /*Move to the hypothesised position x+dx*/
            current_cost=cost;
            db_Copy9(H,H_p_dx);
            update=1;

#ifdef _VERBOSE_
            std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl;
#endif /*_VERBOSE_*/
        }
        else
        {
            /*no improvement*/
            lambda*=10.0;
            stop=0;
        }
    }
}
/*Robust estimation of the homography H (xp~Hx) between two point sets:
generate minimal-sample hypotheses for the requested homography_type,
score them chunk-by-chunk with a robust Cauchy log-cost while pruning the
worse half after each chunk (preemptive RANSAC style), then either polish
the winner with Levenberg-Marquardt or, if outlierremoveflagE is set,
compact the correspondence arrays to the inliers instead.*/
void db_RobImageHomography(
    /*Best homography*/
    double H[9],
    /*2DPoint to 2DPoint constraints
    Points are assumed to be given in
    homogenous coordinates*/
    double *im, double *im_p,
    /*Nr of points in total*/
    int nr_points,
    /*Calibration matrices
    used to normalize the points*/
    double K[9],
    double Kp[9],
    /*Pre-allocated space temp_d
    should point to at least
    12*nr_samples+10*nr_points
    allocated positions*/
    double *temp_d,
    /*Pre-allocated space temp_i
    should point to at least
    max(nr_samples,nr_points)
    allocated positions*/
    int *temp_i,
    int homography_type,
    db_Statistics *stat,
    int max_iterations,
    int max_points,
    double scale,
    int nr_samples,
    int chunk_size,
    /////////////////////////////////////////////
    // regular use: set outlierremoveflagE =0;
    // flag for the outlier removal
    int outlierremoveflagE,
    // if flag is 1, then the following variables
    // need the input
    //////////////////////////////////////
    // 3D coordinates
    double *wp,
    // its corresponding stereo pair's points
    double *im_r,
    // raw image coordinates
    double *im_raw, double *im_raw_p,
    // final matches
    int *finalNumE)
{
    /*Random seed*/
    int r_seed;

    /*Number of surviving correspondences after optional outlier removal*/
    int point_count_new;
    /*Counters*/
    int i,j,c,point_count,hyp_count;
    int last_hyp,new_last_hyp,last_corr;
    int pos,point_pos,last_point;
    /*Accumulator*/
    double acc;
    /*Hypothesis pointer*/
    double *hyp_point;
    /*Random sample*/
    int s[4];
    /*Pivot for hypothesis pruning*/
    double pivot;
    /*Best hypothesis position*/
    int best_pos;
    /*Best score*/
    double lowest_cost;
    /*One over the squared scale of
    Cauchy distribution*/
    double one_over_scale2;
    /*temporary pointers*/
    double *x_i_temp,*xp_i_temp;
    /*Temporary space for inverse calibration matrices*/
    double K_inv[9];
    double Kp_inv[9];
    /*Temporary space for homography*/
    double H_temp[9],H_temp2[9];
    /*Pointers to homogenous coordinates*/
    double *x_h_point,*xp_h_point;
    /*Array of pointers to inhomogenous coordinates*/
    double *X[3],*Xp[3];
    /*Similarity parameters*/
    int orientation_preserving,allow_scaling,allow_rotation,allow_translation,sample_size;

    /*Homogenous coordinates of image points in first image*/
    double *x_h;
    /*Homogenous coordinates of image points in second image*/
    double *xp_h;
    /*Inhomogenous coordinates of image points in first image*/
    double *x_i;
    /*Inhomogenous coordinates of image points in second image*/
    double *xp_i;
    /*Homography hypotheses*/
    double *hyp_H_array;
    /*Cost array*/
    double *hyp_cost_array;
    /*Permutation of the hypotheses*/
    int *hyp_perm;
    /*Sample of the points*/
    int *point_perm;
    /*Temporary space for quick-select
    2*nr_samples*/
    double *temp_select;

    /*Get inverse calibration matrices*/
    db_InvertCalibrationMatrix(K_inv,K);
    db_InvertCalibrationMatrix(Kp_inv,Kp);
    /*Compute scale coefficient*/
    one_over_scale2=1.0/(scale*scale);
    /*Initialize random seed*/
    r_seed=12345;
    /*Set pointers to pre-allocated space.
    Layout of temp_d: [costs | 9*nr_samples hypotheses | 2*nr_samples
    quick-select scratch | 3n x_h | 3n xp_h | 2n x_i | 2n xp_i],
    totalling the documented 12*nr_samples+10*nr_points doubles*/
    hyp_cost_array=temp_d;
    hyp_H_array=temp_d+nr_samples;
    temp_select=temp_d+10*nr_samples;
    x_h=temp_d+12*nr_samples;
    xp_h=temp_d+12*nr_samples+3*nr_points;
    x_i=temp_d+12*nr_samples+6*nr_points;
    xp_i=temp_d+12*nr_samples+8*nr_points;
    /*hyp_perm and point_perm deliberately alias the same buffer:
    point_perm is only used during sampling below, before the hypothesis
    permutation is initialized*/
    hyp_perm=temp_i;
    point_perm=temp_i;

    /*Prepare a randomly permuted subset of size
    point_count from the input points*/

    point_count=db_mini(nr_points,(int)(chunk_size*log((double)nr_samples)/DB_LN2));

    point_count_new = point_count;

    for(i=0;i<nr_points;i++) point_perm[i]=i;

    /*Sampling without replacement: swap the chosen index out of the
    shrinking tail of the permutation*/
    for(last_point=nr_points-1,i=0;i<point_count;i++,last_point--)
    {
        pos=db_RandomInt(r_seed,last_point);
        point_pos=point_perm[pos];
        point_perm[pos]=point_perm[last_point];

        /*Normalize image points with calibration
        matrices and move them to x_h and xp_h*/
        c=3*point_pos;
        j=3*i;
        x_h_point=x_h+j;
        xp_h_point=xp_h+j;
        db_Multiply3x3_3x1(x_h_point,K_inv,im+c);
        db_Multiply3x3_3x1(xp_h_point,Kp_inv,im_p+c);

        db_HomogenousNormalize3(x_h_point);
        db_HomogenousNormalize3(xp_h_point);

        /*Dehomogenize image points and move them
        to x_i and xp_i*/
        c=(i<<1);
        db_DeHomogenizeImagePoint(x_i+c,x_h_point); // 2-dimension
        db_DeHomogenizeImagePoint(xp_i+c,xp_h_point); //2-dimension
    }


    /*Generate Hypotheses*/
    hyp_count=0;
    switch(homography_type)
    {
    case DB_HOMOGRAPHY_TYPE_SIMILARITY:
    case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
    case DB_HOMOGRAPHY_TYPE_TRANSLATION:
    case DB_HOMOGRAPHY_TYPE_ROTATION:
    case DB_HOMOGRAPHY_TYPE_ROTATION_U:
    case DB_HOMOGRAPHY_TYPE_SCALING:
    case DB_HOMOGRAPHY_TYPE_S_T:
    case DB_HOMOGRAPHY_TYPE_R_T:
    case DB_HOMOGRAPHY_TYPE_R_S:

        /*Pick the degrees of freedom and minimal sample size for the
        restricted similarity-family types*/
        switch(homography_type)
        {
        case DB_HOMOGRAPHY_TYPE_SIMILARITY:
            orientation_preserving=1;
            allow_scaling=1;
            allow_rotation=1;
            allow_translation=1;
            sample_size=2;
            break;
        case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
            orientation_preserving=0;
            allow_scaling=1;
            allow_rotation=1;
            allow_translation=1;
            sample_size=3;
            break;
        case DB_HOMOGRAPHY_TYPE_TRANSLATION:
            orientation_preserving=1;
            allow_scaling=0;
            allow_rotation=0;
            allow_translation=1;
            sample_size=1;
            break;
        case DB_HOMOGRAPHY_TYPE_ROTATION:
            orientation_preserving=1;
            allow_scaling=0;
            allow_rotation=1;
            allow_translation=0;
            sample_size=1;
            break;
        case DB_HOMOGRAPHY_TYPE_ROTATION_U:
            orientation_preserving=0;
            allow_scaling=0;
            allow_rotation=1;
            allow_translation=0;
            sample_size=2;
            break;
        case DB_HOMOGRAPHY_TYPE_SCALING:
            orientation_preserving=1;
            allow_scaling=1;
            allow_rotation=0;
            allow_translation=0;
            sample_size=1;
            break;
        case DB_HOMOGRAPHY_TYPE_S_T:
            orientation_preserving=1;
            allow_scaling=1;
            allow_rotation=0;
            allow_translation=1;
            sample_size=2;
            break;
        case DB_HOMOGRAPHY_TYPE_R_T:
            orientation_preserving=1;
            allow_scaling=0;
            allow_rotation=1;
            allow_translation=1;
            sample_size=2;
            break;
        case DB_HOMOGRAPHY_TYPE_R_S:
            orientation_preserving=1;
            allow_scaling=1;
            allow_rotation=0;
            allow_translation=0;
            sample_size=1;
            break;
        }

        /*NOTE(review): three indices are always sampled here even when
        sample_size<3, so point_count>=3 is effectively required for this
        family despite the point_count>=sample_size guard -- confirm
        db_RandomSample's behavior for point_count<3*/
        if(point_count>=sample_size) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,3,point_count,r_seed);
            X[0]= &x_i[s[0]<<1];
            X[1]= &x_i[s[1]<<1];
            X[2]= &x_i[s[2]<<1];
            Xp[0]= &xp_i[s[0]<<1];
            Xp[1]= &xp_i[s[1]<<1];
            Xp[2]= &xp_i[s[2]<<1];
            db_StitchSimilarity2D(&hyp_H_array[9*hyp_count],Xp,X,sample_size,orientation_preserving,
                allow_scaling,allow_rotation,allow_translation);
            hyp_count++;
        }
        break;

    case DB_HOMOGRAPHY_TYPE_CAMROTATION:
        if(point_count>=2) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,2,point_count,r_seed);
            db_StitchCameraRotation_2Points(&hyp_H_array[9*hyp_count],
                &x_h[3*s[0]],&x_h[3*s[1]],
                &xp_h[3*s[0]],&xp_h[3*s[1]]);
            hyp_count++;
        }
        break;

    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
        /*The solver may yield zero or several solutions, hence the
        returned count is accumulated instead of incrementing by one*/
        if(point_count>=3) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,3,point_count,r_seed);
            hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count],
                &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
                &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]);
        }
        break;

    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
        if(point_count>=3) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,3,point_count,r_seed);
            hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count],
                &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
                &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],NULL,0);
        }
        break;

    case DB_HOMOGRAPHY_TYPE_AFFINE:
        if(point_count>=3) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,3,point_count,r_seed);
            db_StitchAffine2D_3Points(&hyp_H_array[9*hyp_count],
                &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
                &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]);
            hyp_count++;
        }
        break;

    case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
    default:
        if(point_count>=4) for(i=0;i<nr_samples;i++)
        {
            db_RandomSample(s,4,point_count,r_seed);
            db_StitchProjective2D_4Points(&hyp_H_array[9*hyp_count],
                &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],&x_h[3*s[3]],
                &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],&xp_h[3*s[3]]);
            hyp_count++;
        }
    }

    if(hyp_count)
    {
        /*Count cost in chunks and decimate hypotheses
        until only one remains or the correspondences are
        exhausted*/
        for(i=0;i<hyp_count;i++)
        {
            hyp_perm[i]=i;
            hyp_cost_array[i]=0.0;
        }
        for(i=0,last_hyp=hyp_count-1;(last_hyp>0) && (i<point_count);i+=chunk_size)
        {
            /*Update cost with the next chunk*/
            last_corr=db_mini(i+chunk_size-1,point_count-1);
            for(j=0;j<=last_hyp;j++)
            {
                hyp_point=hyp_H_array+9*hyp_perm[j];
                for(c=i;c<=last_corr;)
                {
                    /*Take log of product of ten reprojection
                    errors to reduce nr of expensive log operations*/
                    if(c+9<=last_corr)
                    {
                        x_i_temp=x_i+(c<<1);
                        xp_i_temp=xp_i+(c<<1);

                        acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,hyp_point,x_i_temp,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,hyp_point,x_i_temp+2,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,hyp_point,x_i_temp+4,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,hyp_point,x_i_temp+6,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,hyp_point,x_i_temp+8,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,hyp_point,x_i_temp+10,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,hyp_point,x_i_temp+12,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,hyp_point,x_i_temp+14,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,hyp_point,x_i_temp+16,one_over_scale2);
                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,hyp_point,x_i_temp+18,one_over_scale2);
                        c+=10;
                    }
                    else
                    {
                        for(acc=1.0;c<=last_corr;c++)
                        {
                            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),hyp_point,x_i+(c<<1),one_over_scale2);
                        }
                    }
                    hyp_cost_array[j]+=log(acc);
                }
            }
            if (chunk_size<point_count){
                /*Prune out half of the hypotheses*/
                new_last_hyp=(last_hyp+1)/2-1;
                /*Median-of-costs pivot via quick-select; keep hypotheses
                at or below the pivot*/
                pivot=db_LeanQuickSelect(hyp_cost_array,last_hyp+1,new_last_hyp,temp_select);
                for(j=0,c=0;(j<=last_hyp) && (c<=new_last_hyp);j++)
                {
                    if(hyp_cost_array[j]<=pivot)
                    {
                        hyp_cost_array[c]=hyp_cost_array[j];
                        hyp_perm[c]=hyp_perm[j];
                        c++;
                    }
                }
                last_hyp=new_last_hyp;
            }
        }
        /*Find the best hypothesis*/
        lowest_cost=hyp_cost_array[0];
        best_pos=0;
        for(j=1;j<=last_hyp;j++)
        {
            if(hyp_cost_array[j]<lowest_cost)
            {
                lowest_cost=hyp_cost_array[j];
                best_pos=j;
            }
        }

        /*Move the best hypothesis*/
        db_Copy9(H_temp,hyp_H_array+9*hyp_perm[best_pos]);

        // outlier removal
        if (outlierremoveflagE) // no polishment needed
        {
            /*Compacts x_i/xp_i/wp/im/im_p/im_r/im_raw/im_raw_p in place;
            note one_over_scale2 is passed as the (unused) 'scale' argument*/
            point_count_new = db_RemoveOutliers_Homography(H_temp,x_i,xp_i,wp,im,im_p,im_r,im_raw,im_raw_p,point_count,one_over_scale2);
        }
        else
        {
            /*Polish*/
            switch(homography_type)
            {
            case DB_HOMOGRAPHY_TYPE_SIMILARITY:
            case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
            case DB_HOMOGRAPHY_TYPE_TRANSLATION:
            case DB_HOMOGRAPHY_TYPE_ROTATION:
            case DB_HOMOGRAPHY_TYPE_ROTATION_U:
            case DB_HOMOGRAPHY_TYPE_SCALING:
            case DB_HOMOGRAPHY_TYPE_S_T:
            case DB_HOMOGRAPHY_TYPE_R_T:
            case DB_HOMOGRAPHY_TYPE_R_S:
            case DB_HOMOGRAPHY_TYPE_AFFINE:
            case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
            case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
            case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
                db_RobCamRotation_Polish_Generic(H_temp,db_mini(point_count,max_points),homography_type,x_i,xp_i,one_over_scale2,max_iterations);
                break;
            case DB_HOMOGRAPHY_TYPE_CAMROTATION:
                db_RobCamRotation_Polish(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,max_iterations);
                break;
            }

        }

    }
    else db_Identity3x3(H_temp);

    /*Set the parameter count used by the GRIC score before computing
    the statistics*/
    switch(homography_type)
    {
    case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
        if(stat) stat->nr_parameters=8;
        break;
    case DB_HOMOGRAPHY_TYPE_AFFINE:
        if(stat) stat->nr_parameters=6;
        break;
    case DB_HOMOGRAPHY_TYPE_SIMILARITY:
    case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
        if(stat) stat->nr_parameters=4;
        break;
    case DB_HOMOGRAPHY_TYPE_CAMROTATION:
        if(stat) stat->nr_parameters=3;
        break;
    case DB_HOMOGRAPHY_TYPE_TRANSLATION:
    case DB_HOMOGRAPHY_TYPE_S_T:
    case DB_HOMOGRAPHY_TYPE_R_T:
    case DB_HOMOGRAPHY_TYPE_R_S:
        if(stat) stat->nr_parameters=2;
        break;
    case DB_HOMOGRAPHY_TYPE_ROTATION:
    case DB_HOMOGRAPHY_TYPE_ROTATION_U:
    case DB_HOMOGRAPHY_TYPE_SCALING:
        if(stat) stat->nr_parameters=1;
        break;
    }

    db_RobImageHomography_Statistics(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,stat);

    /*Put on the calibration matrices: H = Kp * H_temp * K_inv maps the
    original (unnormalized) pixel coordinates*/
    db_Multiply3x3_3x3(H_temp2,H_temp,K_inv);
    db_Multiply3x3_3x3(H,Kp,H_temp2);

    if (finalNumE)
        *finalNumE = point_count_new;

}
diff --git a/jni/feature_stab/db_vlvm/db_rob_image_homography.h b/jni/feature_stab/db_vlvm/db_rob_image_homography.h
new file mode 100644
index 000000000..59cde7daa
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_rob_image_homography.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_rob_image_homography.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_ROB_IMAGE_HOMOGRAPHY
+#define DB_ROB_IMAGE_HOMOGRAPHY
+
+#include "db_utilities.h"
+#include "db_robust.h"
+#include "db_metrics.h"
+
+#include <stdlib.h> // for NULL
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMRobImageHomography (LM) Robust Image Homography
+ */
+/*\{*/
+
+#define DB_HOMOGRAPHY_TYPE_DEFAULT 0
+#define DB_HOMOGRAPHY_TYPE_PROJECTIVE 0
+#define DB_HOMOGRAPHY_TYPE_AFFINE 1
+#define DB_HOMOGRAPHY_TYPE_SIMILARITY 2
+#define DB_HOMOGRAPHY_TYPE_SIMILARITY_U 3
+#define DB_HOMOGRAPHY_TYPE_TRANSLATION 4
+#define DB_HOMOGRAPHY_TYPE_ROTATION 5
+#define DB_HOMOGRAPHY_TYPE_ROTATION_U 6
+#define DB_HOMOGRAPHY_TYPE_SCALING 7
+#define DB_HOMOGRAPHY_TYPE_S_T 8
+#define DB_HOMOGRAPHY_TYPE_R_T 9
+#define DB_HOMOGRAPHY_TYPE_R_S 10
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION 11
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F 12
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD 13
+
+/*!
+Solve for homography H such that xp~Hx
+\param H best homography
+
+2D point to 2D point constraints:
+
+\param im first image points
+\param im_p second image points
+\param nr_points number of points
+
+Calibration matrices:
+
+\param K first camera
+\param Kp second camera
+
+ Temporary space:
+
+ \param temp_d pre-allocated space of size 12*nr_samples+10*nr_points doubles
+ \param temp_i pre-allocated space of size max(nr_samples,nr_points) ints
+
+ Statistics for this estimation
+
+ \param stat NULL - do not compute
+
+ \param homography_type see DB_HOMOGRAPHY_TYPE_* definitions above
+
+ Estimation parameters:
+
+ \param max_iterations max number of polishing steps
+ \param max_points only use this many points
+ \param scale Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
+ \param nr_samples number of times to compute a hypothesis
+ \param chunk_size size of cost chunks
+*/
+DB_API void db_RobImageHomography(
+ /*Best homography*/
+ double H[9],
+ /*2DPoint to 2DPoint constraints
+ Points are assumed to be given in
+ homogenous coordinates*/
+ double *im,double *im_p,
+ /*Nr of points in total*/
+ int nr_points,
+ /*Calibration matrices
+ used to normalize the points*/
+ double K[9],
+ double Kp[9],
+ /*Pre-allocated space temp_d
+ should point to at least
+ 12*nr_samples+10*nr_points
+ allocated positions*/
+ double *temp_d,
+ /*Pre-allocated space temp_i
+ should point to at least
+ max(nr_samples,nr_points)
+ allocated positions*/
+ int *temp_i,
+ int homography_type=DB_HOMOGRAPHY_TYPE_DEFAULT,
+ db_Statistics *stat=NULL,
+ int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+ int max_points=DB_DEFAULT_MAX_POINTS,
+ double scale=DB_POINT_STANDARDDEV,
+ int nr_samples=DB_DEFAULT_NR_SAMPLES,
+ int chunk_size=DB_DEFAULT_CHUNK_SIZE,
+ ///////////////////////////////////////////////////
+ // flag for the outlier removal
+ int outlierremoveflagE = 0,
+ // if flag is 1, then the following variables
+ // need to be provided
+ ///////////////////////////////////////////////////
+ // 3D coordinates
+ double *wp=NULL,
+ // its corresponding stereo pair's points
+ double *im_r=NULL,
+ // raw image coordinates
+ double *im_raw=NULL, double *im_raw_p=NULL,
+ // final matches
+ int *final_NumE=0);
+
+DB_API double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i,
+ double *xp_i,double one_over_scale2);
+
+
+DB_API void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i,
+ double *xp_i, double one_over_scale2,
+ int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+ double improvement_requirement=DB_DEFAULT_IMP_REQ);
+
+
+DB_API void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type,
+ double *x_i,double *xp_i,double one_over_scale2,
+ int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+ double improvement_requirement=DB_DEFAULT_IMP_REQ);
+
+
+#endif /* DB_ROB_IMAGE_HOMOGRAPHY */
diff --git a/jni/feature_stab/db_vlvm/db_robust.h b/jni/feature_stab/db_vlvm/db_robust.h
new file mode 100644
index 000000000..be0794c6e
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_robust.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_robust.h,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_ROBUST
+#define DB_ROBUST
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMRobust (LM) Robust Estimation
+ */
+
/*!
 \struct db_Statistics
 \ingroup LMRobust
 \brief (LnM) Sampling problem statistics
 \date Mon Sep 10 10:28:08 EDT 2007
 \par Copyright: 2007 Sarnoff Corporation. All Rights Reserved
 */
 struct db_stat_struct
 {
     int nr_points;                      /* total number of point correspondences considered */
     int nr_inliers;                     /* number of correspondences classified as inliers */
     double inlier_fraction;             /* nr_inliers/nr_points (presumably in [0,1]) */
     double cost;                        /* robust cost of the estimated model */
     double one_over_scale2;             /* 1/scale^2 used by the robust (Cauchy) cost -- see db_rob_image_homography */
     double lambda1;                     /* NOTE(review): lambda1..3 look like eigenvalue/covariance terms -- confirm against the estimator */
     double lambda2;
     double lambda3;
     int nr_parameters;                  /* degrees of freedom of the fitted model (e.g. 8 for projective homography) */
     int model_dimension;                /* dimension of the fitted model manifold */
     double gric;                        /* geometric robust information criterion score */
     double inlier_evidence;             /* evidence/support measure for the inlier set */
     double posestd[6];                  /* per-component standard deviation of the pose (3 rotation + 3 translation, presumably) */
     double rotationvecCov[9];           /* 3x3 covariance of the rotation vector, row-major */
     double translationvecCov[9];        /* 3x3 covariance of the translation vector, row-major */
     int posecov_inliercount;            /* inlier count used when the pose covariance was computed */
     int posecovready;                   /* nonzero once the pose covariance fields are valid */
     double median_reprojection_error;   /* median reprojection error over the correspondences */
 };
 typedef db_stat_struct db_Statistics;
+
+#endif /* DB_ROBUST */
diff --git a/jni/feature_stab/db_vlvm/db_utilities.cpp b/jni/feature_stab/db_vlvm/db_utilities.cpp
new file mode 100644
index 000000000..ce2093b01
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities.cpp,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include <string.h>
+#include <stdio.h>
+
/* Build and return an array of h row pointers into the contiguous w*h float image im. */
float** db_SetupImageReferences_f(float *im,int w,int h)
{
    assert(im);
    float **rows = new float* [h];
    for(int r = 0; r < h; ++r)
    {
        rows[r] = im + r * w;
    }
    return rows;
}
+
/* Build and return an array of h row pointers into the contiguous w*h byte image im. */
unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h)
{
    assert(im);

    unsigned char **rows = new unsigned char* [h];
    for(int r = 0; r < h; ++r)
    {
        rows[r] = im + r * w;
    }
    return rows;
}
+float** db_AllocImage_f(int w,int h,int over_allocation)
+{
+ float **img,*im;
+
+ im=new float [w*h+over_allocation];
+ img=db_SetupImageReferences_f(im,w,h);
+
+ return(img);
+}
+
+unsigned char** db_AllocImage_u(int w,int h,int over_allocation)
+{
+ unsigned char **img,*im;
+
+ im=new unsigned char [w*h+over_allocation];
+ img=db_SetupImageReferences_u(im,w,h);
+
+ return(img);
+}
+
/* Free a float image: the pixel buffer is owned by row 0 (one contiguous
   allocation), then the row-pointer array itself. h is unused. */
void db_FreeImage_f(float **img,int h)
{
    float *pixels = img[0];
    delete [] pixels;
    delete [] img;
}
+
/* Free a byte image: the pixel buffer is owned by row 0 (one contiguous
   allocation), then the row-pointer array itself. h is unused. */
void db_FreeImage_u(unsigned char **img,int h)
{
    unsigned char *pixels = img[0];
    delete [] pixels;
    delete [] img;
}
+
// ----------------------------------------------------------------------------------------------------------- ;
//
// copy image (source to destination)
// ---> must be a 2D image array with the same image size
// ---> the size of the input and output images must be same
// ---> also copies over_allocation bytes that follow the last row
//
// ------------------------------------------------------------------------------------------------------------ ;
void db_CopyImage_u(unsigned char **d,const unsigned char * const *s, int w, int h, int over_allocation)
{
    int i;

    for (i=0;i<h;i++)
    {
        memcpy(d[i],s[i],w*sizeof(unsigned char));
    }

    /* BUGFIX: the original did memcpy(&d[h],&d[h],over_allocation) -- a self-copy
       no-op that also read past the end of the row-pointer array. Per the header
       documentation, the intent is to copy the over-allocated bytes that sit in
       the pixel buffer immediately after the last row. */
    if (over_allocation > 0 && h > 0)
        memcpy(d[h-1] + w, s[h-1] + w, over_allocation);
}
+
/* Nearest-neighbour LUT warp: dst[j][i] = src[(int)lut_y[j][i]][(int)lut_x[j][i]],
   or 0 when the looked-up source coordinate falls outside the w x h image. */
inline void db_WarpImageLutFast_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
                                  const float * const * lut_x, const float * const * lut_y)
{
    assert(src && dst);
    int xd=0, yd=0;

    for ( int i = 0; i < w; ++i )
        for ( int j = 0; j < h; ++j )
        {
            /* BUGFIX: the original truncated through (unsigned int), which is
               undefined behavior for negative LUT values and defeated the
               xd<0 / yd<0 guards below. Truncate directly to signed int. */
            xd = (int)(lut_x[j][i]);
            yd = (int)(lut_y[j][i]);
            if ( xd >= w || yd >= h ||
                 xd < 0 || yd < 0)
                dst[j][i] = 0;
            else
                dst[j][i] = src[yd][xd];
        }
}
+
+inline void db_WarpImageLutBilinear_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+ const float * const * lut_x,const float * const* lut_y)
+{
+ assert(src && dst);
+ double xd=0.0, yd=0.0;
+
+ for ( int i = 0; i < w; ++i )
+ for ( int j = 0; j < h; ++j )
+ {
+ xd = static_cast<double>(lut_x[j][i]);
+ yd = static_cast<double>(lut_y[j][i]);
+ if ( xd > w || yd > h ||
+ xd < 0.0 || yd < 0.0)
+ dst[j][i] = 0;
+ else
+ dst[j][i] = db_BilinearInterpolation(yd, xd, src);
+ }
+}
+
+
+void db_WarpImageLut_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+ const float * const * lut_x,const float * const * lut_y, int type)
+{
+ switch (type)
+ {
+ case DB_WARP_FAST:
+ db_WarpImageLutFast_u(src,dst,w,h,lut_x,lut_y);
+ break;
+ case DB_WARP_BILINEAR:
+ db_WarpImageLutBilinear_u(src,dst,w,h,lut_x,lut_y);
+ break;
+ default:
+ break;
+ }
+}
+
+
/* Print a vector to stdout as "[ v0 v1 ... ]". */
void db_PrintDoubleVector(double *a,long size)
{
    printf("[ ");
    long k = 0;
    while (k < size)
    {
        printf("%lf ", a[k]);
        ++k;
    }
    printf("]");
}
+
/* Print a rows x cols row-major matrix to stdout, one row per line,
   wrapped in "[" ... "]". */
void db_PrintDoubleMatrix(double *a,long rows,long cols)
{
    printf("[\n");
    for(long r = 0; r < rows; ++r)
    {
        const double *row = a + r * cols;
        for(long c = 0; c < cols; ++c)
        {
            printf("%lf ", row[c]);
        }
        printf("\n");
    }
    printf("]");
}
diff --git a/jni/feature_stab/db_vlvm/db_utilities.h b/jni/feature_stab/db_vlvm/db_utilities.h
new file mode 100644
index 000000000..fa9c87745
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities.h
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_H
+#define DB_UTILITIES_H
+
+
+#ifdef _WIN32
+#pragma warning(disable: 4275)
+#pragma warning(disable: 4251)
+#pragma warning(disable: 4786)
+#pragma warning(disable: 4800)
+#pragma warning(disable: 4018) /* signed-unsigned mismatch */
+#endif /* _WIN32 */
+
+#ifdef _WIN32
+ #ifdef DBDYNAMIC_EXPORTS
+ #define DB_API __declspec(dllexport)
+ #else
+ #ifdef DBDYNAMIC_IMPORTS
+ #define DB_API __declspec(dllimport)
+ #else
+ #define DB_API
+ #endif
+ #endif
+#else
+ #define DB_API
+#endif /* _WIN32 */
+
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+
+#include <math.h>
+
+#include <assert.h>
+#include "db_utilities_constants.h"
+/*!
+ * \defgroup LMBasicUtilities (LM) Utility Functions (basic math, linear algebra and array manipulations)
+ */
+/*\{*/
+
/*!
 * Round double into int using fld and fistp instructions.
 * The x86 assembly path uses the FPU's current rounding mode (round-to-nearest-even
 * by default), while the portable path rounds half-up via floor(x+0.5), so the two
 * may differ for exact .5 ties.
 */
inline int db_roundi (double x) {
#ifdef WIN32_ASM
    int n;
    __asm
    {
        fld x;
        fistp n;
    }
    return n;
#else
    return static_cast<int>(floor(x+0.5));
#endif
}
+
/*!
 * Square of a double: a*a.
 */
inline double db_sqr(double a)
{
    return a*a;
}
+
/*!
 * Square of a long: a*a.
 */
inline long db_sqr(long a)
{
    return a*a;
}
+
/*!
 * Square of an int, returned as long.
 * Note: the multiply is done in int and then widened, as in the original.
 */
inline long db_sqr(int a)
{
    return a*a;
}
+
/*!
 * Maximum of two doubles.
 */
inline double db_maxd(double a,double b)
{
    return (b > a) ? b : a;
}
/*!
 * Minimum of two doubles.
 */
inline double db_mind(double a,double b)
{
    return (b < a) ? b : a;
}
+
+
/*!
 * Maximum of two ints.
 */
inline int db_maxi(int a,int b)
{
    return (b > a) ? b : a;
}
+
/*!
 * Minimum of two ints.
 */
inline int db_mini(int a,int b)
{
    return (b < a) ? b : a;
}
/*!
 * Maximum of two longs.
 */
inline long db_maxl(long a,long b)
{
    return (b > a) ? b : a;
}
+
/*!
 * Minimum of two longs.
 */
inline long db_minl(long a,long b)
{
    return (b < a) ? b : a;
}
+
/*!
 * Sign of a number.
 * \return 1.0 when x >= 0.0, otherwise -1.0 (zero counts as positive).
 */
inline double db_sign(double x)
{
    return (x >= 0.0) ? 1.0 : -1.0;
}
/*!
 * Absolute value of an int.
 */
inline int db_absi(int a)
{
    return (a < 0) ? -a : a;
}
/*!
 * Absolute value of a float.
 */
inline float db_absf(float a)
{
    return (a < 0) ? -a : a;
}
+
/*!
 * Absolute value of a double.
 */
inline double db_absd(double a)
{
    return (a < 0) ? -a : a;
}
+
/*!
 * Reciprocal that never divides by zero.
 * \return 1/a when a != 0, otherwise 1.0.
 */
inline double db_SafeReciprocal(double a)
{
    if(a == 0.0) return 1.0;
    return 1.0 / a;
}
+
/*!
 * Division that never divides by zero.
 * \return a/b when b != 0, otherwise a.
 */
inline double db_SafeDivision(double a,double b)
{
    if(b == 0.0) return a;
    return a / b;
}
+
/*!
 * Square root that never produces an imaginary result.
 * \return sqrt(a) when a >= 0.0, otherwise 0.0.
 */
inline double db_SafeSqrt(double a)
{
    if(a >= 0.0) return sqrt(a);
    return 0.0;
}
+
/*!
 * Square root of a reciprocal; never divides by zero or produces an imaginary result.
 * \return sqrt(1/a) when a > 0.0, otherwise 1.0.
 */
inline double db_SafeSqrtReciprocal(double a)
{
    if(a > 0.0) return sqrt(1.0/a);
    return 1.0;
}
/*!
 * Cube root, defined for negative input as -cbrt(-x).
 */
inline double db_CubRoot(double x)
{
    if(x < 0.0) return -pow(-x, 1.0/3.0);
    return pow(x, 1.0/3.0);
}
/*!
 * Sum of squares of the 3 elements of x.
 */
inline double db_SquareSum3(const double x[3])
{
    return x[0]*x[0] + x[1]*x[1] + x[2]*x[2];
}
/*!
 * Sum of squares of the 7 elements of x.
 */
inline double db_SquareSum7(double x[7])
{
    /* accumulate left-to-right, matching the original expression order */
    double s = x[0]*x[0];
    for(int k = 1; k < 7; ++k) s += x[k]*x[k];
    return s;
}
/*!
 * Sum of squares of the 9 elements of x.
 */
inline double db_SquareSum9(double x[9])
{
    /* accumulate left-to-right, matching the original expression order */
    double s = x[0]*x[0];
    for(int k = 1; k < 9; ++k) s += x[k]*x[k];
    return s;
}
/*!
 * Copy the 3 elements of xs into xd.
 * \param xd destination
 * \param xs source
 */
void inline db_Copy3(double xd[3],const double xs[3])
{
    for(int k = 0; k < 3; ++k) xd[k] = xs[k];
}
/*!
 * Copy the 6 elements of xs into xd.
 * \param xd destination
 * \param xs source
 */
void inline db_Copy6(double xd[6],const double xs[6])
{
    for(int k = 0; k < 6; ++k) xd[k] = xs[k];
}
/*!
 * Copy the 9 elements of xs into xd.
 * \param xd destination
 * \param xs source
 */
void inline db_Copy9(double xd[9],const double xs[9])
{
    for(int k = 0; k < 9; ++k) xd[k] = xs[k];
}
+
/*!
 * Scalar (dot) product of two 4-vectors: Transpose(A)*B.
 */
inline double db_ScalarProduct4(const double A[4],const double B[4])
{
    /* accumulate left-to-right, matching the original expression order */
    double acc = A[0]*B[0];
    for(int k = 1; k < 4; ++k) acc += A[k]*B[k];
    return acc;
}
/*!
 * Scalar (dot) product of two 7-vectors: Transpose(A)*B.
 */
inline double db_ScalarProduct7(const double A[7],const double B[7])
{
    /* accumulate left-to-right, matching the original expression order */
    double acc = A[0]*B[0];
    for(int k = 1; k < 7; ++k) acc += A[k]*B[k];
    return acc;
}
/*!
 * Scalar (dot) product of two 9-vectors: Transpose(A)*B.
 */
inline double db_ScalarProduct9(const double A[9],const double B[9])
{
    /* accumulate left-to-right, matching the original expression order */
    double acc = A[0]*B[0];
    for(int k = 1; k < 9; ++k) acc += A[k]*B[k];
    return acc;
}
/*!
 * Element-wise 6-vector addition: S = A + B.
 */
inline void db_AddVectors6(double S[6],const double A[6],const double B[6])
{
    for(int k = 0; k < 6; ++k) S[k] = A[k] + B[k];
}
/*!
 * Matrix-vector multiplication: y(3x1) = A(3x3, row-major) * x(3x1).
 */
inline void db_Multiply3x3_3x1(double y[3],const double A[9],const double x[3])
{
    for(int r = 0; r < 3; ++r)
    {
        const double *row = A + 3*r;
        y[r] = row[0]*x[0] + row[1]*x[1] + row[2]*x[2];
    }
}
/*!
 * Matrix multiplication: C(3x3) = A(3x3) * B(3x3), all row-major.
 */
inline void db_Multiply3x3_3x3(double C[9], const double A[9],const double B[9])
{
    for(int r = 0; r < 3; ++r)
    {
        const double *row = A + 3*r;
        for(int c = 0; c < 3; ++c)
        {
            C[3*r + c] = row[0]*B[c] + row[1]*B[c+3] + row[2]*B[c+6];
        }
    }
}
/*!
 * Matrix-vector multiplication: y(4x1) = A(4x4, row-major) * x(4x1).
 */
inline void db_Multiply4x4_4x1(double y[4],const double A[16],const double x[4])
{
    for(int r = 0; r < 4; ++r)
    {
        const double *row = A + 4*r;
        y[r] = row[0]*x[0] + row[1]*x[1] + row[2]*x[2] + row[3]*x[3];
    }
}
/*!
 * In-place scalar multiplication: A(3) = mult * A(3).
 */
inline void db_MultiplyScalar3(double *A,double mult)
{
    A[0] *= mult;
    A[1] *= mult;
    A[2] *= mult;
}
+
/*!
 * Scalar multiplication into a new vector: A(3) = mult * B(3).
 */
inline void db_MultiplyScalarCopy3(double *A,const double *B,double mult)
{
    for(int k = 0; k < 3; ++k) A[k] = B[k] * mult;
}
+
/*!
 * Scalar multiplication into a new vector: A(4) = mult * B(4).
 */
inline void db_MultiplyScalarCopy4(double *A,const double *B,double mult)
{
    for(int k = 0; k < 4; ++k) A[k] = B[k] * mult;
}
/*!
 * Scalar multiplication into a new vector: A(7) = mult * B(7).
 */
inline void db_MultiplyScalarCopy7(double *A,const double *B,double mult)
{
    for(int k = 0; k < 7; ++k) A[k] = B[k] * mult;
}
/*!
 * Scalar multiplication into a new vector: A(9) = mult * B(9).
 */
inline void db_MultiplyScalarCopy9(double *A,const double *B,double mult)
{
    for(int k = 0; k < 9; ++k) A[k] = B[k] * mult;
}
+
+/*!
+ * \defgroup LMImageBasicUtilities (LM) Basic Image Utility Functions
+
+ Images in db are simply 2D arrays of unsigned char or float types.
+ Only the very basic operations are supported: allocation/deallocation,
+copying, simple pyramid construction and LUT warping. These images are used
+by db_CornerDetector_u and db_Matcher_u. The db_Image class is an attempt
+to wrap these images. It has not been tested well.
+
+ */
+/*\{*/
+/*!
+ * Given a float image array, allocates and returns the set of row pointers.
+ * \param im image pointer
+ * \param w image width
+ * \param h image height
+ */
+DB_API float** db_SetupImageReferences_f(float *im,int w,int h);
+/*!
+ * Allocate a float image.
+ * Note: for feature detection images must be overallocated by 256 bytes.
+ * \param w width
+ * \param h height
+ * \param over_allocation allocate this many extra bytes at the end
+ * \return row array pointer
+ */
+DB_API float** db_AllocImage_f(int w,int h,int over_allocation=256);
+/*!
+ * Free a float image
+ * \param img row array pointer
+ * \param h image height (number of rows)
+ */
+DB_API void db_FreeImage_f(float **img,int h);
+/*!
+ * Given an unsigned char image array, allocates and returns the set of row pointers.
+ * \param im image pointer
+ * \param w image width
+ * \param h image height
+ */
+DB_API unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h);
+/*!
+ * Allocate an unsigned char image.
+ * Note: for feature detection images must be overallocated by 256 bytes.
+ * \param w width
+ * \param h height
+ * \param over_allocation allocate this many extra bytes at the end
+ * \return row array pointer
+ */
+DB_API unsigned char** db_AllocImage_u(int w,int h,int over_allocation=256);
+/*!
+ * Free an unsigned char image
+ * \param img row array pointer
+ * \param h image height (number of rows)
+ */
+DB_API void db_FreeImage_u(unsigned char **img,int h);
+
+/*!
+ Copy an image from s to d. Both s and d must be pre-allocated and of the same size.
+ Copy is done row by row.
+ \param s source
+ \param d destination
+ \param w width
+ \param h height
+ \param over_allocation copy this many bytes after the end of the last line
+ */
+DB_API void db_CopyImage_u(unsigned char **d,const unsigned char * const *s,int w,int h,int over_allocation=0);
+
/* Bilinear interpolation of image v at the fractional coordinate (x, y).
   Reads the 2x2 pixel neighbourhood (floor(x)..floor(x)+1, floor(y)..floor(y)+1),
   so the caller must guarantee 0 <= x and x+1 within the image width (and the
   same for y) -- there is no bounds checking here. */
DB_API inline unsigned char db_BilinearInterpolation(double y, double x, const unsigned char * const * v)
{
    /* integer corner coordinates of the interpolation cell */
    int floor_x=(int) x;
    int floor_y=(int) y;

    int ceil_x=floor_x+1;
    int ceil_y=floor_y+1;

    /* the four surrounding pixel values: f<row><col> offsets from the floor corner */
    unsigned char f00 = v[floor_y][floor_x];
    unsigned char f01 = v[floor_y][ceil_x];
    unsigned char f10 = v[ceil_y][floor_x];
    unsigned char f11 = v[ceil_y][ceil_x];

    /* fractional position inside the cell */
    double xl = x-floor_x;
    double yl = y-floor_y;

    /* weighted average of the four corners; truncated to unsigned char as in C cast */
    return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
}
+/*\}*/
/*!
 * \ingroup LMRotation
 * Compute an incremental rotation matrix using the update dx=[sin(phi) sin(ohm) sin(kap)]
 * The three entries of dx are treated directly as the sines of the three angles;
 * cosines are recovered as sqrt(1-s^2), clamped to 1.0 if 1-s^2 goes negative.
 */
inline void db_IncrementalRotationMatrix(double R[9],const double dx[3])
{
    double sp,so,sk,om_sp2,om_so2,om_sk2,cp,co,ck,sp_so,cp_so;

    /*Store sines*/
    sp=dx[0]; so=dx[1]; sk=dx[2];
    /* 1 - sin^2 for each angle (should be cos^2, up to rounding) */
    om_sp2=1.0-sp*sp;
    om_so2=1.0-so*so;
    om_sk2=1.0-sk*sk;
    /*Compute cosines (guard against tiny negative values from rounding)*/
    cp=(om_sp2>=0.0)?sqrt(om_sp2):1.0;
    co=(om_so2>=0.0)?sqrt(om_so2):1.0;
    ck=(om_sk2>=0.0)?sqrt(om_sk2):1.0;
    /*Compute matrix -- the product of the three per-axis rotations
      (NOTE(review): the exact axis order is fixed by these formulas; verify
      against the caller's convention before relying on it)*/
    sp_so=sp*so;
    cp_so=cp*so;
    R[0]=sp_so*sk+cp*ck;  R[1]=co*sk; R[2]=cp_so*sk-sp*ck;
    R[3]=sp_so*ck-cp*sk;  R[4]=co*ck; R[5]=cp_so*ck+sp*sk;
    R[6]=sp*co;           R[7]= -so;  R[8]=cp*co;
/*!
 * Zero out a 2-vector in place.
 */
void inline db_Zero2(double x[2])
{
    for(int k = 0; k < 2; ++k) x[k] = 0;
}
/*!
 * Zero out a 3-vector in place.
 */
void inline db_Zero3(double x[3])
{
    for(int k = 0; k < 3; ++k) x[k] = 0;
}
/*!
 * Zero out a 4-vector in place.
 */
void inline db_Zero4(double x[4])
{
    for(int k = 0; k < 4; ++k) x[k] = 0;
}
/*!
 * Zero out a 9-vector in place.
 */
void inline db_Zero9(double x[9])
{
    for(int k = 0; k < 9; ++k) x[k] = 0;
}
+
+#define DB_WARP_FAST 0
+#define DB_WARP_BILINEAR 1
+
+/*!
+ * Perform a look-up table warp.
+ * The LUTs must be float images of the same size as source image.
+ * The source value x_s is determined from destination (x_d,y_d) through lut_x
+ * and y_s is determined from lut_y:
+ \code
+ x_s = lut_x[y_d][x_d];
+ y_s = lut_y[y_d][x_d];
+ \endcode
+
+ * \param src source image
+ * \param dst destination image
+ * \param w width
+ * \param h height
+ * \param lut_x LUT for x
+ * \param lut_y LUT for y
+ * \param type warp type (DB_WARP_FAST or DB_WARP_BILINEAR)
+ */
+DB_API void db_WarpImageLut_u(const unsigned char * const * src,unsigned char ** dst, int w, int h,
+ const float * const * lut_x, const float * const * lut_y, int type=DB_WARP_BILINEAR);
+
+DB_API void db_PrintDoubleVector(double *a,long size);
+DB_API void db_PrintDoubleMatrix(double *a,long rows,long cols);
+
+#include "db_utilities_constants.h"
+#include "db_utilities_algebra.h"
+#include "db_utilities_indexing.h"
+#include "db_utilities_linalg.h"
+#include "db_utilities_poly.h"
+#include "db_utilities_geometry.h"
+#include "db_utilities_random.h"
+#include "db_utilities_rotation.h"
+#include "db_utilities_camera.h"
+
+#define DB_INVALID (-1)
+
+
+#endif /* DB_UTILITIES_H */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_algebra.h b/jni/feature_stab/db_vlvm/db_utilities_algebra.h
new file mode 100644
index 000000000..2aedd74d5
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_algebra.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_algebra.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_ALGEBRA
+#define DB_UTILITIES_ALGEBRA
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMAlgebra (LM) Algebra utilities
+ */
+/*\{*/
+
/*!
 * Scale the 3-vector x to unit Euclidean norm in place.
 * The zero vector is left unchanged (scale of 1.0), matching db_SafeSqrtReciprocal.
 * (Helpers db_SquareSum3 / db_SafeSqrtReciprocal / db_MultiplyScalar3 inlined.)
 */
inline void db_HomogenousNormalize3(double *x)
{
    double ss = x[0]*x[0] + x[1]*x[1] + x[2]*x[2];
    double scale = (ss > 0.0) ? sqrt(1.0/ss) : 1.0;
    x[0] *= scale;
    x[1] *= scale;
    x[2] *= scale;
}
+
+/*\}*/
+
+#endif /* DB_UTILITIES_ALGEBRA */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_camera.cpp b/jni/feature_stab/db_vlvm/db_utilities_camera.cpp
new file mode 100644
index 000000000..dceba9b62
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_camera.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_camera.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_camera.h"
+#include "db_utilities.h"
+#include <assert.h>
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+void db_Approx3DCalMat(double K[9],double Kinv[9],int im_width,int im_height,double f_correction,int field)
+{
+ double iw,ih,av_size,field_fact;
+
+ if(field) field_fact=2.0;
+ else field_fact=1.0;
+
+ iw=(double)im_width;
+ ih=(double)(im_height*field_fact);
+ av_size=(iw+ih)/2.0;
+ K[0]=f_correction*av_size;
+ K[1]=0;
+ K[2]=iw/2.0;
+ K[3]=0;
+ K[4]=f_correction*av_size/field_fact;
+ K[5]=ih/2.0/field_fact;
+ K[6]=0;
+ K[7]=0;
+ K[8]=1;
+
+ db_InvertCalibrationMatrix(Kinv,K);
+}
diff --git a/jni/feature_stab/db_vlvm/db_utilities_camera.h b/jni/feature_stab/db_vlvm/db_utilities_camera.h
new file mode 100644
index 000000000..26ba4420a
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_camera.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_camera.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_CAMERA
+#define DB_UTILITIES_CAMERA
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMCamera (LM) Camera Utilities
+ */
+/*\{*/
+
+#include "db_utilities.h"
+
+#define DB_RADDISTMODE_BOUGEUT 4
+#define DB_RADDISTMODE_2NDORDER 5
+#define DB_RADDISTMODE_IDENTITY 6
+
+/*!
+Give reasonable guess of the calibration matrix for normalization purposes.
+Use real K matrix when doing real geometry.
+focal length = (w+h)/2.0*f_correction.
+\param K calibration matrix (out)
+\param Kinv inverse of K (out)
+\param im_width image width
+\param im_height image height
+\param f_correction focal length correction factor
+\param field set to 1 if this is a field image (fy = fx/2)
+\return K(3x3) intrinsic calibration matrix
+*/
+DB_API void db_Approx3DCalMat(double K[9],double Kinv[9],int im_width,int im_height,double f_correction=1.0,int field=0);
+
+/*!
+ Make a 2x2 identity matrix
+ */
void inline db_Identity2x2(double A[4])
{
    /* Write the 2x2 identity into A (row-major) */
    int i;
    for(i=0;i<4;i++) A[i]=0.0;
    A[0]=A[3]=1.0;
}
+/*!
+ Make a 3x3 identity matrix
+ */
void inline db_Identity3x3(double A[9])
{
    /* Write the 3x3 identity into A (row-major) */
    int i;
    for(i=0;i<9;i++) A[i]=0.0;
    A[0]=A[4]=A[8]=1.0;
}
+/*!
+ Invert intrinsic calibration matrix K(3x3)
+ If fx or fy is 0, I is returned.
+ */
+void inline db_InvertCalibrationMatrix(double Kinv[9],const double K[9])
+{
+ double a,b,c,d,e,f,ainv,dinv,adinv;
+
+ a=K[0];b=K[1];c=K[2];d=K[4];e=K[5];f=K[8];
+ if((a==0.0)||(d==0.0)) db_Identity3x3(Kinv);
+ else
+ {
+ Kinv[3]=0.0;
+ Kinv[6]=0.0;
+ Kinv[7]=0.0;
+ Kinv[8]=1.0;
+
+ ainv=1.0/a;
+ dinv=1.0/d;
+ adinv=ainv*dinv;
+ Kinv[0]=f*ainv;
+ Kinv[1]= -b*f*adinv;
+ Kinv[2]=(b*e-c*d)*adinv;
+ Kinv[4]=f*dinv;
+ Kinv[5]= -e*dinv;
+ }
+}
+/*!
+ De-homogenize image point: xd(1:2) = xs(1:2)/xs(3).
+ If xs(3) is 0, xd will become 0
+ \param xd destination point
+ \param xs source point
+ */
void inline db_DeHomogenizeImagePoint(double xd[2],const double xs[3])
{
    /* xd(1:2) = xs(1:2)/xs(3); if the homogeneous scale xs(3) is zero,
       xd is set to the origin instead of dividing by zero */
    const double w=xs[2];

    if(w==0.0)
    {
        xd[0]=0.0;
        xd[1]=0.0;
        return;
    }
    {
        const double wi=1.0/w;
        xd[0]=xs[0]*wi;
        xd[1]=xs[1]*wi;
    }
}
+
+
+/*!
+ Orthonormalize 3D rotation R
+ */
+inline void db_OrthonormalizeRotation(double R[9])
+{
+ double s,mult;
+ /*Normalize first vector*/
+ s=db_sqr(R[0])+db_sqr(R[1])+db_sqr(R[2]);
+ mult=sqrt(1.0/(s?s:1));
+ R[0]*=mult; R[1]*=mult; R[2]*=mult;
+ /*Subtract scalar product from second vector*/
+ s=R[0]*R[3]+R[1]*R[4]+R[2]*R[5];
+ R[3]-=s*R[0]; R[4]-=s*R[1]; R[5]-=s*R[2];
+ /*Normalize second vector*/
+ s=db_sqr(R[3])+db_sqr(R[4])+db_sqr(R[5]);
+ mult=sqrt(1.0/(s?s:1));
+ R[3]*=mult; R[4]*=mult; R[5]*=mult;
+ /*Get third vector by vector product*/
+ R[6]=R[1]*R[5]-R[4]*R[2];
+ R[7]=R[2]*R[3]-R[5]*R[0];
+ R[8]=R[0]*R[4]-R[3]*R[1];
+}
+/*!
+Update a rotation with the update dx=[sin(phi) sin(ohm) sin(kap)]
+*/
inline void db_UpdateRotation(double R_p_dx[9],double R[9],const double dx[3])
{
    double R_temp[9]; /* holds the incremental rotation built from dx */
    /*Update rotation: R_p_dx = dR(dx)*R, i.e. left-multiply R by the
      small-angle increment generated from dx*/
    db_IncrementalRotationMatrix(R_temp,dx);
    db_Multiply3x3_3x3(R_p_dx,R_temp,R);
}
+/*!
+ Compute xp = Hx for inhomogenous image points.
+ */
inline void db_ImageHomographyInhomogenous(double xp[2],const double H[9],const double x[2])
{
    /* Apply homography H to the inhomogenous point x: xp = dehom(H*[x;1]).
       When the projected homogeneous scale vanishes, xp is set to zero. */
    const double w=H[6]*x[0]+H[7]*x[1]+H[8];

    if(w==0.0)
    {
        xp[0]=xp[1]=0.0;
        return;
    }
    {
        const double wi=1.0/w;
        xp[0]=wi*(H[0]*x[0]+H[1]*x[1]+H[2]);
        xp[1]=wi*(H[3]*x[0]+H[4]*x[1]+H[5]);
    }
}
inline double db_FocalFromCamRotFocalHomography(const double H[9])
{
    /* Recover the focal length from a camera-rotation-focal homography.
       NOTE(review): per db_FocalAndRotFromCamRotFocalHomography below,
       H[2],H[5] carry a factor f and H[6],H[7] a factor 1/f, giving two
       estimates of f; the better conditioned one (larger squared sum)
       is used. The db_Safe* helpers presumably guard division by zero
       and negative radicands - confirm against db_utilities.h. */
    double k1,k2;

    k1=db_sqr(H[2])+db_sqr(H[5]);
    k2=db_sqr(H[6])+db_sqr(H[7]);
    if(k1>=k2)
    {
        return(db_SafeSqrt(db_SafeDivision(k1,1.0-db_sqr(H[8]))));
    }
    else
    {
        return(db_SafeSqrt(db_SafeDivision(1.0-db_sqr(H[8]),k2)));
    }
}
+
inline double db_FocalAndRotFromCamRotFocalHomography(double R[9],const double H[9])
{
    /* Factor H = diag(f,f,1)*R*diag(1/f,1/f,1): the focal length f is
       recovered (and returned) and the rotation R is extracted by undoing
       the focal scaling of the third column (divide by f) and third
       row (multiply by f). */
    double back,fi;

    back=db_FocalFromCamRotFocalHomography(H);
    fi=db_SafeReciprocal(back); /* guarded reciprocal in case back is 0 */
    R[0]=H[0]; R[1]=H[1]; R[2]=fi*H[2];
    R[3]=H[3]; R[4]=H[4]; R[5]=fi*H[5];
    R[6]=back*H[6]; R[7]=back*H[7]; R[8]=H[8];
    return(back);
}
+/*!
+Compute Jacobian at zero of three coordinates dR*x with
+respect to the update dR([sin(phi) sin(ohm) sin(kap)]) given x.
+
+The Jacobian at zero of the homogenous coordinates with respect to
+ [sin(phi) sin(ohm) sin(kap)] is
+\code
+ [-rx2 0 rx1 ]
+ [ 0 rx2 -rx0 ]
+ [ rx0 -rx1 0 ].
+\endcode
+
+*/
inline void db_JacobianOfRotatedPointStride(double J[9],const double x[3],int stride)
{
    /* Jacobian at zero of the rotated point dR*x with respect to the
       update [sin(phi) sin(ohm) sin(kap)]; the coordinates of x are read
       with the given element stride:
       [-rx2   0   rx1 ]
       [  0   rx2 -rx0 ]
       [ rx0 -rx1   0  ] */
    const double x0=x[0];
    const double x1=x[stride];
    const double x2=x[2*stride];

    J[0]= -x2;  J[1]= 0.0;  J[2]=  x1;
    J[3]= 0.0;  J[4]=  x2;  J[5]= -x0;
    J[6]=  x0;  J[7]= -x1;  J[8]= 0.0;
}
+/*!
+ Invert an affine (if possible)
+ \param Hinv inverted matrix
+ \param H input matrix
+ \return true if success and false if matrix is ill-conditioned (det < 1e-7)
+ */
inline bool db_InvertAffineTransform(double Hinv[9],const double H[9])
{
    /*!
     Invert an affine transform (last row assumed [0 0 1]).
     \param Hinv inverted matrix (a copy of H on failure)
     \param H    input matrix
     \return true on success, false if ill-conditioned (|det| < 1e-7)
     */
    int i;
    double det=H[0]*H[4]-H[3]*H[1];
    /* Use the magnitude of the determinant: the original test det<1e-7
       wrongly rejected every orientation-reversing affine (det<0) as
       singular even when well conditioned */
    if(fabs(det)<1e-7)
    {
        for(i=0;i<9;i++) Hinv[i]=H[i];
        return false;
    }
    Hinv[0]= H[4]/det;
    Hinv[1]= -H[1]/det;
    Hinv[3]= -H[3]/det;
    Hinv[4]= H[0]/det;
    Hinv[2]= -Hinv[0]*H[2]-Hinv[1]*H[5];
    Hinv[5]= -Hinv[3]*H[2]-Hinv[4]*H[5];
    /* The original left Hinv[6..8] unwritten on success; fill in the
       affine last row so the output is always fully initialized */
    Hinv[6]=0.0; Hinv[7]=0.0; Hinv[8]=1.0;
    return true;
}
+
+/*!
+Update of upper 2x2 is multiplication by
+\code
+[s 0][ cos(theta) sin(theta)]
+[0 s][-sin(theta) cos(theta)]
+\endcode
+*/
inline void db_MultiplyScaleOntoImageHomography(double H[9],double s)
{
    /* Multiply the scale s onto the upper 2x2 of H (rows 0 and 1,
       columns 0 and 1); translation and projective parts untouched */
    int r;

    for(r=0;r<2;r++)
    {
        H[3*r]*=s;
        H[3*r+1]*=s;
    }
}
+/*!
+Update of upper 2x2 is multiplication by
+\code
+[s 0][ cos(theta) sin(theta)]
+[0 s][-sin(theta) cos(theta)]
+\endcode
+*/
inline void db_MultiplyRotationOntoImageHomography(double H[9],double theta)
{
    /*!
     Multiply a 2D rotation by theta onto the upper 2x2 of H:
     [ cos(theta) sin(theta)]
     [-sin(theta) cos(theta)]
     \param H     homography, updated in place
     \param theta rotation angle in radians
     */
    double c,s,h0,h1;

    c=cos(theta);
    /* Use sin(theta) directly: the original sqrt(1-c*c) discarded the
       sign of the angle, so negative theta rotated the wrong way */
    s=sin(theta);
    h0= c*H[0]+s*H[3];
    H[3]= -s*H[0]+c*H[3];
    H[0]=h0;
    h1= c*H[1]+s*H[4];
    H[4]= -s*H[1]+c*H[4];
    H[1]=h1;
}
+
inline void db_UpdateImageHomographyAffine(double H_p_dx[9],const double H[9],const double dx[6])
{
    /* Apply the 6-dim update dx to the affine part (first six entries)
       of H; the last row is copied through unchanged */
    db_AddVectors6(H_p_dx,H,dx);
    db_Copy3(H_p_dx+6,H+6);
}
+
inline void db_UpdateImageHomographyProjective(double H_p_dx[9],const double H[9],const double dx[8],int frozen_coord)
{
    /* Add the 8-dim update dx to H entry by entry, skipping the frozen
       coordinate, which is copied through unchanged */
    int i,j=0;

    for(i=0;i<9;i++)
    {
        if(i==frozen_coord) H_p_dx[i]=H[i];
        else H_p_dx[i]=H[i]+dx[j++];
    }
}
+
inline void db_UpdateRotFocalHomography(double H_p_dx[9],const double H[9],const double dx[4])
{
    /* Apply the update dx (three rotation parameters dx[0..2] plus a
       focal-length increment dx[3]) to a camera-rotation-focal
       homography H */
    double f,fp,fpi;
    double R[9],dR[9];

    /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/
    f=db_FocalAndRotFromCamRotFocalHomography(R,H);
    db_IncrementalRotationMatrix(dR,dx);
    db_Multiply3x3_3x3(H_p_dx,dR,R);
    fp=f+dx[3]; /* updated focal length */
    fpi=db_SafeReciprocal(fp); /* guarded reciprocal in case fp is 0 */
    H_p_dx[2]*=fp;
    H_p_dx[5]*=fp;
    H_p_dx[6]*=fpi;
    H_p_dx[7]*=fpi;
}
+
+/*\}*/
+#endif /* DB_UTILITIES_CAMERA */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_constants.h b/jni/feature_stab/db_vlvm/db_utilities_constants.h
new file mode 100644
index 000000000..07565efd2
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_constants.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_constants.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_CONSTANTS
+#define DB_UTILITIES_CONSTANTS
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+/****************Constants********************/
+#define DB_E 2.7182818284590452354
+#define DB_LOG2E 1.4426950408889634074
+#define DB_LOG10E 0.43429448190325182765
+#define DB_LN2 0.69314718055994530942
+#define DB_LN10 2.30258509299404568402
+#define DB_PI 3.1415926535897932384626433832795
+#define DB_PI_2 1.57079632679489661923
+#define DB_PI_4 0.78539816339744830962
+#define DB_1_PI 0.31830988618379067154
+#define DB_2_PI 0.63661977236758134308
+#define DB_SQRTPI 1.7724538509055160272981674833411
+#define DB_SQRT_2PI 2.506628274631000502415765284811
+#define DB_2_SQRTPI 1.12837916709551257390
+#define DB_SQRT2 1.41421356237309504880
+#define DB_SQRT3 1.7320508075688772935274463415059
+#define DB_SQRT1_2 0.70710678118654752440
+#define DB_EPS 2.220446049250313e-016 /* machine epsilon for IEEE 754 64-bit double */
+
+/****************Default Parameters********************/
+/*Preemptive ransac parameters*/
+#define DB_DEFAULT_NR_SAMPLES 500
+#define DB_DEFAULT_CHUNK_SIZE 100
+#define DB_DEFAULT_GROUP_SIZE 10
+
+/*Optimisation parameters*/
+#define DB_DEFAULT_MAX_POINTS 1000
+#define DB_DEFAULT_MAX_ITERATIONS 25
+#define DB_DEFAULT_IMP_REQ 0.001
+
+/*Feature standard deviation parameters*/
+#define DB_POINT_STANDARDDEV (1.0/(826.0)) /*1 pixel for CIF (fraction of (image width+image height)/2)*/
+#define DB_OUTLIER_THRESHOLD 3.0 /*In number of DB_POINT_STANDARDDEV's*/
+#define DB_WORST_CASE 50.0 /*In number of DB_POINT_STANDARDDEV's*/
+
+/*Front-end parameters*/
+#define DB_DEFAULT_TARGET_NR_CORNERS 5000
+#define DB_DEFAULT_NR_FEATURE_BLOCKS 10
+#define DB_DEFAULT_ABS_CORNER_THRESHOLD 50000000.0
+#define DB_DEFAULT_REL_CORNER_THRESHOLD 0.00005
+#define DB_DEFAULT_MAX_DISPARITY 0.1
+#define DB_DEFAULT_NO_DISPARITY -1.0
+#define DB_DEFAULT_MAX_TRACK_LENGTH 300
+
+#define DB_DEFAULT_MAX_NR_CAMERAS 1000
+
+#define DB_DEFAULT_TRIPLE_STEP 2
+#define DB_DEFAULT_DOUBLE_STEP 2
+#define DB_DEFAULT_SINGLE_STEP 1
+#define DB_DEFAULT_NR_SINGLES 10
+#define DB_DEFAULT_NR_DOUBLES 1
+#define DB_DEFAULT_NR_TRIPLES 1
+
+#define DB_DEFAULT_TRIFOCAL_FOUR_STEPS 40
+
+#define DB_DEFAULT_EPIPOLAR_ERROR 1 /*in pixels*/
+
+////////////////////////// DOXYGEN /////////////////////
+
+/*!
+ * \def DB_DEFAULT_GROUP_SIZE
+ * \ingroup LMRobust
+ * \brief Default group size for db_PreemptiveRansac class.
+ * Group size is the number of observation costs multiplied together
+ * before a log of the product is added to the total cost.
+*/
+
+/*!
+ * \def DB_DEFAULT_TARGET_NR_CORNERS
+ * \ingroup FeatureDetection
+ * \brief Default target number of corners
+*/
+/*!
+ * \def DB_DEFAULT_NR_FEATURE_BLOCKS
+ * \ingroup FeatureDetection
+ * \brief Default number of regions (horizontal or vertical) that are considered separately
+ * for feature detection. The greater the number, the more uniform the distribution of
+ * detected features.
+*/
+/*!
+ * \def DB_DEFAULT_ABS_CORNER_THRESHOLD
+ * \ingroup FeatureDetection
+ * \brief Absolute feature strength threshold.
+*/
+/*!
+ * \def DB_DEFAULT_REL_CORNER_THRESHOLD
+ * \ingroup FeatureDetection
+ * \brief Relative feature strength threshold.
+*/
+/*!
+ * \def DB_DEFAULT_MAX_DISPARITY
+ * \ingroup FeatureMatching
+ * \brief Maximum disparity (as fraction of image size) allowed in feature matching
+*/
+ /*!
+ * \def DB_DEFAULT_NO_DISPARITY
+ * \ingroup FeatureMatching
+ * \brief Indicates that vertical disparity is the same as horizontal disparity.
+*/
+///////////////////////////////////////////////////////////////////////////////////
+ /*!
+ * \def DB_E
+ * \ingroup LMBasicUtilities
+ * \brief e
+*/
+ /*!
+ * \def DB_LOG2E
+ * \ingroup LMBasicUtilities
+ * \brief log2(e)
+*/
+ /*!
+ * \def DB_LOG10E
+ * \ingroup LMBasicUtilities
+ * \brief log10(e)
+*/
+ /*!
+ * \def DB_LOG10E
+ * \ingroup LMBasicUtilities
+ * \brief log10(e)
+*/
+/*!
+ * \def DB_LN2
+ * \ingroup LMBasicUtilities
+ * \brief ln(2)
+*/
+/*!
+ * \def DB_LN10
+ * \ingroup LMBasicUtilities
+ * \brief ln(10)
+*/
+/*!
+ * \def DB_PI
+ * \ingroup LMBasicUtilities
+ * \brief Pi
+*/
+/*!
+ * \def DB_PI_2
+ * \ingroup LMBasicUtilities
+ * \brief Pi/2
+*/
+/*!
+ * \def DB_PI_4
+ * \ingroup LMBasicUtilities
+ * \brief Pi/4
+*/
+/*!
+ * \def DB_1_PI
+ * \ingroup LMBasicUtilities
+ * \brief 1/Pi
+*/
+/*!
+ * \def DB_2_PI
+ * \ingroup LMBasicUtilities
+ * \brief 2/Pi
+*/
+/*!
+ * \def DB_SQRTPI
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(Pi)
+*/
+/*!
+ * \def DB_SQRT_2PI
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(2*Pi)
+*/
+/*!
+ * \def DB_SQRT2
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(2)
+*/
+/*!
+ * \def DB_SQRT3
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(3)
+*/
+/*!
+ * \def DB_SQRT1_2
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(1/2)
+*/
+#endif /* DB_UTILITIES_CONSTANTS */
+
+
diff --git a/jni/feature_stab/db_vlvm/db_utilities_geometry.h b/jni/feature_stab/db_vlvm/db_utilities_geometry.h
new file mode 100644
index 000000000..f21558467
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_geometry.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_geometry.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_GEOMETRY_H
+#define DB_UTILITIES_GEOMETRY_H
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*! Get the inhomogenous 2D-point centroid of nr_point inhomogenous
+points in X*/
inline void db_PointCentroid2D(double c[2],const double *X,int nr_points)
{
    /* Centroid of nr_points inhomogenous 2D points stored flat as
       x0,y0,x1,y1,...; zero when there are no points */
    double sx=0.0,sy=0.0,m;
    int i;

    for(i=0;i<nr_points;i++)
    {
        sx+=X[2*i];
        sy+=X[2*i+1];
    }
    if(nr_points)
    {
        m=1.0/((double)nr_points);
        c[0]=sx*m;
        c[1]=sy*m;
    }
    else c[0]=c[1]=0;
}
+
inline void db_PointCentroid2D(double c[2],const double * const *X,int nr_points)
{
    /* Centroid of nr_points inhomogenous 2D points given as an array of
       pointers to (x,y) pairs; zero when there are no points */
    double sx=0.0,sy=0.0,m;
    int i;

    for(i=0;i<nr_points;i++)
    {
        sx+=X[i][0];
        sy+=X[i][1];
    }
    if(nr_points)
    {
        m=1.0/((double)nr_points);
        c[0]=sx*m;
        c[1]=sy*m;
    }
    else c[0]=c[1]=0;
}
+
+/*! Get the inhomogenous 3D-point centroid of nr_point inhomogenous
+points in X*/
inline void db_PointCentroid3D(double c[3],const double *X,int nr_points)
{
    /* Centroid of nr_points inhomogenous 3D points stored flat as
       x0,y0,z0,x1,y1,z1,...; zero when there are no points */
    double sx=0.0,sy=0.0,sz=0.0,m;
    int i;

    for(i=0;i<nr_points;i++)
    {
        sx+=X[3*i];
        sy+=X[3*i+1];
        sz+=X[3*i+2];
    }
    if(nr_points)
    {
        m=1.0/((double)nr_points);
        c[0]=sx*m;
        c[1]=sy*m;
        c[2]=sz*m;
    }
    else c[0]=c[1]=c[2]=0;
}
+
inline void db_PointCentroid3D(double c[3],const double * const *X,int nr_points)
{
    /* Centroid of nr_points inhomogenous 3D points given as an array of
       pointers to (x,y,z) triples; zero when there are no points */
    double sx=0.0,sy=0.0,sz=0.0,m;
    int i;

    for(i=0;i<nr_points;i++)
    {
        sx+=X[i][0];
        sy+=X[i][1];
        sz+=X[i][2];
    }
    if(nr_points)
    {
        m=1.0/((double)nr_points);
        c[0]=sx*m;
        c[1]=sy*m;
        c[2]=sz*m;
    }
    else c[0]=c[1]=c[2]=0;
}
+
+#endif /* DB_UTILITIES_GEOMETRY_H */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp b/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp
new file mode 100644
index 000000000..30ce03aa6
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_indexing.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_indexing.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
void db_Zero(double *d,long nr)
{
    /* Set the first nr entries of d to zero */
    double *end=d+nr;
    while(d<end) *d++=0.0;
}
+
+/*This routine breaks number in source into values smaller and larger than
+a pivot element. Values equal to the pivot are ignored*/
void db_LeanPartitionOnPivot(double pivot,double *dest,const double *source,long first,long last,long *first_equal,long *last_equal)
{
    /* Scatter source[first..last] into dest: values below the pivot are
       packed upward from dest+first, values above downward from dest+last.
       Values equal to the pivot are dropped; the untouched gap between the
       two runs is reported through first_equal/last_equal (indices into
       dest, gap empty when first_equal > last_equal). */
    long lo=first,hi=last,i;
    double v;

    for(i=first;i<=last;i++)
    {
        v=source[i];
        if(v<pivot) dest[lo++]=v;
        else if(v>pivot) dest[hi--]=v;
    }
    *first_equal=lo;
    *last_equal=hi;
}
+
double db_LeanQuickSelect(const double *s,long nr_elements,long pos,double *temp)
{
    /* Quickselect without in-place swapping: each pass partitions the
       current range into a fresh buffer, ping-ponging between the two
       halves of temp (which must hold at least 2*nr_elements doubles).
       Returns the value of zero-based ordinal pos. */
    long first=0;
    long last=nr_elements-1;
    double pivot;
    long first_equal,last_equal;
    double *tempA;
    double *tempB;
    double *tempC;
    const double *source;
    double *dest;

    tempA=temp;
    tempB=temp+nr_elements;
    source=s;
    dest=tempA;

    for(;last-first>2;)
    {
        /* Median-of-three pivot from the ends and middle of the range */
        pivot=db_TripleMedian(source[first],source[last],source[(first+last)/2]);
        db_LeanPartitionOnPivot(pivot,dest,source,first,last,&first_equal,&last_equal);

        if(first_equal>pos) last=first_equal-1;     /* target is below the pivot block */
        else if(last_equal<pos) first=last_equal+1; /* target is above the pivot block */
        else
        {
            /* pos falls inside the run of elements equal to the pivot */
            return(pivot);
        }

        /*Swap pointers: the freshly written buffer becomes the next source*/
        tempC=tempA;
        tempA=tempB;
        tempB=tempC;
        source=tempB;
        dest=tempA;
    }
    /* At most three candidates remain; their median is returned
       (NOTE(review): approximate when pos is not the middle ordinal of
       the remaining range - matches original behavior) */
    pivot=db_TripleMedian(source[first],source[last],source[(first+last)/2]);

    return(pivot);
}
+
float* db_AlignPointer_f(float *p,unsigned long nr_bytes)
{
    /*!
     Align a float pointer forward to the next nr_bytes boundary.
     \param p        pointer to align
     \param nr_bytes alignment in bytes (must be nonzero)
     \return p if already aligned, otherwise the next aligned address
     */
    /* Round-trip through uintptr_t: the original cast to unsigned long
       truncates pointers on LLP64 platforms (e.g. 64-bit Windows) */
    uintptr_t ip=(uintptr_t)p;
    uintptr_t m=ip%(uintptr_t)nr_bytes;

    if(m) return((float*)(ip-m+(uintptr_t)nr_bytes));
    return(p);
}
+
short* db_AlignPointer_s(short *p,unsigned long nr_bytes)
{
    /*!
     Align a short pointer forward to the next nr_bytes boundary.
     \param p        pointer to align
     \param nr_bytes alignment in bytes (must be nonzero)
     \return p if already aligned, otherwise the next aligned address
     */
    /* Round-trip through uintptr_t: the original cast to unsigned long
       truncates pointers on LLP64 platforms (e.g. 64-bit Windows) */
    uintptr_t ip=(uintptr_t)p;
    uintptr_t m=ip%(uintptr_t)nr_bytes;

    if(m) return((short*)(ip-m+(uintptr_t)nr_bytes));
    return(p);
}
diff --git a/jni/feature_stab/db_vlvm/db_utilities_indexing.h b/jni/feature_stab/db_vlvm/db_utilities_indexing.h
new file mode 100644
index 000000000..01eeb9ea2
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_indexing.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_indexing.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_INDEXING
+#define DB_UTILITIES_INDEXING
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+#include "db_utilities.h"
+
+/*!
+ * \defgroup LMIndexing (LM) Indexing Utilities (Order Statistics, Matrix Operations)
+ */
+/*\{*/
+
inline void db_SetupMatrixRefs(double **ar,long rows,long cols,double *a)
{
    /* Point ar[i] at row i of the rows x cols matrix stored flat in a */
    long r;
    for(r=0;r<rows;r++,a+=cols) ar[r]=a;
}
+
inline void db_SymmetricExtendUpperToLower(double **A,int rows,int cols)
{
    /* Copy the strict upper triangle into the lower triangle; the matrix
       is assumed square (cols is unused, kept for interface parity) */
    int r,c;
    for(c=0;c<rows;c++) for(r=c+1;r<rows;r++) A[r][c]=A[c][r];
}
+
void inline db_MultiplyMatrixVectorAtb(double *c,const double * const *At,const double *b,int arows,int acols)
{
    /* c = A*b where A (arows x acols) is supplied via its transpose At,
       i.e. c[i] = sum_j At[j][i]*b[j] */
    int r,k;

    for(r=0;r<arows;r++)
    {
        double sum=0.0;
        for(k=0;k<acols;k++) sum+=At[k][r]*b[k];
        c[r]=sum;
    }
}
+
inline void db_MultiplyMatricesAB(double **C,const double * const *A,const double * const *B,int arows,int acols,int bcols)
{
    /* C = A*B with A (arows x acols) and B (acols x bcols) */
    int r,c,k;
    double sum;

    for(r=0;r<arows;r++)
    {
        for(c=0;c<bcols;c++)
        {
            sum=0.0;
            for(k=0;k<acols;k++) sum+=A[r][k]*B[k][c];
            C[r][c]=sum;
        }
    }
}
+
inline void db_UpperMultiplyMatricesAtB(double **Cu,const double * const *At,const double * const *B,int arows,int acols,int bcols)
{
    /* Upper triangle (j>=i) of C = A*B where A is supplied via its
       transpose At; entries below the diagonal are left untouched */
    int r,c,k;
    double sum;

    for(r=0;r<arows;r++)
    {
        for(c=r;c<bcols;c++)
        {
            sum=0.0;
            for(k=0;k<acols;k++) sum+=At[k][r]*B[k][c];
            Cu[r][c]=sum;
        }
    }
}
+
+DB_API void db_Zero(double *d,long nr);
+
inline int db_MaxIndex2(double s[2])
{
    /* Index of the larger of two values; a tie favors index 0 */
    return (s[0]>=s[1])?0:1;
}
+
inline int db_MaxIndex3(const double s[3])
{
    /* Index of the maximum of three values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<3;i++) if(s[i]>s[pos]) pos=i;
    return(pos);
}
+
inline int db_MaxIndex4(const double s[4])
{
    /* Index of the maximum of four values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<4;i++) if(s[i]>s[pos]) pos=i;
    return(pos);
}
+
inline int db_MaxIndex5(const double s[5])
{
    /* Index of the maximum of five values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<5;i++) if(s[i]>s[pos]) pos=i;
    return(pos);
}
+
inline int db_MaxIndex6(const double s[6])
{
    /* Index of the maximum of six values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<6;i++) if(s[i]>s[pos]) pos=i;
    return(pos);
}
+
inline int db_MaxIndex7(const double s[7])
{
    /* Index of the maximum of seven values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<7;i++) if(s[i]>s[pos]) pos=i;
    return(pos);
}
+
inline int db_MinIndex7(const double s[7])
{
    /* Index of the minimum of seven values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<7;i++) if(s[i]<s[pos]) pos=i;
    return(pos);
}
+
inline int db_MinIndex9(const double s[9])
{
    /* Index of the minimum of nine values; ties keep the earliest */
    int i,pos=0;
    for(i=1;i<9;i++) if(s[i]<s[pos]) pos=i;
    return(pos);
}
+
inline int db_MaxAbsIndex3(const double *s)
{
    /* Index of the value with the largest magnitude among three;
       ties keep the earliest */
    int i,pos=0;
    double t,best=fabs(s[0]);

    for(i=1;i<3;i++)
    {
        t=fabs(s[i]);
        if(t>best){best=t;pos=i;}
    }
    return(pos);
}
+
inline int db_MaxAbsIndex9(const double *s)
{
    /* Index of the value with the largest magnitude among nine;
       ties keep the earliest */
    int i,pos=0;
    double t,best=fabs(s[0]);

    for(i=1;i<9;i++)
    {
        t=fabs(s[i]);
        if(t>best){best=t;pos=i;}
    }
    return(pos);
}
+
+
+/*!
+Select ordinal pos (zero based) out of nr_elements in s.
+temp should point to alloced memory of at least nr_elements*2
+Optimized runtimes on 450MHz:
+\code
+ 30 with 3 microsecs
+ 100 with 11 microsecs
+ 300 with 30 microsecs
+ 500 with 40 microsecs
+1000 with 100 microsecs
+5000 with 540 microsecs
+\endcode
+so the expected runtime is around
+(nr_elements/10) microseconds
+The total quickselect cost of splitting 500 hypotheses recursively
+is thus around 100 microseconds
+
+Does the same operation as std::nth_element().
+*/
+DB_API double db_LeanQuickSelect(const double *s,long nr_elements,long pos,double *temp);
+
+/*!
+ Median of 3 doubles
+ */
inline double db_TripleMedian(double a,double b,double c)
{
    /* Median of three doubles: clamp c to the interval [min(a,b),max(a,b)] */
    double lo=(a<b)?a:b;
    double hi=(a<b)?b:a;

    if(c>hi) return(hi);
    if(c<lo) return(lo);
    return(c);
}
+
+/*!
+Align float pointer to nr_bytes by moving forward
+*/
+DB_API float* db_AlignPointer_f(float *p,unsigned long nr_bytes);
+
+/*!
+Align short pointer to nr_bytes by moving forward
+*/
+DB_API short* db_AlignPointer_s(short *p,unsigned long nr_bytes);
+
+#endif /* DB_UTILITIES_INDEXING */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_linalg.cpp b/jni/feature_stab/db_vlvm/db_utilities_linalg.cpp
new file mode 100644
index 000000000..8f68b303a
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_linalg.cpp
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_linalg.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_linalg.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
+/*Cholesky-factorize symmetric positive definite 6 x 6 matrix A
+(stored row-major in A[36]) as A=L*transpose(L). Upper
+part of A is used from the input. The Cholesky factor L is output as
+subdiagonal part in A and diagonal in d, which is 6-dimensional.
+This is a fully unrolled version of the loop in
+db_CholeskyDecompSeparateDiagonal: for each pivot i the diagonal entry
+L(i,i) is computed first, then the whole column of L below it.
+If A is not positive definite the offending pivot is replaced by 1.0,
+so sqrt is never applied to a non-positive number and the output stays
+finite (though the factorization is then not exact)*/
+void db_CholeskyDecomp6x6(double A[36],double d[6])
+{
+ double s,temp;
+
+ /*[50 mult 35 add 6sqrt=85flops 6func]*/
+ /*i=0*/
+ s=A[0]; /*s is the squared pivot candidate L(i,i)^2*/
+ d[0]=((s>0.0)?sqrt(s):1.0); /*guard: non-positive pivot -> 1.0*/
+ temp=db_SafeReciprocal(d[0]); /*1/L(i,i), safe against zero*/
+ A[6]=A[1]*temp;
+ A[12]=A[2]*temp;
+ A[18]=A[3]*temp;
+ A[24]=A[4]*temp;
+ A[30]=A[5]*temp;
+ /*i=1*/
+ s=A[7]-A[6]*A[6];
+ d[1]=((s>0.0)?sqrt(s):1.0);
+ temp=db_SafeReciprocal(d[1]);
+ A[13]=(A[8]-A[6]*A[12])*temp;
+ A[19]=(A[9]-A[6]*A[18])*temp;
+ A[25]=(A[10]-A[6]*A[24])*temp;
+ A[31]=(A[11]-A[6]*A[30])*temp;
+ /*i=2*/
+ s=A[14]-A[12]*A[12]-A[13]*A[13];
+ d[2]=((s>0.0)?sqrt(s):1.0);
+ temp=db_SafeReciprocal(d[2]);
+ A[20]=(A[15]-A[12]*A[18]-A[13]*A[19])*temp;
+ A[26]=(A[16]-A[12]*A[24]-A[13]*A[25])*temp;
+ A[32]=(A[17]-A[12]*A[30]-A[13]*A[31])*temp;
+ /*i=3*/
+ s=A[21]-A[18]*A[18]-A[19]*A[19]-A[20]*A[20];
+ d[3]=((s>0.0)?sqrt(s):1.0);
+ temp=db_SafeReciprocal(d[3]);
+ A[27]=(A[22]-A[18]*A[24]-A[19]*A[25]-A[20]*A[26])*temp;
+ A[33]=(A[23]-A[18]*A[30]-A[19]*A[31]-A[20]*A[32])*temp;
+ /*i=4*/
+ s=A[28]-A[24]*A[24]-A[25]*A[25]-A[26]*A[26]-A[27]*A[27];
+ d[4]=((s>0.0)?sqrt(s):1.0);
+ temp=db_SafeReciprocal(d[4]);
+ A[34]=(A[29]-A[24]*A[30]-A[25]*A[31]-A[26]*A[32]-A[27]*A[33])*temp;
+ /*i=5*/
+ s=A[35]-A[30]*A[30]-A[31]*A[31]-A[32]*A[32]-A[33]*A[33]-A[34]*A[34];
+ d[5]=((s>0.0)?sqrt(s):1.0);
+}
+
+/*Cholesky-factorize symmetric positive definite n x n matrix A as
+A=L*transpose(L). Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is n-dimensional.
+Non-positive pivots are replaced by 1.0 so sqrt never receives a
+non-positive argument and the output stays finite.
+Note the traversal order: for each i the diagonal case j==i is visited
+first, so temp always holds the reciprocal of the current pivot d[i]
+when the subdiagonal entries L(j,i) of column i are formed*/
+void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n)
+{
+ int i,j,k;
+ double s;
+ double temp = 0.0;
+
+ for(i=0;i<n;i++) for(j=i;j<n;j++)
+ {
+ if(i==j) s=d[i]; /*diagonal of the input lives in d*/
+ else s=A[i][j]; /*strictly upper part is read from A*/
+ for(k=i-1;k>=0;k--) s-=A[i][k]*A[j][k];
+ if(i==j)
+ {
+ d[i]=((s>0.0)?sqrt(s):1.0); /*guard against non-PD input*/
+ temp=db_SafeReciprocal(d[i]);
+ }
+ else A[j][i]=s*temp; /*subdiagonal entry L(j,i)*/
+ }
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of an n x n matrix and the right hand side b. The vector b is unchanged.
+A and d hold the factor L as produced by the decomposition routines:
+strictly subdiagonal part in A, diagonal in d. The solve is two sweeps
+over the same buffer x: forward substitution L*y=b (y stored in x),
+then back substitution transpose(L)*x=y*/
+void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b)
+{
+ int i,k;
+ double s;
+
+ for(i=0;i<n;i++) /*forward sweep: solve L*y=b*/
+ {
+ for(s=b[i],k=i-1;k>=0;k--) s-=A[i][k]*x[k];
+ x[i]=db_SafeDivision(s,d[i]);
+ }
+ for(i=n-1;i>=0;i--) /*back sweep: solve transpose(L)*x=y*/
+ {
+ for(s=x[i],k=i+1;k<n;k++) s-=A[k][i]*x[k];
+ x[i]=db_SafeDivision(s,d[i]);
+ }
+}
+
+/*Cholesky-factorize symmetric positive definite 3 x 3 matrix A. Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as subdiagonal part in A
+and diagonal in d, which is 3-dimensional.
+Fully unrolled version of db_CholeskyDecompSeparateDiagonal for n=3;
+non-positive pivots are replaced by 1.0 so sqrt never receives a
+non-positive argument*/
+void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3])
+{
+ double s,temp;
+
+ /*i=0*/
+ s=d[0]; /*diagonal of the input lives in d*/
+ d[0]=((s>0.0)?sqrt(s):1.0); /*guard: non-positive pivot -> 1.0*/
+ temp=db_SafeReciprocal(d[0]); /*1/L(0,0), safe against zero*/
+ A[3]=A[1]*temp;
+ A[6]=A[2]*temp;
+ /*i=1*/
+ s=d[1]-A[3]*A[3];
+ d[1]=((s>0.0)?sqrt(s):1.0);
+ temp=db_SafeReciprocal(d[1]);
+ A[7]=(A[5]-A[3]*A[6])*temp;
+ /*i=2*/
+ s=d[2]-A[6]*A[6]-A[7]*A[7];
+ d[2]=((s>0.0)?sqrt(s):1.0);
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 3 x 3 matrix and the right hand side b. The vector b is unchanged.
+Unrolled version of db_CholeskyBacksub for n=3: forward substitution
+solves L*y=b with y stored in x, back substitution then solves
+transpose(L)*x=y in place*/
+void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3])
+{
+ /*forward sweep (the former flop-count comment [42 mult 30 add=72flops]
+ was copied from the 6x6 version and did not apply here)*/
+ x[0]=db_SafeDivision(b[0],d[0]);
+ x[1]=db_SafeDivision((b[1]-A[3]*x[0]),d[1]);
+ x[2]=db_SafeDivision((b[2]-A[6]*x[0]-A[7]*x[1]),d[2]);
+ x[2]=db_SafeDivision(x[2],d[2]); /*back sweep begins; x[2] is divided by d[2] twice on purpose, once per sweep*/
+ x[1]=db_SafeDivision((x[1]-A[7]*x[2]),d[1]);
+ x[0]=db_SafeDivision((x[0]-A[6]*x[2]-A[3]*x[1]),d[0]);
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 6 x 6 matrix and the right hand side b. The vector b is unchanged.
+Unrolled version of db_CholeskyBacksub for n=6: forward substitution
+solves L*y=b with y stored in x, back substitution then solves
+transpose(L)*x=y. x[5] is divided by d[5] twice on purpose:
+once ending the forward sweep, once starting the back sweep*/
+void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6])
+{
+ /*[42 mult 30 add=72flops]*/
+ x[0]=db_SafeDivision(b[0],d[0]); /*forward sweep*/
+ x[1]=db_SafeDivision((b[1]-A[6]*x[0]),d[1]);
+ x[2]=db_SafeDivision((b[2]-A[12]*x[0]-A[13]*x[1]),d[2]);
+ x[3]=db_SafeDivision((b[3]-A[18]*x[0]-A[19]*x[1]-A[20]*x[2]),d[3]);
+ x[4]=db_SafeDivision((b[4]-A[24]*x[0]-A[25]*x[1]-A[26]*x[2]-A[27]*x[3]),d[4]);
+ x[5]=db_SafeDivision((b[5]-A[30]*x[0]-A[31]*x[1]-A[32]*x[2]-A[33]*x[3]-A[34]*x[4]),d[5]);
+ x[5]=db_SafeDivision(x[5],d[5]); /*back sweep begins*/
+ x[4]=db_SafeDivision((x[4]-A[34]*x[5]),d[4]);
+ x[3]=db_SafeDivision((x[3]-A[33]*x[5]-A[27]*x[4]),d[3]);
+ x[2]=db_SafeDivision((x[2]-A[32]*x[5]-A[26]*x[4]-A[20]*x[3]),d[2]);
+ x[1]=db_SafeDivision((x[1]-A[31]*x[5]-A[25]*x[4]-A[19]*x[3]-A[13]*x[2]),d[1]);
+ x[0]=db_SafeDivision((x[0]-A[30]*x[5]-A[24]*x[4]-A[18]*x[3]-A[12]*x[2]-A[6]*x[1]),d[0]);
+}
+
+
/*Orthogonalize the six 7-dimensional rows of the row-major matrix A in
place with modified Gram-Schmidt, pivoting at each step on the row with
the largest remaining square sum. ss[k] tracks the square sum of row k:
db_OrthogonalizePair7 returns the amount by which the square sum of the
modified row decreases, so ss stays current without recomputation.
If orthonormalize is nonzero the rows are also scaled to unit norm.
Declared in db_utilities_linalg.h with orthonormalize defaulting to 0.*/
+void db_Orthogonalize6x7(double A[42],int orthonormalize)
+{
+ int i;
+ double ss[6];
+
+ /*Compute square sums of rows*/
+ ss[0]=db_SquareSum7(A);
+ ss[1]=db_SquareSum7(A+7);
+ ss[2]=db_SquareSum7(A+14);
+ ss[3]=db_SquareSum7(A+21);
+ ss[4]=db_SquareSum7(A+28);
+ ss[5]=db_SquareSum7(A+35);
+
+ ss[1]-=db_OrthogonalizePair7(A+7 ,A,ss[0]);
+ ss[2]-=db_OrthogonalizePair7(A+14,A,ss[0]);
+ ss[3]-=db_OrthogonalizePair7(A+21,A,ss[0]);
+ ss[4]-=db_OrthogonalizePair7(A+28,A,ss[0]);
+ ss[5]-=db_OrthogonalizePair7(A+35,A,ss[0]);
+
+ /*Pivot on largest ss (could also be done on ss/(original_ss))*/
+ i=db_MaxIndex5(ss+1); /*index is relative to row 1*/
+ db_OrthogonalizationSwap7(A+7,i,ss+1); /*bring that row (and its ss) to position 1*/
+
+ ss[2]-=db_OrthogonalizePair7(A+14,A+7,ss[1]);
+ ss[3]-=db_OrthogonalizePair7(A+21,A+7,ss[1]);
+ ss[4]-=db_OrthogonalizePair7(A+28,A+7,ss[1]);
+ ss[5]-=db_OrthogonalizePair7(A+35,A+7,ss[1]);
+
+ i=db_MaxIndex4(ss+2);
+ db_OrthogonalizationSwap7(A+14,i,ss+2);
+
+ ss[3]-=db_OrthogonalizePair7(A+21,A+14,ss[2]);
+ ss[4]-=db_OrthogonalizePair7(A+28,A+14,ss[2]);
+ ss[5]-=db_OrthogonalizePair7(A+35,A+14,ss[2]);
+
+ i=db_MaxIndex3(ss+3);
+ db_OrthogonalizationSwap7(A+21,i,ss+3);
+
+ ss[4]-=db_OrthogonalizePair7(A+28,A+21,ss[3]);
+ ss[5]-=db_OrthogonalizePair7(A+35,A+21,ss[3]);
+
+ i=db_MaxIndex2(ss+4);
+ db_OrthogonalizationSwap7(A+28,i,ss+4);
+
+ ss[5]-=db_OrthogonalizePair7(A+35,A+28,ss[4]);
+
+ if(orthonormalize)
+ {
+ db_MultiplyScalar7(A ,db_SafeSqrtReciprocal(ss[0]));
+ db_MultiplyScalar7(A+7 ,db_SafeSqrtReciprocal(ss[1]));
+ db_MultiplyScalar7(A+14,db_SafeSqrtReciprocal(ss[2]));
+ db_MultiplyScalar7(A+21,db_SafeSqrtReciprocal(ss[3]));
+ db_MultiplyScalar7(A+28,db_SafeSqrtReciprocal(ss[4]));
+ db_MultiplyScalar7(A+35,db_SafeSqrtReciprocal(ss[5]));
+ }
+}
+
/*Orthogonalize the eight 9-dimensional rows of the row-major matrix A in
place with modified Gram-Schmidt, pivoting at each step on the row with
the largest remaining square sum. ss[k] tracks the square sum of row k:
db_OrthogonalizePair9 returns the amount by which the square sum of the
modified row decreases, so ss stays current without recomputation.
If orthonormalize is nonzero the rows are also scaled to unit norm.
Declared in db_utilities_linalg.h with orthonormalize defaulting to 0.*/
+void db_Orthogonalize8x9(double A[72],int orthonormalize)
+{
+ int i;
+ double ss[8];
+
+ /*Compute square sums of rows*/
+ ss[0]=db_SquareSum9(A);
+ ss[1]=db_SquareSum9(A+9);
+ ss[2]=db_SquareSum9(A+18);
+ ss[3]=db_SquareSum9(A+27);
+ ss[4]=db_SquareSum9(A+36);
+ ss[5]=db_SquareSum9(A+45);
+ ss[6]=db_SquareSum9(A+54);
+ ss[7]=db_SquareSum9(A+63);
+
+ ss[1]-=db_OrthogonalizePair9(A+9 ,A,ss[0]);
+ ss[2]-=db_OrthogonalizePair9(A+18,A,ss[0]);
+ ss[3]-=db_OrthogonalizePair9(A+27,A,ss[0]);
+ ss[4]-=db_OrthogonalizePair9(A+36,A,ss[0]);
+ ss[5]-=db_OrthogonalizePair9(A+45,A,ss[0]);
+ ss[6]-=db_OrthogonalizePair9(A+54,A,ss[0]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A,ss[0]);
+
+ /*Pivot on largest ss (could also be done on ss/(original_ss))*/
+ i=db_MaxIndex7(ss+1); /*index is relative to row 1*/
+ db_OrthogonalizationSwap9(A+9,i,ss+1); /*bring that row (and its ss) to position 1*/
+
+ ss[2]-=db_OrthogonalizePair9(A+18,A+9,ss[1]);
+ ss[3]-=db_OrthogonalizePair9(A+27,A+9,ss[1]);
+ ss[4]-=db_OrthogonalizePair9(A+36,A+9,ss[1]);
+ ss[5]-=db_OrthogonalizePair9(A+45,A+9,ss[1]);
+ ss[6]-=db_OrthogonalizePair9(A+54,A+9,ss[1]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A+9,ss[1]);
+
+ i=db_MaxIndex6(ss+2);
+ db_OrthogonalizationSwap9(A+18,i,ss+2);
+
+ ss[3]-=db_OrthogonalizePair9(A+27,A+18,ss[2]);
+ ss[4]-=db_OrthogonalizePair9(A+36,A+18,ss[2]);
+ ss[5]-=db_OrthogonalizePair9(A+45,A+18,ss[2]);
+ ss[6]-=db_OrthogonalizePair9(A+54,A+18,ss[2]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A+18,ss[2]);
+
+ i=db_MaxIndex5(ss+3);
+ db_OrthogonalizationSwap9(A+27,i,ss+3);
+
+ ss[4]-=db_OrthogonalizePair9(A+36,A+27,ss[3]);
+ ss[5]-=db_OrthogonalizePair9(A+45,A+27,ss[3]);
+ ss[6]-=db_OrthogonalizePair9(A+54,A+27,ss[3]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A+27,ss[3]);
+
+ i=db_MaxIndex4(ss+4);
+ db_OrthogonalizationSwap9(A+36,i,ss+4);
+
+ ss[5]-=db_OrthogonalizePair9(A+45,A+36,ss[4]);
+ ss[6]-=db_OrthogonalizePair9(A+54,A+36,ss[4]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A+36,ss[4]);
+
+ i=db_MaxIndex3(ss+5);
+ db_OrthogonalizationSwap9(A+45,i,ss+5);
+
+ ss[6]-=db_OrthogonalizePair9(A+54,A+45,ss[5]);
+ ss[7]-=db_OrthogonalizePair9(A+63,A+45,ss[5]);
+
+ i=db_MaxIndex2(ss+6);
+ db_OrthogonalizationSwap9(A+54,i,ss+6);
+
+ ss[7]-=db_OrthogonalizePair9(A+63,A+54,ss[6]);
+
+ if(orthonormalize)
+ {
+ db_MultiplyScalar9(A ,db_SafeSqrtReciprocal(ss[0]));
+ db_MultiplyScalar9(A+9 ,db_SafeSqrtReciprocal(ss[1]));
+ db_MultiplyScalar9(A+18,db_SafeSqrtReciprocal(ss[2]));
+ db_MultiplyScalar9(A+27,db_SafeSqrtReciprocal(ss[3]));
+ db_MultiplyScalar9(A+36,db_SafeSqrtReciprocal(ss[4]));
+ db_MultiplyScalar9(A+45,db_SafeSqrtReciprocal(ss[5]));
+ db_MultiplyScalar9(A+54,db_SafeSqrtReciprocal(ss[6]));
+ db_MultiplyScalar9(A+63,db_SafeSqrtReciprocal(ss[7]));
+ }
+}
+
/*Compute a unit 7-vector x orthogonal to all six rows of A, assuming the
rows of A are orthonormal (as produced by db_Orthogonalize6x7 with
orthonormalize nonzero). The standard basis vector e_i with the smallest
projection onto the row space (smallest column square sum omss[i]) is
chosen and its row-space component subtracted: x = e_i - transpose(A)*A*e_i.
For orthonormal rows the square norm of that residual is 1.0-omss[i],
which is used for the final normalization.*/
+void db_NullVectorOrthonormal6x7(double x[7],const double A[42])
+{
+ int i;
+ double omss[7];
+ const double *B;
+
+ /*Pivot by choosing row of the identity matrix
+ (the one corresponding to column of A with smallest square sum)*/
+ omss[0]=db_SquareSum6Stride7(A);
+ omss[1]=db_SquareSum6Stride7(A+1);
+ omss[2]=db_SquareSum6Stride7(A+2);
+ omss[3]=db_SquareSum6Stride7(A+3);
+ omss[4]=db_SquareSum6Stride7(A+4);
+ omss[5]=db_SquareSum6Stride7(A+5);
+ omss[6]=db_SquareSum6Stride7(A+6);
+ i=db_MinIndex7(omss);
+ /*orthogonalize that row against all previous rows
+ and normalize it*/
+ B=A+i; /*B[7*k] is entry i of row k, i.e. column i of A*/
+ db_MultiplyScalarCopy7(x,A,-B[0]);
+ db_RowOperation7(x,A+7 ,B[7]);
+ db_RowOperation7(x,A+14,B[14]);
+ db_RowOperation7(x,A+21,B[21]);
+ db_RowOperation7(x,A+28,B[28]);
+ db_RowOperation7(x,A+35,B[35]);
+ x[i]+=1.0; /*x now equals e_i minus its row-space component*/
+ db_MultiplyScalar7(x,db_SafeSqrtReciprocal(1.0-omss[i]));
+}
+
/*Compute a unit 9-vector x orthogonal to all eight rows of A, assuming the
rows of A are orthonormal (as produced by db_Orthogonalize8x9 with
orthonormalize nonzero). The standard basis vector e_i with the smallest
projection onto the row space (smallest column square sum omss[i]) is
chosen and its row-space component subtracted: x = e_i - transpose(A)*A*e_i.
For orthonormal rows the square norm of that residual is 1.0-omss[i],
which is used for the final normalization.*/
+void db_NullVectorOrthonormal8x9(double x[9],const double A[72])
+{
+ int i;
+ double omss[9];
+ const double *B;
+
+ /*Pivot by choosing row of the identity matrix
+ (the one corresponding to column of A with smallest square sum)*/
+ omss[0]=db_SquareSum8Stride9(A);
+ omss[1]=db_SquareSum8Stride9(A+1);
+ omss[2]=db_SquareSum8Stride9(A+2);
+ omss[3]=db_SquareSum8Stride9(A+3);
+ omss[4]=db_SquareSum8Stride9(A+4);
+ omss[5]=db_SquareSum8Stride9(A+5);
+ omss[6]=db_SquareSum8Stride9(A+6);
+ omss[7]=db_SquareSum8Stride9(A+7);
+ omss[8]=db_SquareSum8Stride9(A+8);
+ i=db_MinIndex9(omss);
+ /*orthogonalize that row against all previous rows
+ and normalize it*/
+ B=A+i; /*B[9*k] is entry i of row k, i.e. column i of A*/
+ db_MultiplyScalarCopy9(x,A,-B[0]);
+ db_RowOperation9(x,A+9 ,B[9]);
+ db_RowOperation9(x,A+18,B[18]);
+ db_RowOperation9(x,A+27,B[27]);
+ db_RowOperation9(x,A+36,B[36]);
+ db_RowOperation9(x,A+45,B[45]);
+ db_RowOperation9(x,A+54,B[54]);
+ db_RowOperation9(x,A+63,B[63]);
+ x[i]+=1.0; /*x now equals e_i minus its row-space component*/
+ db_MultiplyScalar9(x,db_SafeSqrtReciprocal(1.0-omss[i]));
+}
+
diff --git a/jni/feature_stab/db_vlvm/db_utilities_linalg.h b/jni/feature_stab/db_vlvm/db_utilities_linalg.h
new file mode 100644
index 000000000..1f63d4e57
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_linalg.h
@@ -0,0 +1,802 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_linalg.h,v 1.5 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_LINALG
+#define DB_UTILITIES_LINALG
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMLinAlg (LM) Linear Algebra Utilities (QR factorization, orthogonal basis, etc.)
+ */
+
/*!
 \ingroup LMBasicUtilities
 Multiply each of the 6 entries of A in place by the scalar mult
 */
inline void db_MultiplyScalar6(double A[6],double mult)
{
    for(int i=0;i<6;i++) A[i]*=mult;
}
/*!
 \ingroup LMBasicUtilities
 Multiply each of the 7 entries of A in place by the scalar mult
 */
inline void db_MultiplyScalar7(double A[7],double mult)
{
    for(int i=0;i<7;i++) A[i]*=mult;
}
/*!
 \ingroup LMBasicUtilities
 Multiply each of the 9 entries of A in place by the scalar mult
 */
inline void db_MultiplyScalar9(double A[9],double mult)
{
    for(int i=0;i<9;i++) A[i]*=mult;
}
+
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline double db_SquareSum6Stride7(const double *x)
+{
+ return(db_sqr(x[0])+db_sqr(x[7])+db_sqr(x[14])+
+ db_sqr(x[21])+db_sqr(x[28])+db_sqr(x[35]));
+}
+
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline double db_SquareSum8Stride9(const double *x)
+{
+ return(db_sqr(x[0])+db_sqr(x[9])+db_sqr(x[18])+
+ db_sqr(x[27])+db_sqr(x[36])+db_sqr(x[45])+
+ db_sqr(x[54])+db_sqr(x[63]));
+}
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite 6 x 6 matrix A. Upper
+part of A is used from the input. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is 6-dimensional
+1.9 microseconds on 450MHz*/
+DB_API void db_CholeskyDecomp6x6(double A[36],double d[6]);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 6 x 6 matrix and the right hand side b. The vector b is unchanged
+1.3 microseconds on 450MHz*/
+DB_API void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6]);
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite n x n matrix A. Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is n-dimensional*/
+DB_API void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of an n x n matrix and the right hand side b. The vector b is unchanged*/
+DB_API void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b);
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite 3 x 3 matrix A. Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as subdiagonal part in A
+and diagonal in d, which is 3-dimensional*/
+DB_API void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3]);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 3 x 3 matrix and the right hand side b. The vector b is unchanged*/
+DB_API void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3]);
+
/*!
 \ingroup LMLinAlg
 perform A-=B*mult on 3-vectors
 */
inline void db_RowOperation3(double A[3],const double B[3],double mult)
{
    A[0]-=mult*B[0];
    A[1]-=mult*B[1];
    A[2]-=mult*B[2];
}
+
/*!
 \ingroup LMLinAlg
 perform A-=B*mult on 7-vectors
 */
inline void db_RowOperation7(double A[7],const double B[7],double mult)
{
    for(int i=0;i<7;i++) A[i]-=mult*B[i];
}
+
/*!
 \ingroup LMLinAlg
 perform A-=B*mult on 9-vectors
 */
inline void db_RowOperation9(double A[9],const double B[9],double mult)
{
    for(int i=0;i<9;i++) A[i]-=mult*B[i];
}
+
/*!
 \ingroup LMBasicUtilities
 Swap values of A[7] and B[7]
 */
inline void db_Swap7(double A[7],double B[7])
{
    for(int i=0;i<7;i++)
    {
        double t=A[i];
        A[i]=B[i];
        B[i]=t;
    }
}
+
/*!
 \ingroup LMBasicUtilities
 Swap values of A[9] and B[9]
 */
inline void db_Swap9(double A[9],double B[9])
{
    for(int i=0;i<9;i++)
    {
        double t=A[i];
        A[i]=B[i];
        B[i]=t;
    }
}
+
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_Orthogonalize6x7(double A[42],int orthonormalize=0);
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_Orthogonalize8x9(double A[72],int orthonormalize=0);
+
+/*!
+ \ingroup LMLinAlg
+ Orthogonalize the 7-vector x against the 7-vector v in place, where
+ ssv is the square sum of v: x -= v*(x.v)/ssv.
+ Returns (x.v)^2/ssv, which (in exact arithmetic) is the amount by
+ which the square sum of x decreases, so callers can maintain square
+ sums incrementally without recomputing them.
+ */
+inline double db_OrthogonalizePair7(double *x,const double *v,double ssv)
+{
+ double m,sp,sp_m;
+
+ m=db_SafeReciprocal(ssv); /*1/ssv, safe against zero*/
+ sp=db_ScalarProduct7(x,v);
+ sp_m=sp*m; /*projection coefficient (x.v)/ssv*/
+ db_RowOperation7(x,v,sp_m); /*x -= v*sp_m*/
+ return(sp*sp_m);
+}
+
+/*!
+ \ingroup LMLinAlg
+ Orthogonalize the 9-vector x against the 9-vector v in place, where
+ ssv is the square sum of v: x -= v*(x.v)/ssv.
+ Returns (x.v)^2/ssv, which (in exact arithmetic) is the amount by
+ which the square sum of x decreases, so callers can maintain square
+ sums incrementally without recomputing them.
+ */
+inline double db_OrthogonalizePair9(double *x,const double *v,double ssv)
+{
+ double m,sp,sp_m;
+
+ m=db_SafeReciprocal(ssv); /*1/ssv, safe against zero*/
+ sp=db_ScalarProduct9(x,v);
+ sp_m=sp*m; /*projection coefficient (x.v)/ssv*/
+ db_RowOperation9(x,v,sp_m); /*x -= v*sp_m*/
+ return(sp*sp_m);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_OrthogonalizationSwap7(double *A,int i,double *ss)
+{
+ double temp;
+
+ db_Swap7(A,A+7*i);
+ temp=ss[0]; ss[0]=ss[i]; ss[i]=temp;
+}
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_OrthogonalizationSwap9(double *A,int i,double *ss)
+{
+ double temp;
+
+ db_Swap9(A,A+9*i);
+ temp=ss[0]; ss[0]=ss[i]; ss[i]=temp;
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_NullVectorOrthonormal6x7(double x[7],const double A[42]);
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_NullVectorOrthonormal8x9(double x[9],const double A[72]);
+
+/*!
+ \ingroup LMLinAlg
+ Compute a unit null vector x of the 6 x 7 matrix A, i.e. a unit
+ 7-vector orthogonal to all six rows of A. Destroys the contents of A
+ (the rows are orthonormalized in place), hence "Destructive".
+ */
+inline void db_NullVector6x7Destructive(double x[7],double A[42])
+{
+ db_Orthogonalize6x7(A,1); /*orthonormalize the rows in place*/
+ db_NullVectorOrthonormal6x7(x,A); /*then extract the null vector*/
+}
+
+/*!
+ \ingroup LMLinAlg
+ Compute a unit null vector x of the 8 x 9 matrix A, i.e. a unit
+ 9-vector orthogonal to all eight rows of A. Destroys the contents of A
+ (the rows are orthonormalized in place), hence "Destructive".
+ */
+inline void db_NullVector8x9Destructive(double x[9],double A[72])
+{
+ db_Orthogonalize8x9(A,1); /*orthonormalize the rows in place*/
+ db_NullVectorOrthonormal8x9(x,A); /*then extract the null vector*/
+}
+
/*!
 \ingroup LMLinAlg
 Scalar product of 512-vectors (short).
 Unlike db_ScalarProduct128_s there is no MMX/SSE2 version of this
 function, so the plain C implementation is used unconditionally.
 Previously the whole body was guarded by #ifndef DB_USE_MMX, so with
 DB_USE_MMX defined the function compiled to a non-void function with
 no return statement at all (undefined behavior).
 */
inline int db_ScalarProduct512_s(const short *f,const short *g)
{
    int back=0;
    for(int i=0;i<512;i++)
        back+=(*f++)*(*g++);
    return(back);
}
+
+
/*!
 \ingroup LMLinAlg
 Scalar product of 32-vectors (short).
 Unlike db_ScalarProduct128_s there is no MMX/SSE2 version of this
 function, so the plain C implementation is used unconditionally.
 Previously the whole body was guarded by #ifndef DB_USE_MMX, so with
 DB_USE_MMX defined the function compiled to a non-void function with
 no return statement at all (undefined behavior).
 */
inline int db_ScalarProduct32_s(const short *f,const short *g)
{
    int back=0;
    for(int i=0;i<32;i++)
        back+=(*f++)*(*g++);
    return(back);
}
+
+/*!
+ \ingroup LMLinAlg
+ Scalar product of 128-vectors (short).
+ Compile-time control: MMX, SSE2 or standard C.
+ The MMX/SSE2 paths use pmaddwd, which multiplies pairs of signed
+ 16-bit elements and accumulates adjacent products into 32-bit lanes;
+ the lanes are summed at the end. The SSE2 path loads with movdqa, so
+ in that configuration both f and g must be 16-byte aligned.
+ NOTE(review): the emms at the end of the SSE2 path is only needed
+ after MMX register use - presumably kept for safety; confirm before
+ removing. The _asm blocks are MSVC 32-bit inline assembly.
+ */
+inline int db_ScalarProduct128_s(const short *f,const short *g)
+{
+#ifndef DB_USE_MMX
+ int back;
+ back=0;
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ return(back);
+#else
+#ifdef DB_USE_SSE2
+ int back;
+
+ _asm
+ {
+ mov eax,f
+ mov ecx,g
+ /*First iteration************************************/
+ movdqa xmm0,[eax]
+ pxor xmm7,xmm7 /*Set xmm7 to zero*/
+ pmaddwd xmm0,[ecx]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm2,[eax+16]
+ paddd xmm7,xmm0
+ pmaddwd xmm2,[ecx+16]
+ /*Stall*/
+ movdqa xmm1,[eax+32]
+ paddd xmm7,xmm2
+ pmaddwd xmm1,[ecx+32]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm0,[eax+48]
+ paddd xmm7,xmm1
+ pmaddwd xmm0,[ecx+48]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm2,[eax+64]
+ paddd xmm7,xmm0
+ pmaddwd xmm2,[ecx+64]
+ /*Stall*/
+ movdqa xmm1,[eax+80]
+ paddd xmm7,xmm2
+ pmaddwd xmm1,[ecx+80]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm0,[eax+96]
+ paddd xmm7,xmm1
+ pmaddwd xmm0,[ecx+96]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm2,[eax+112]
+ paddd xmm7,xmm0
+ pmaddwd xmm2,[ecx+112]
+ /*Stall*/
+ movdqa xmm1,[eax+128]
+ paddd xmm7,xmm2
+ pmaddwd xmm1,[ecx+128]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm0,[eax+144]
+ paddd xmm7,xmm1
+ pmaddwd xmm0,[ecx+144]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm2,[eax+160]
+ paddd xmm7,xmm0
+ pmaddwd xmm2,[ecx+160]
+ /*Stall*/
+ movdqa xmm1,[eax+176]
+ paddd xmm7,xmm2
+ pmaddwd xmm1,[ecx+176]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm0,[eax+192]
+ paddd xmm7,xmm1
+ pmaddwd xmm0,[ecx+192]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm2,[eax+208]
+ paddd xmm7,xmm0
+ pmaddwd xmm2,[ecx+208]
+ /*Stall*/
+ movdqa xmm1,[eax+224]
+ paddd xmm7,xmm2
+ pmaddwd xmm1,[ecx+224]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movdqa xmm0,[eax+240]
+ paddd xmm7,xmm1
+ pmaddwd xmm0,[ecx+240]
+ /*Stall*/
+ /*Rest iteration************************************/
+ paddd xmm7,xmm0
+
+ /* add up the sum squares */
+ movhlps xmm0,xmm7 /* high half to low half */
+ paddd xmm7,xmm0 /* add high to low */
+ pshuflw xmm0,xmm7, 0xE /* reshuffle */
+ paddd xmm7,xmm0 /* add remaining */
+ movd back,xmm7
+
+ emms
+ }
+
+ return(back);
+#else
+ int back;
+
+ _asm
+ {
+ mov eax,f
+ mov ecx,g
+ /*First iteration************************************/
+ movq mm0,[eax]
+ pxor mm7,mm7 /*Set mm7 to zero*/
+ pmaddwd mm0,[ecx]
+ /*Stall*/
+ movq mm1,[eax+8]
+ /*Stall*/
+ pmaddwd mm1,[ecx+8]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+16]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+16]
+ /*Stall*/
+ movq mm0,[eax+24]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+24]
+ /*Stall*/
+ movq mm1,[eax+32]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+32]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+40]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+40]
+ /*Stall*/
+ movq mm0,[eax+48]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+48]
+ /*Stall*/
+ movq mm1,[eax+56]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+56]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+64]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+64]
+ /*Stall*/
+ movq mm0,[eax+72]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+72]
+ /*Stall*/
+ movq mm1,[eax+80]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+80]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+88]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+88]
+ /*Stall*/
+ movq mm0,[eax+96]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+96]
+ /*Stall*/
+ movq mm1,[eax+104]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+104]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+112]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+112]
+ /*Stall*/
+ movq mm0,[eax+120]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+120]
+ /*Stall*/
+ movq mm1,[eax+128]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+128]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+136]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+136]
+ /*Stall*/
+ movq mm0,[eax+144]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+144]
+ /*Stall*/
+ movq mm1,[eax+152]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+152]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+160]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+160]
+ /*Stall*/
+ movq mm0,[eax+168]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+168]
+ /*Stall*/
+ movq mm1,[eax+176]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+176]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+184]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+184]
+ /*Stall*/
+ movq mm0,[eax+192]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+192]
+ /*Stall*/
+ movq mm1,[eax+200]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+200]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+208]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+208]
+ /*Stall*/
+ movq mm0,[eax+216]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+216]
+ /*Stall*/
+ movq mm1,[eax+224]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+224]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movq mm2,[eax+232]
+ paddd mm7,mm0
+ pmaddwd mm2,[ecx+232]
+ /*Stall*/
+ movq mm0,[eax+240]
+ paddd mm7,mm1
+ pmaddwd mm0,[ecx+240]
+ /*Stall*/
+ movq mm1,[eax+248]
+ paddd mm7,mm2
+ pmaddwd mm1,[ecx+248]
+ /*Stall*/
+ /*Rest iteration************************************/
+ paddd mm7,mm0
+ /*Stall*/
+ /*Stall*/
+ /*Stall*/
+ paddd mm7,mm1
+ /*Stall*/
+ movq mm0,mm7
+ psrlq mm7,32
+ paddd mm0,mm7
+ /*Stall*/
+ /*Stall*/
+ /*Stall*/
+ movd back,mm0
+ emms
+ }
+
+ return(back);
+#endif
+#endif /*DB_USE_MMX*/
+}
+
+/*!
+ \ingroup LMLinAlg
+ Scalar product of 16 byte aligned 128-vectors (float).
+ Compile-time control: SIMD (SSE) or standard C.
+ */
+inline float db_ScalarProduct128Aligned16_f(const float *f,const float *g)
+{
+#ifndef DB_USE_SIMD
+ float back;
+ back=0.0;
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+ back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+ return(back);
+#else
+ float back;
+
+ _asm
+ {
+ mov eax,f
+ mov ecx,g
+ /*First iteration************************************/
+ movaps xmm0,[eax]
+ xorps xmm7,xmm7 /*Set mm7 to zero*/
+ mulps xmm0,[ecx]
+ /*Stall*/
+ movaps xmm1,[eax+16]
+ /*Stall*/
+ mulps xmm1,[ecx+16]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+32]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+32]
+ /*Stall*/
+ movaps xmm0,[eax+48]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+48]
+ /*Stall*/
+ movaps xmm1,[eax+64]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+64]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+80]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+80]
+ /*Stall*/
+ movaps xmm0,[eax+96]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+96]
+ /*Stall*/
+ movaps xmm1,[eax+112]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+112]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+128]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+128]
+ /*Stall*/
+ movaps xmm0,[eax+144]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+144]
+ /*Stall*/
+ movaps xmm1,[eax+160]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+160]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+176]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+176]
+ /*Stall*/
+ movaps xmm0,[eax+192]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+192]
+ /*Stall*/
+ movaps xmm1,[eax+208]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+208]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+224]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+224]
+ /*Stall*/
+ movaps xmm0,[eax+240]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+240]
+ /*Stall*/
+ movaps xmm1,[eax+256]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+256]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+272]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+272]
+ /*Stall*/
+ movaps xmm0,[eax+288]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+288]
+ /*Stall*/
+ movaps xmm1,[eax+304]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+304]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+320]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+320]
+ /*Stall*/
+ movaps xmm0,[eax+336]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+336]
+ /*Stall*/
+ movaps xmm1,[eax+352]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+352]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+368]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+368]
+ /*Stall*/
+ movaps xmm0,[eax+384]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+384]
+ /*Stall*/
+ movaps xmm1,[eax+400]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+400]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+416]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+416]
+ /*Stall*/
+ movaps xmm0,[eax+432]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+432]
+ /*Stall*/
+ movaps xmm1,[eax+448]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+448]
+ /*Stall*/
+ /*Standard iteration************************************/
+ movaps xmm2,[eax+464]
+ addps xmm7,xmm0
+ mulps xmm2,[ecx+464]
+ /*Stall*/
+ movaps xmm0,[eax+480]
+ addps xmm7,xmm1
+ mulps xmm0,[ecx+480]
+ /*Stall*/
+ movaps xmm1,[eax+496]
+ addps xmm7,xmm2
+ mulps xmm1,[ecx+496]
+ /*Stall*/
+ /*Rest iteration************************************/
+ addps xmm7,xmm0
+ /*Stall*/
+ addps xmm7,xmm1
+ /*Stall*/
+ movaps xmm6,xmm7
+ /*Stall*/
+ shufps xmm6,xmm6,4Eh
+ /*Stall*/
+ addps xmm7,xmm6
+ /*Stall*/
+ movaps xmm6,xmm7
+ /*Stall*/
+ shufps xmm6,xmm6,11h
+ /*Stall*/
+ addps xmm7,xmm6
+ /*Stall*/
+ movss back,xmm7
+ }
+
+ return(back);
+#endif /*DB_USE_SIMD*/
+}
+
+#endif /* DB_UTILITIES_LINALG */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_poly.cpp b/jni/feature_stab/db_vlvm/db_utilities_poly.cpp
new file mode 100644
index 000000000..013ac726e
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_poly.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_poly.cpp,v 1.2 2010/09/03 12:00:10 bsouthall Exp $ */
+
+#include "db_utilities_poly.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+
/*Solve the cubic a*x^3+b*x^2+c*x+d=0 for real roots using the closed form
(trigonometric/Cardano) solution of the normalized monic cubic.
roots receives up to three real roots and nr_roots the count found.
If a==0 the equation degenerates and is handed to the quadratic solver.
For a nondegenerate cubic with two roots, roots[0] is the single root
and roots[1] is the double root (see header documentation).*/
void db_SolveCubic(double *roots,int *nr_roots,double a,double b,double c,double d)
{
    double bp,bp2,cp,dp,q,r,srq;
    double r2_min_q3,theta,bp_through3,theta_through3;
    double cos_theta_through3,sin_theta_through3,min2_cos_theta_plu,min2_cos_theta_min;
    double si_r_srq,A;

    /*For nondegenerate cubics with three roots
    [24 mult 9 add 2sqrt 1acos 1cos=33flops 4func]
    For nondegenerate cubics with one root
    [16 mult 6 add 1sqrt 1qbrt=24flops 3func]*/

    if(a==0.0) db_SolveQuadratic(roots,nr_roots,b,c,d);
    else
    {
        /*Normalize to the monic cubic x^3+bp*x^2+cp*x+dp*/
        bp=b/a;
        bp2=bp*bp;
        cp=c/a;
        dp=d/a;

        /*Standard substitution quantities; the sign of r*r-q*q*q
        determines the number of real roots*/
        q=(bp2-3.0*cp)/9.0;
        r=(2.0*bp2*bp-9.0*bp*cp+27.0*dp)/54.0;
        r2_min_q3=r*r-q*q*q;
        if(r2_min_q3<0.0)
        {
            /*Three real roots: trigonometric form*/
            *nr_roots=3;
            /*q has to be > 0*/
            srq=sqrt(q);
            /*Clamp the acos argument to [-1,1] to guard against rounding*/
            theta=acos(db_maxd(-1.0,db_mind(1.0,r/(q*srq))));
            bp_through3=bp/3.0;
            theta_through3=theta/3.0;
            cos_theta_through3=cos(theta_through3);
            /*theta/3 is in [0,pi/3] so the sine is nonnegative;
            recover it from the cosine instead of calling sin()*/
            sin_theta_through3=sqrt(db_maxd(0.0,1.0-cos_theta_through3*cos_theta_through3));

            /*cos(theta_through3+2*pi/3)=cos_theta_through3*cos(2*pi/3)-sin_theta_through3*sin(2*pi/3)
            = -0.5*cos_theta_through3-sqrt(3)/2.0*sin_theta_through3
            = -0.5*(cos_theta_through3+sqrt(3)*sin_theta_through3)*/
            min2_cos_theta_plu=cos_theta_through3+DB_SQRT3*sin_theta_through3;
            min2_cos_theta_min=cos_theta_through3-DB_SQRT3*sin_theta_through3;

            roots[0]= -2.0*srq*cos_theta_through3-bp_through3;
            roots[1]=srq*min2_cos_theta_plu-bp_through3;
            roots[2]=srq*min2_cos_theta_min-bp_through3;
        }
        else if(r2_min_q3>0.0)
        {
            /*One real root: Cardano form via a cube root*/
            *nr_roots=1;
            A= -db_sign(r)*db_CubRoot(db_absd(r)+sqrt(r2_min_q3));
            bp_through3=bp/3.0;
            if(A!=0.0) roots[0]=A+q/A-bp_through3;
            else roots[0]= -bp_through3;
        }
        else
        {
            /*Borderline case r*r==q*q*q: a single root plus a double root*/
            *nr_roots=2;
            bp_through3=bp/3.0;
            /*q has to be >= 0*/
            si_r_srq=db_sign(r)*sqrt(q);
            /*Single root*/
            roots[0]= -2.0*si_r_srq-bp_through3;
            /*Double root*/
            roots[1]=si_r_srq-bp_through3;
        }
    }
}
+
/*Solve the quartic a*x^4+b*x^3+c*x^2+d*x+e=0 for real roots by solving a
resolvent cubic and splitting into two quadratics (Ferrari-style).
roots receives up to four real roots and nr_roots the count found.
Degenerate cases (a==0 or e==0) are reduced to a cubic. If the split
into quadratics is not possible (negative radicands), zero roots are
reported; see db_SolveQuarticForced for a variant that forces the split.*/
void db_SolveQuartic(double *roots,int *nr_roots,double a,double b,double c,double d,double e)
{
    /*Normalized coefficients*/
    double c0,c1,c2,c3;
    /*Temporary coefficients*/
    double c3through2,c3through4,c3c3through4_min_c2,min4_c0;
    double lz,ms,ns,mn,m,n,lz_through2;
    /*Cubic polynomial roots, nr of roots and coefficients*/
    double c_roots[3];
    int nr_c_roots;
    double k0,k1;
    /*nr additional roots from second quadratic*/
    int addroots;

    /*For nondegenerate quartics
    [16mult 11add 2sqrt 1cubic 2quadratic=74flops 8funcs]*/

    if(a==0.0) db_SolveCubic(roots,nr_roots,b,c,d,e);
    else if(e==0.0)
    {
        /*Zero is a root; factor out x and solve the remaining cubic*/
        db_SolveCubic(roots,nr_roots,a,b,c,d);
        roots[*nr_roots]=0.0;
        *nr_roots+=1;
    }
    else
    {
        /*Compute normalized coefficients*/
        c3=b/a;
        c2=c/a;
        c1=d/a;
        c0=e/a;
        /*Compute temporary coefficients*/
        c3through2=c3/2.0;
        c3through4=c3/4.0;
        c3c3through4_min_c2=c3*c3through4-c2;
        min4_c0= -4.0*c0;
        /*Compute coefficients of cubic*/
        k0=min4_c0*c3c3through4_min_c2-c1*c1;
        k1=c1*c3+min4_c0;
        /*k2= -c2*/
        /*k3=1.0*/

        /*Solve it for roots*/
        db_SolveCubic(c_roots,&nr_c_roots,1.0,-c2,k1,k0);

        if(nr_c_roots>0)
        {
            /*Use the first resolvent-cubic root to build the two quadratics*/
            lz=c_roots[0];
            lz_through2=lz/2.0;
            ms=lz+c3c3through4_min_c2;
            ns=lz_through2*lz_through2-c0;
            mn=lz*c3through4-c1/2.0;

            if((ms>=0.0)&&(ns>=0.0))
            {
                m=sqrt(ms);
                n=sqrt(ns)*db_sign(mn);

                db_SolveQuadratic(roots,nr_roots,
                    1.0,c3through2+m,lz_through2+n);

                db_SolveQuadratic(&roots[*nr_roots],&addroots,
                    1.0,c3through2-m,lz_through2-n);

                *nr_roots+=addroots;
            }
            else *nr_roots=0;
        }
        else *nr_roots=0;
    }
}
+
/*Variant of db_SolveQuartic that forces the split into two quadratics
by clamping the radicands ms and ns to zero instead of giving up.
Useful when the quartic may in fact be (numerically) a quadratic, such
as in absolute orientation with planar data (see header documentation).
Otherwise identical in structure to db_SolveQuartic.*/
void db_SolveQuarticForced(double *roots,int *nr_roots,double a,double b,double c,double d,double e)
{
    /*Normalized coefficients*/
    double c0,c1,c2,c3;
    /*Temporary coefficients*/
    double c3through2,c3through4,c3c3through4_min_c2,min4_c0;
    double lz,ms,ns,mn,m,n,lz_through2;
    /*Cubic polynomial roots, nr of roots and coefficients*/
    double c_roots[3];
    int nr_c_roots;
    double k0,k1;
    /*nr additional roots from second quadratic*/
    int addroots;

    /*For nondegenerate quartics
    [16mult 11add 2sqrt 1cubic 2quadratic=74flops 8funcs]*/

    if(a==0.0) db_SolveCubic(roots,nr_roots,b,c,d,e);
    else if(e==0.0)
    {
        /*Zero is a root; factor out x and solve the remaining cubic*/
        db_SolveCubic(roots,nr_roots,a,b,c,d);
        roots[*nr_roots]=0.0;
        *nr_roots+=1;
    }
    else
    {
        /*Compute normalized coefficients*/
        c3=b/a;
        c2=c/a;
        c1=d/a;
        c0=e/a;
        /*Compute temporary coefficients*/
        c3through2=c3/2.0;
        c3through4=c3/4.0;
        c3c3through4_min_c2=c3*c3through4-c2;
        min4_c0= -4.0*c0;
        /*Compute coefficients of cubic*/
        k0=min4_c0*c3c3through4_min_c2-c1*c1;
        k1=c1*c3+min4_c0;
        /*k2= -c2*/
        /*k3=1.0*/

        /*Solve it for roots*/
        db_SolveCubic(c_roots,&nr_c_roots,1.0,-c2,k1,k0);

        if(nr_c_roots>0)
        {
            lz=c_roots[0];
            lz_through2=lz/2.0;
            ms=lz+c3c3through4_min_c2;
            ns=lz_through2*lz_through2-c0;
            mn=lz*c3through4-c1/2.0;

            /*The forcing step: clamp negative radicands to zero so the
            square roots below always succeed*/
            if(ms<0.0) ms=0.0;
            if(ns<0.0) ns=0.0;

            m=sqrt(ms);
            n=sqrt(ns)*db_sign(mn);

            db_SolveQuadratic(roots,nr_roots,
                1.0,c3through2+m,lz_through2+n);

            db_SolveQuadratic(&roots[*nr_roots],&addroots,
                1.0,c3through2-m,lz_through2-n);

            *nr_roots+=addroots;
        }
        else *nr_roots=0;
    }
}
diff --git a/jni/feature_stab/db_vlvm/db_utilities_poly.h b/jni/feature_stab/db_vlvm/db_utilities_poly.h
new file mode 100644
index 000000000..1f8789077
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_poly.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_poly.h,v 1.2 2010/09/03 12:00:11 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_POLY
+#define DB_UTILITIES_POLY
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMPolynomial (LM) Polynomial utilities (solvers, arithmetic, evaluation, etc.)
+ */
+/*\{*/
+
+/*!
+In debug mode closed form quadratic solving takes on the order of 15 microseconds
+while eig of the companion matrix takes about 1.1 milliseconds
+Speed-optimized code in release mode solves a quadratic in 0.3 microseconds on 450MHz
+*/
+inline void db_SolveQuadratic(double *roots,int *nr_roots,double a,double b,double c)
+{
+ double rs,srs,q;
+
+ /*For non-degenerate quadratics
+ [5 mult 2 add 1 sqrt=7flops 1func]*/
+ if(a==0.0)
+ {
+ if(b==0.0) *nr_roots=0;
+ else
+ {
+ roots[0]= -c/b;
+ *nr_roots=1;
+ }
+ }
+ else
+ {
+ rs=b*b-4.0*a*c;
+ if(rs>=0.0)
+ {
+ *nr_roots=2;
+ srs=sqrt(rs);
+ q= -0.5*(b+db_sign(b)*srs);
+ roots[0]=q/a;
+ /*If b is zero db_sign(b) returns 1,
+ so q is only zero when b=0 and c=0*/
+ if(q==0.0) *nr_roots=1;
+ else roots[1]=c/q;
+ }
+ else *nr_roots=0;
+ }
+}
+
+/*!
+In debug mode closed form cubic solving takes on the order of 45 microseconds
+while eig of the companion matrix takes about 1.3 milliseconds
+Speed-optimized code in release mode solves a cubic in 1.5 microseconds on 450MHz
+For a non-degenerate cubic with two roots, the first root is the single root and
+the second root is the double root
+*/
+DB_API void db_SolveCubic(double *roots,int *nr_roots,double a,double b,double c,double d);
+/*!
+In debug mode closed form quartic solving takes on the order of 0.1 milliseconds
+while eig of the companion matrix takes about 1.5 milliseconds
+Speed-optimized code in release mode solves a quartic in 2.6 microseconds on 450MHz*/
+DB_API void db_SolveQuartic(double *roots,int *nr_roots,double a,double b,double c,double d,double e);
+/*!
+Quartic solving where a solution is forced when splitting into quadratics, which
+can be good if the quartic is sometimes in fact a quadratic, such as in absolute orientation
+when the data is planar*/
+DB_API void db_SolveQuarticForced(double *roots,int *nr_roots,double a,double b,double c,double d,double e);
+
/*!
Evaluate the first degree polynomial p[0]+p[1]*x at the point x*/
inline double db_PolyEval1(const double p[2],double x)
{
    return(p[1]*x+p[0]);
}
+
/*!
Multiply d=a*b where a and b are both first degree polynomials.
Inputs are read before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly1_1(double *d,const double *a,const double *b)
{
    const double a0=a[0],a1=a[1];
    const double b0=b[0],b1=b[1];

    d[0]=a0*b0;
    d[1]=a0*b1+a1*b0;
    d[2]= a1*b1;
}
+
/*!
Multiply d=a*b where a is a zero degree (constant) polynomial and b is
second degree. Inputs are read before d is written, so d may alias b.*/
inline void db_MultiplyPoly0_2(double *d,const double *a,const double *b)
{
    const double a0=a[0];
    const double b0=b[0],b1=b[1],b2=b[2];

    d[0]=a0*b0;
    d[1]=a0*b1;
    d[2]=a0*b2;
}
+
/*!
Multiply d=a*b where a is first degree and b is second degree.
Inputs are read before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly1_2(double *d,const double *a,const double *b)
{
    const double a0=a[0],a1=a[1];
    const double b0=b[0],b1=b[1],b2=b[2];

    d[0]=a0*b0;
    d[1]=a0*b1+a1*b0;
    d[2]=a0*b2+a1*b1;
    d[3]= a1*b2;
}
+
+
/*!
Multiply d=a*b where a is first degree and b is third degree.
Inputs are read before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly1_3(double *d,const double *a,const double *b)
{
    const double a0=a[0],a1=a[1];
    const double b0=b[0],b1=b[1],b2=b[2],b3=b[3];

    d[0]=a0*b0;
    d[1]=a0*b1+a1*b0;
    d[2]=a0*b2+a1*b1;
    d[3]=a0*b3+a1*b2;
    d[4]= a1*b3;
}
/*!
Add the product a*b to d, where a is zero degree (constant) and b is
first degree. (The original comment here described the wrong degrees.)*/
inline void db_AddPolyProduct0_1(double *d,const double *a,const double *b)
{
    const double a0=a[0];
    const double b0=b[0],b1=b[1];

    d[0]+=a0*b0;
    d[1]+=a0*b1;
}
/*!
Add the product a*b to d, where a is zero degree (constant) and b is
second degree*/
inline void db_AddPolyProduct0_2(double *d,const double *a,const double *b)
{
    const double a0=a[0];
    const double b0=b[0],b1=b[1],b2=b[2];

    d[0]+=a0*b0;
    d[1]+=a0*b1;
    d[2]+=a0*b2;
}
/*!
Subtract the product a*b from d, where a and b are both zero degree
(constants). (The original comment here described the wrong degrees.)*/
inline void db_SubtractPolyProduct0_0(double *d,const double *a,const double *b)
{
    d[0]-=a[0]*b[0];
}
+
/*!
Subtract the product a*b from d, where a is zero degree (constant) and
b is first degree*/
inline void db_SubtractPolyProduct0_1(double *d,const double *a,const double *b)
{
    const double a0=a[0];
    const double b0=b[0],b1=b[1];

    d[0]-=a0*b0;
    d[1]-=a0*b1;
}
+
/*!
Subtract the product a*b from d, where a is zero degree (constant) and
b is second degree*/
inline void db_SubtractPolyProduct0_2(double *d,const double *a,const double *b)
{
    const double a0=a[0];
    const double b0=b[0],b1=b[1],b2=b[2];

    d[0]-=a0*b0;
    d[1]-=a0*b1;
    d[2]-=a0*b2;
}
+
/*!
Subtract the product a*b from d, where a is first degree and b is
third degree*/
inline void db_SubtractPolyProduct1_3(double *d,const double *a,const double *b)
{
    const double a0=a[0],a1=a[1];
    const double b0=b[0],b1=b[1],b2=b[2],b3=b[3];

    d[0]-=a0*b0;
    d[1]-=a0*b1+a1*b0;
    d[2]-=a0*b2+a1*b1;
    d[3]-=a0*b3+a1*b2;
    d[4]-= a1*b3;
}
+
/*!
Compute the coefficients p[0..4] (constant term first) of the
characteristic polynomial det(A-x*I) of the 4x4 matrix A, by cofactor
expansion carried out with the small polynomial helpers above.
The polynomial is monic: the expansion always yields p[4]=1.
Note: many of the two* and three* arrays are only partially filled;
each is written exactly up to the degree read by the helper it is later
passed to, so the order of operations below must be preserved.*/
inline void db_CharacteristicPolynomial4x4(double p[5],const double A[16])
{
    /*All two by two determinants of the first two rows*/
    double two01[3],two02[3],two03[3],two12[3],two13[3],two23[3];
    /*Polynomials representing third and fourth row of A*/
    double P0[2],P1[2],P2[2],P3[2];
    double P4[2],P5[2],P6[2],P7[2];
    /*All three by three determinants of the first three rows*/
    double neg_three0[4],neg_three1[4],three2[4],three3[4];

    /*Compute 2x2 determinants. Only two01 involves both diagonal
    entries (A[0]-x) and (A[5]-x) and is quadratic in x*/
    two01[0]=A[0]*A[5]-A[1]*A[4];
    two01[1]= -(A[0]+A[5]);
    two01[2]=1.0;

    two02[0]=A[0]*A[6]-A[2]*A[4];
    two02[1]= -A[6];

    two03[0]=A[0]*A[7]-A[3]*A[4];
    two03[1]= -A[7];

    two12[0]=A[1]*A[6]-A[2]*A[5];
    two12[1]=A[2];

    two13[0]=A[1]*A[7]-A[3]*A[5];
    two13[1]=A[3];

    two23[0]=A[2]*A[7]-A[3]*A[6];

    /*Rows three and four as degree-one polynomials in x; only the
    diagonal entries A[10] and A[15] carry the -x term*/
    P0[0]=A[8];
    P1[0]=A[9];
    P2[0]=A[10];P2[1]= -1.0;
    P3[0]=A[11];

    P4[0]=A[12];
    P5[0]=A[13];
    P6[0]=A[14];
    P7[0]=A[15];P7[1]= -1.0;

    /*Compute 3x3 determinants. Note that the highest
    degree polynomial goes first and the smaller ones
    are added or subtracted from it*/
    db_MultiplyPoly1_1(       neg_three0,P2,two13);
    db_SubtractPolyProduct0_0(neg_three0,P1,two23);
    db_SubtractPolyProduct0_1(neg_three0,P3,two12);

    db_MultiplyPoly1_1(       neg_three1,P2,two03);
    db_SubtractPolyProduct0_1(neg_three1,P3,two02);
    db_SubtractPolyProduct0_0(neg_three1,P0,two23);

    db_MultiplyPoly0_2(       three2,P3,two01);
    db_AddPolyProduct0_1(     three2,P0,two13);
    db_SubtractPolyProduct0_1(three2,P1,two03);

    db_MultiplyPoly1_2(       three3,P2,two01);
    db_AddPolyProduct0_1(     three3,P0,two12);
    db_SubtractPolyProduct0_1(three3,P1,two02);

    /*Compute 4x4 determinants*/
    db_MultiplyPoly1_3(       p,P7,three3);
    db_AddPolyProduct0_2(     p,P4,neg_three0);
    db_SubtractPolyProduct0_2(p,P5,neg_three1);
    db_SubtractPolyProduct0_2(p,P6,three2);
}
+
+inline void db_RealEigenvalues4x4(double lambda[4],int *nr_roots,const double A[16],int forced=0)
+{
+ double p[5];
+
+ db_CharacteristicPolynomial4x4(p,A);
+ if(forced) db_SolveQuarticForced(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]);
+ else db_SolveQuartic(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]);
+}
+
/*!
Compute the unit norm eigenvector v of the matrix A corresponding
to the eigenvalue lambda. The cofactor matrix of B=A-lambda*I is formed;
since B is singular, every nonzero row of the cofactor matrix lies in
the null space of B, and the largest-norm row is normalized for
numerical stability.
[96mult 60add 1sqrt=156flops 1sqrt]*/
inline void db_EigenVector4x4(double v[4],double lambda,const double A[16])
{
    double a0,a5,a10,a15;
    double d01,d02,d03,d12,d13,d23;
    double e01,e02,e03,e12,e13,e23;
    double C[16],n0,n1,n2,n3,m;

    /*Compute diagonal
    [4add=4flops]*/
    a0=A[0]-lambda;
    a5=A[5]-lambda;
    a10=A[10]-lambda;
    a15=A[15]-lambda;

    /*Compute 2x2 determinants of rows 1,2 and 3,4
    [24mult 12add=36flops]*/
    d01=a0*a5   -A[1]*A[4];
    d02=a0*A[6] -A[2]*A[4];
    d03=a0*A[7] -A[3]*A[4];
    d12=A[1]*A[6]-A[2]*a5;
    d13=A[1]*A[7]-A[3]*a5;
    d23=A[2]*A[7]-A[3]*A[6];

    e01=A[8]*A[13]-A[9] *A[12];
    e02=A[8]*A[14]-a10  *A[12];
    e03=A[8]*a15  -A[11]*A[12];
    e12=A[9]*A[14]-a10  *A[13];
    e13=A[9]*a15  -A[11]*A[13];
    e23=a10 *a15  -A[11]*A[14];

    /*Compute matrix of cofactors
    [48mult 32 add=80flops*/
    C[0]=  (a5  *e23-A[6]*e13+A[7]*e12);
    C[1]= -(A[4]*e23-A[6]*e03+A[7]*e02);
    C[2]=  (A[4]*e13-a5  *e03+A[7]*e01);
    C[3]= -(A[4]*e12-a5  *e02+A[6]*e01);

    C[4]= -(A[1]*e23-A[2]*e13+A[3]*e12);
    C[5]=  (a0  *e23-A[2]*e03+A[3]*e02);
    C[6]= -(a0  *e13-A[1]*e03+A[3]*e01);
    C[7]=  (a0  *e12-A[1]*e02+A[2]*e01);

    C[8]=  (A[13]*d23-A[14]*d13+a15  *d12);
    C[9]= -(A[12]*d23-A[14]*d03+a15  *d02);
    C[10]= (A[12]*d13-A[13]*d03+a15  *d01);
    C[11]= -(A[12]*d12-A[13]*d02+A[14]*d01);

    C[12]= -(A[9]*d23-a10 *d13+A[11]*d12);
    C[13]=  (A[8]*d23-a10 *d03+A[11]*d02);
    C[14]= -(A[8]*d13-A[9]*d03+A[11]*d01);
    C[15]=  (A[8]*d12-A[9]*d02+a10 *d01);

    /*Compute square sums of rows
    [16mult 12add=28flops*/
    n0=db_sqr(C[0]) +db_sqr(C[1]) +db_sqr(C[2]) +db_sqr(C[3]);
    n1=db_sqr(C[4]) +db_sqr(C[5]) +db_sqr(C[6]) +db_sqr(C[7]);
    n2=db_sqr(C[8]) +db_sqr(C[9]) +db_sqr(C[10])+db_sqr(C[11]);
    n3=db_sqr(C[12])+db_sqr(C[13])+db_sqr(C[14])+db_sqr(C[15]);

    /*Take the largest norm row and normalize
    [4mult 1 sqrt=4flops 1sqrt]
    NOTE(review): db_SafeReciprocal presumably guards against division
    by zero when all cofactor rows vanish - confirm in db_utilities.h*/
    if(n0>=n1 && n0>=n2 && n0>=n3)
    {
        m=db_SafeReciprocal(sqrt(n0));
        db_MultiplyScalarCopy4(v,C,m);
    }
    else if(n1>=n2 && n1>=n3)
    {
        m=db_SafeReciprocal(sqrt(n1));
        db_MultiplyScalarCopy4(v,&(C[4]),m);
    }
    else if(n2>=n3)
    {
        m=db_SafeReciprocal(sqrt(n2));
        db_MultiplyScalarCopy4(v,&(C[8]),m);
    }
    else
    {
        m=db_SafeReciprocal(sqrt(n3));
        db_MultiplyScalarCopy4(v,&(C[12]),m);
    }
}
+
+
+
+/*\}*/
+#endif /* DB_UTILITIES_POLY */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_random.h b/jni/feature_stab/db_vlvm/db_utilities_random.h
new file mode 100644
index 000000000..ef24039c1
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_random.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_random.h,v 1.1 2010/08/19 18:09:20 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_RANDOM
+#define DB_UTILITIES_RANDOM
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMRandom (LM) Random numbers, random sampling
+ */
+/*\{*/
/*!
Park-Miller "minimal standard" linear congruential generator,
r'=16807*r mod (2^31-1), computed with Schrage's decomposition
(2147483647=16807*127773+2836) so the intermediate products stay
within 32-bit integer range. Initialize with a non-zero integer seed;
the seed is updated in place and a double in (0,1) is returned.
\param r seed (in/out)
\return random double between zero and one
*/
inline double db_QuickRandomDouble(int &r)
{
    const int quotient=r/127773;

    r=16807*(r-quotient*127773)-2836*quotient;
    if(r<0) r+=2147483647;

    return((1.0/((double)2147483647))*r);
}
+
+/*!
+Random Number generator. Initialize with non-zero
+integer value r. An int between and including 0 and max
+ \param r seed
+ \param max upped limit
+ \return random int
+*/
+inline int db_RandomInt(int &r,int max)
+{
+ double dtemp;
+ int itemp;
+ dtemp=db_QuickRandomDouble(r)*(max+1);
+ itemp=(int) dtemp;
+ if(itemp<=0) return(0);
+ if(itemp>=max) return(max);
+ return(itemp);
+}
+
/*!
 Generate a random sample of distinct indices into [0..pool_size-1]
 (sampling without replacement).
 \param s sample (out) pre-allocated array of size sample_size
 \param sample_size size of sample
 \param pool_size upper limit on item index
 \param r_seed random number generator seed
 */
inline void db_RandomSample(int *s,int sample_size,int pool_size,int &r_seed)
{
    int temp,temp2,i,j;

    for(i=0;i<sample_size;i++)
    {
        /*Draw uniformly among the pool_size-i indices not yet taken*/
        temp=db_RandomInt(r_seed,pool_size-1-i);

        /*Walk the previously chosen indices (kept sorted ascending):
        every earlier pick <= temp bumps temp by one so occupied values
        are skipped; otherwise temp is inserted in place and the walk
        continues with the displaced larger value. This keeps s sorted
        and all entries distinct.*/
        for(j=0;j<i;j++)
        {
            if(s[j]<=temp) temp++;
            else
            {
                /*swap*/
                temp2=temp;
                temp=s[j];
                s[j]=temp2;
            }
        }
        s[i]=temp;
    }
}
+/*\}*/
+#endif /* DB_UTILITIES_RANDOM */
diff --git a/jni/feature_stab/db_vlvm/db_utilities_rotation.h b/jni/feature_stab/db_vlvm/db_utilities_rotation.h
new file mode 100644
index 000000000..7f5f937b4
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_utilities_rotation.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_rotation.h,v 1.2 2010/09/03 12:00:11 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_ROTATION
+#define DB_UTILITIES_ROTATION
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+* Lean and mean begins here *
+*****************************************************************/
+/*!
+ * \defgroup LMRotation (LM) Rotation Utilities (quaternions, orthonormal)
+ */
+/*\{*/
/*!
 Convert a unit quaternion q=(q0,qx,qy,qz), scalar part first, to the
 corresponding 3x3 rotation matrix R stored row-major.
 \param R rotation matrix (out)
 \param q unit quaternion
 */
inline void db_QuaternionToRotation(double R[9],const double q[4])
{
    const double w=q[0],x=q[1],y=q[2],z=q[3];

    /*Pairwise products of the quaternion components*/
    const double ww=w*w,wx=w*x,wy=w*y,wz=w*z;
    const double xx=x*x,xy=x*y,xz=x*z;
    const double yy=y*y,yz=y*z;
    const double zz=z*z;

    R[0]=ww+xx-yy-zz;
    R[1]=2.0*(xy-wz);
    R[2]=2.0*(xz+wy);

    R[3]=2.0*(xy+wz);
    R[4]=ww-xx+yy-zz;
    R[5]=2.0*(yz-wx);

    R[6]=2.0*(xz-wy);
    R[7]=2.0*(yz+wx);
    R[8]=ww-xx-yy+zz;
}
+
+/*\}*/
+#endif /* DB_UTILITIES_ROTATION */