diff options
author | Sascha Haeberling <haeberling@google.com> | 2013-08-14 11:20:34 -0700 |
---|---|---|
committer | Sascha Haeberling <haeberling@google.com> | 2013-08-14 11:20:34 -0700 |
commit | 8bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96 (patch) | |
tree | 3382b4b8b52be577c3d0525df0e7da8178673d1c /jni/feature_stab/db_vlvm | |
parent | a34e8c7d439d17355df51e5e536bdbbd1744cc74 (diff) | |
download | android_packages_apps_Snap-8bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96.tar.gz android_packages_apps_Snap-8bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96.tar.bz2 android_packages_apps_Snap-8bddf8ce4f3dcbb56edb12cee7e93f3a9daa3f96.zip |
Copy over libjni_mosaic from Camera. We need to support the SRI pano
mode for Carlsbad.
Change-Id: Id14e64d8248236e8170c12cfca2cbf2ca952e993
Diffstat (limited to 'jni/feature_stab/db_vlvm')
28 files changed, 12006 insertions, 0 deletions
diff --git a/jni/feature_stab/db_vlvm/db_bundle.h b/jni/feature_stab/db_vlvm/db_bundle.h new file mode 100644 index 000000000..e4fb8db2c --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_bundle.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_bundle.h,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */ + +#ifndef DB_BUNDLE_H +#define DB_BUNDLE_H + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMBundle (LM) Bundle adjustment utilities (a.k.a. Levenberg-Marquardt algorithm) + */ +/*\{*/ + +#include "db_utilities.h" + +/*! +Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf +using only upper half of JtJ, destroying lower half below diagonal in the process +dimension is n and d should point to n allocated doubles of scratch memory +*/ +inline void db_Compute_dx(double *dx,double **JtJ,double *min_Jtf,double lambda,double *d,int n) +{ + int i; + double opl; + + opl=1.0+lambda; + for(i=0;i<n;i++) d[i]=JtJ[i][i]*opl; + + db_CholeskyDecompSeparateDiagonal(JtJ,d,n); + db_CholeskyBacksub(dx,JtJ,d,n,min_Jtf); +} + +/*! 
+Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf +using only upper half of JtJ, destroying lower half below diagonal in the process +*/ +inline void db_Compute_dx_3x3(double dx[3],double JtJ[9],const double min_Jtf[3],double lambda) +{ + double d[3],opl; + + opl=1.0+lambda; + d[0]=JtJ[0]*opl; + d[1]=JtJ[4]*opl; + d[2]=JtJ[8]*opl; + db_CholeskyDecomp3x3SeparateDiagonal(JtJ,d); + db_CholeskyBacksub3x3(dx,JtJ,d,min_Jtf); +} + +/*\}*/ + +#endif /* DB_BUNDLE_H */ diff --git a/jni/feature_stab/db_vlvm/db_feature_detection.cpp b/jni/feature_stab/db_vlvm/db_feature_detection.cpp new file mode 100644 index 000000000..28cb4a781 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_feature_detection.cpp @@ -0,0 +1,1770 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/ + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_utilities.h" +#include "db_feature_detection.h" +#ifdef _VERBOSE_ +#include <iostream> +#endif +#include <float.h> + +#define DB_SUB_PIXEL + +#define BORDER 10 // 5 + +float** db_AllocStrengthImage_f(float **im,int w,int h) +{ + int i,n,aw; + long c,size; + float **img,*aim,*p; + + /*Determine number of 124 element chunks needed*/ + n=(db_maxi(1,w-6)+123)/124; + /*Determine the total allocation width aw*/ + aw=n*124+8; + /*Allocate*/ + size=aw*h+16; + *im=new float [size]; + /*Clean up*/ + p=(*im); + for(c=0;c<size;c++) p[c]=0.0; + /*Get a 16 byte aligned pointer*/ + aim=db_AlignPointer_f(*im,16); + /*Allocate pointer table*/ + img=new float* [h]; + /*Initialize the pointer table*/ + for(i=0;i<h;i++) + { + img[i]=aim+aw*i+1; + } + + return(img); +} + +void db_FreeStrengthImage_f(float *im,float **img,int h) +{ + delete [] im; + delete [] img; +} + +/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width +Memory references occur one pixel outside the subrow*/ +inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width) +{ + int c; + + for(c=0;c<chunk_width;c++) + { + Ix[c]=img[i][j+c-1]-img[i][j+c+1]; + Iy[c]=img[i-1][j+c]-img[i+1][j+c]; + } +} + +/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128 +Memory references occur one pixel outside the subrow*/ +inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc) +{ +#ifdef DB_USE_MMX + const unsigned char *r1,*r2,*r3; + + r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j; + + _asm + { + mov esi,16 + mov eax,r1 + mov ebx,r2 + mov ecx,r3 + mov edx,dxx + + /*Get bitmask into mm7*/ + mov edi,7F7F7F7Fh + movd mm7,edi + punpckldq 
mm7,mm7 + +loopstart: + /***************dx part 1-12*********************************/ + movq mm0,[eax] /*1 Get upper*/ + pxor mm6,mm6 /*2 Set to zero*/ + movq mm1,[ecx] /*3 Get lower*/ + psrlq mm0,1 /*4 Shift*/ + psrlq mm1,1 /*5 Shift*/ + pand mm0,mm7 /*6 And*/ + movq mm2,[ebx-1] /*13 Get left*/ + pand mm1,mm7 /*7 And*/ + psubb mm0,mm1 /*8 Subtract*/ + pxor mm5,mm5 /*14 Set to zero*/ + movq mm1,mm0 /*9 Copy*/ + pcmpgtb mm6,mm0 /*10 Create unpack mask*/ + movq mm3,[ebx+1] /*15 Get right*/ + punpcklbw mm0,mm6 /*11 Unpack low*/ + punpckhbw mm1,mm6 /*12 Unpack high*/ + /***************dy part 13-24*********************************/ + movq mm4,mm0 /*25 Copy dx*/ + psrlq mm2,1 /*16 Shift*/ + pmullw mm0,mm0 /*26 Multiply dx*dx*/ + psrlq mm3,1 /*17 Shift*/ + pand mm2,mm7 /*18 And*/ + pand mm3,mm7 /*19 And*/ + /*Stall*/ + psubb mm2,mm3 /*20 Subtract*/ + /*Stall*/ + movq mm3,mm2 /*21 Copy*/ + pcmpgtb mm5,mm2 /*22 Create unpack mask*/ + punpcklbw mm2,mm5 /*23 Unpack low*/ + /*Stall*/ + punpckhbw mm3,mm5 /*24 Unpack high*/ + /***************dxx dxy dyy low part 25-49*********************************/ + pmullw mm4,mm2 /*27 Multiply dx*dy*/ + pmullw mm2,mm2 /*28 Multiply dy*dy*/ + pxor mm6,mm6 /*29 Set to zero*/ + movq mm5,mm0 /*30 Copy dx*dx*/ + pcmpgtw mm6,mm0 /*31 Create unpack mask for dx*dx*/ + punpcklwd mm0,mm6 /*32 Unpack dx*dx lows*/ + /*Stall*/ + punpckhwd mm5,mm6 /*33 Unpack dx*dx highs*/ + pxor mm6,mm6 /*36 Set to zero*/ + movq [edx],mm0 /*34 Store dx*dx lows*/ + movq mm0,mm4 /*37 Copy dx*dy*/ + movq [edx+8],mm5 /*35 Store dx*dx highs*/ + pcmpgtw mm6,mm4 /*38 Create unpack mask for dx*dy*/ + punpcklwd mm4,mm6 /*39 Unpack dx*dy lows*/ + /*Stall*/ + punpckhwd mm0,mm6 /*40 Unpack dx*dy highs*/ + pxor mm6,mm6 /*43 Set to zero*/ + movq [edx+512],mm4 /*41 Store dx*dy lows*/ + movq mm5,mm2 /*44 Copy dy*dy*/ + movq [edx+520],mm0 /*42 Store dx*dy highs*/ + pcmpgtw mm6,mm2 /*45 Create unpack mask for dy*dy*/ + punpcklwd mm2,mm6 /*46 Unpack dy*dy lows*/ + movq mm4,mm1 /*50 Copy 
dx*/ + punpckhwd mm5,mm6 /*47 Unpack dy*dy highs*/ + pmullw mm1,mm1 /*51 Multiply dx*dx*/ + movq [edx+1024],mm2 /*48 Store dy*dy lows*/ + pmullw mm4,mm3 /*52 Multiply dx*dy*/ + movq [edx+1032],mm5 /*49 Store dy*dy highs*/ + /***************dxx dxy dyy high part 50-79*********************************/ + pmullw mm3,mm3 /*53 Multiply dy*dy*/ + pxor mm6,mm6 /*54 Set to zero*/ + movq mm5,mm1 /*55 Copy dx*dx*/ + pcmpgtw mm6,mm1 /*56 Create unpack mask for dx*dx*/ + pxor mm2,mm2 /*61 Set to zero*/ + punpcklwd mm1,mm6 /*57 Unpack dx*dx lows*/ + movq mm0,mm4 /*62 Copy dx*dy*/ + punpckhwd mm5,mm6 /*58 Unpack dx*dx highs*/ + pcmpgtw mm2,mm4 /*63 Create unpack mask for dx*dy*/ + movq [edx+16],mm1 /*59 Store dx*dx lows*/ + punpcklwd mm4,mm2 /*64 Unpack dx*dy lows*/ + movq [edx+24],mm5 /*60 Store dx*dx highs*/ + punpckhwd mm0,mm2 /*65 Unpack dx*dy highs*/ + movq [edx+528],mm4 /*66 Store dx*dy lows*/ + pxor mm6,mm6 /*68 Set to zero*/ + movq [edx+536],mm0 /*67 Store dx*dy highs*/ + movq mm5,mm3 /*69 Copy dy*dy*/ + pcmpgtw mm6,mm3 /*70 Create unpack mask for dy*dy*/ + add eax,8 /*75*/ + punpcklwd mm3,mm6 /*71 Unpack dy*dy lows*/ + add ebx,8 /*76*/ + punpckhwd mm5,mm6 /*72 Unpack dy*dy highs*/ + add ecx,8 /*77*/ + movq [edx+1040],mm3 /*73 Store dy*dy lows*/ + /*Stall*/ + movq [edx+1048],mm5 /*74 Store dy*dy highs*/ + /*Stall*/ + add edx,32 /*78*/ + dec esi /*79*/ + jnz loopstart + + emms + } + +#else + int c; + int Ix,Iy; + + for(c=0;c<nc;c++) + { + Ix=(img[i][j+c-1]-img[i][j+c+1])>>1; + Iy=(img[i-1][j+c]-img[i+1][j+c])>>1; + dxx[c]=Ix*Ix; + dxx[c+128]=Ix*Iy; + dxx[c+256]=Iy*Iy; + } +#endif /*DB_USE_MMX*/ +} + +/*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/ +inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width, + float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4, + float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4) +{ + int c; + float dx,dy; + float 
Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4; + + for(c=0;c<chunk_width;c++) + { + dx=Ix0[c]; + dy=Iy0[c]; + Ixx0=dx*dx; + Ixy0=dx*dy; + Iyy0=dy*dy; + + dx=Ix1[c]; + dy=Iy1[c]; + Ixx1=dx*dx; + Ixy1=dx*dy; + Iyy1=dy*dy; + + dx=Ix2[c]; + dy=Iy2[c]; + Ixx2=dx*dx; + Ixy2=dx*dy; + Iyy2=dy*dy; + + dx=Ix3[c]; + dy=Iy3[c]; + Ixx3=dx*dx; + Ixy3=dx*dy; + Iyy3=dy*dy; + + dx=Ix4[c]; + dy=Iy4[c]; + Ixx4=dx*dx; + Ixy4=dx*dy; + Iyy4=dy*dy; + + /*Filter vertically*/ + gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4; + gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4; + gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4; + } +} + +/*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/ +inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc) +{ +#ifdef DB_USE_MMX + int c; + + _asm + { + mov c,64 + mov eax,d0 + mov ebx,d1 + mov ecx,d2 + mov edx,d3 + mov edi,d4 + mov esi,g + +loopstart: + /***************dxx part 1-14*********************************/ + movq mm0,[eax] /*1 Get dxx0*/ + /*Stall*/ + movq mm1,[ebx] /*2 Get dxx1*/ + /*Stall*/ + movq mm2,[ecx] /*5 Get dxx2*/ + pslld mm1,2 /*3 Shift dxx1*/ + movq mm3,[edx] /*10 Get dxx3*/ + paddd mm0,mm1 /*4 Accumulate dxx1*/ + movq mm4,[eax+512] /*15 Get dxy0*/ + pslld mm2,1 /*6 Shift dxx2 1*/ + paddd mm0,mm2 /*7 Accumulate dxx2 1*/ + pslld mm2,1 /*8 Shift dxx2 2*/ + movq mm5,[ebx+512] /*16 Get dxy1*/ + paddd mm0,mm2 /*9 Accumulate dxx2 2*/ + pslld mm3,2 /*11 Shift dxx3*/ + /*Stall*/ + paddd mm0,mm3 /*12 Accumulate dxx3*/ + pslld mm5,2 /*17 Shift dxy1*/ + paddd mm0,[edi] /*13 Accumulate dxx4*/ + paddd mm4,mm5 /*18 Accumulate dxy1*/ + movq mm6,[ecx+512] /*19 Get dxy2*/ + /*Stall*/ + movq [esi],mm0 /*14 Store dxx sums*/ + /***************dxy part 15-28*********************************/ + pslld mm6,1 /*20 Shift dxy2 1*/ + paddd mm4,mm6 /*21 Accumulate dxy2 1*/ + pslld mm6,1 /*22 Shift dxy2 2*/ + movq mm0,[eax+1024] /*29 Get dyy0*/ + paddd mm4,mm6 /*23 Accumulate dxy2 
2*/ + movq mm7,[edx+512] /*24 Get dxy3*/ + pslld mm7,2 /*25 Shift dxy3*/ + movq mm1,[ebx+1024] /*30 Get dyy1*/ + paddd mm4,mm7 /*26 Accumulate dxy3*/ + paddd mm4,[edi+512] /*27 Accumulate dxy4*/ + pslld mm1,2 /*31 Shift dyy1*/ + movq mm2,[ecx+1024] /*33 Get dyy2*/ + paddd mm0,mm1 /*32 Accumulate dyy1*/ + movq [esi+512],mm4 /*28 Store dxy sums*/ + pslld mm2,1 /*34 Shift dyy2 1*/ + /***************dyy part 29-49*********************************/ + + + movq mm3,[edx+1024] /*38 Get dyy3*/ + paddd mm0,mm2 /*35 Accumulate dyy2 1*/ + paddd mm0,[edi+1024] /*41 Accumulate dyy4*/ + pslld mm2,1 /*36 Shift dyy2 2*/ + paddd mm0,mm2 /*37 Accumulate dyy2 2*/ + pslld mm3,2 /*39 Shift dyy3*/ + paddd mm0,mm3 /*40 Accumulate dyy3*/ + add eax,8 /*43*/ + add ebx,8 /*44*/ + add ecx,8 /*45*/ + movq [esi+1024],mm0 /*42 Store dyy sums*/ + /*Stall*/ + add edx,8 /*46*/ + add edi,8 /*47*/ + add esi,8 /*48*/ + dec c /*49*/ + jnz loopstart + + emms + } + +#else + int c,dd; + + for(c=0;c<nc;c++) + { + /*Filter vertically*/ + dd=d2[c]; + g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c]; + + dd=d2[c+128]; + g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128]; + + dd=d2[c+256]; + g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256]; + } +#endif /*DB_USE_MMX*/ +} + +/*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j +and with width chunk_width. 
gxx,gxy and gyy are assumed to be four pixels wider than chunk_width +and starting at (i,j-2)*/ +inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width) +{ + float Gxx,Gxy,Gyy,det,trc; + int c; + + for(c=0;c<chunk_width;c++) + { + Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4]; + Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4]; + Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4]; + + det=Gxx*Gyy-Gxy*Gxy; + trc=Gxx+Gyy; + s[i][j+c]=det-0.06f*trc*trc; + } +} + +/*Filter g of length 128 in place with 14641. Output is shifted two steps +and of length 124*/ +inline void db_Filter14641_128_i(int *g,int nc) +{ +#ifdef DB_USE_MMX + int mask; + + mask=0xFFFFFFFF; + _asm + { + mov esi,31 + mov eax,g + + /*Get bitmask 00000000FFFFFFFF into mm7*/ + movd mm7,mask + + /*Warming iteration one 1-16********************/ + movq mm6,[eax] /*1 Load new data*/ + paddd mm0,mm6 /*2 Add 1* behind two steps*/ + movq mm2,mm6 /*3 Start with 1* in front two steps*/ + pslld mm6,1 /*4*/ + paddd mm1,mm6 /*5 Add 2* same place*/ + pslld mm6,1 /*6*/ + paddd mm1,mm6 /*7 Add 4* same place*/ + pshufw mm6,mm6,4Eh /*8 Swap the two double-words using bitmask 01001110=4Eh*/ + paddd mm1,mm6 /*9 Add 4* swapped*/ + movq mm5,mm6 /*10 Copy*/ + pand mm6,mm7 /*11 Get low double-word only*/ + paddd mm2,mm6 /*12 Add 4* in front one step*/ + pxor mm6,mm5 /*13 Get high double-word only*/ + paddd mm0,mm6 /*14 Add 4* behind one step*/ + movq mm0,mm1 /*15 Shift along*/ + movq mm1,mm2 /*16 Shift along*/ + /*Warming iteration two 17-32********************/ + movq mm4,[eax+8] /*17 Load new data*/ + paddd mm0,mm4 /*18 Add 1* behind two steps*/ + movq mm2,mm4 /*19 Start with 1* in front two steps*/ + pslld mm4,1 /*20*/ + paddd mm1,mm4 /*21 Add 2* same place*/ + pslld mm4,1 /*22*/ + paddd mm1,mm4 /*23 Add 4* same place*/ + pshufw mm4,mm4,4Eh /*24 Swap the two double-words using bitmask 01001110=4Eh*/ + paddd mm1,mm4 /*25 Add 4* 
swapped*/ + movq mm3,mm4 /*26 Copy*/ + pand mm4,mm7 /*27 Get low double-word only*/ + paddd mm2,mm4 /*28 Add 4* in front one step*/ + pxor mm4,mm3 /*29 Get high double-word only*/ + paddd mm0,mm4 /*30 Add 4* behind one step*/ + movq mm0,mm1 /*31 Shift along*/ + movq mm1,mm2 /*32 Shift along*/ + + /*Loop********************/ +loopstart: + /*First part of loop 33-47********/ + movq mm6,[eax+16] /*33 Load new data*/ + /*Stall*/ + paddd mm0,mm6 /*34 Add 1* behind two steps*/ + movq mm2,mm6 /*35 Start with 1* in front two steps*/ + movq mm4,[eax+24] /*48 Load new data*/ + pslld mm6,1 /*36*/ + paddd mm1,mm6 /*37 Add 2* same place*/ + pslld mm6,1 /*38*/ + paddd mm1,mm6 /*39 Add 4* same place*/ + pshufw mm6,mm6,4Eh /*40 Swap the two double-words using bitmask 01001110=4Eh*/ + paddd mm1,mm4 /*49 Add 1* behind two steps*/ + movq mm5,mm6 /*41 Copy*/ + paddd mm1,mm6 /*42 Add 4* swapped*/ + pand mm6,mm7 /*43 Get low double-word only*/ + paddd mm2,mm6 /*44 Add 4* in front one step*/ + pxor mm6,mm5 /*45 Get high double-word only*/ + paddd mm0,mm6 /*46 Add 4* behind one step*/ + movq mm6,mm4 /*50a Copy*/ + pslld mm4,1 /*51*/ + /*Stall*/ + movq [eax],mm0 /*47 Store result two steps behind*/ + /*Second part of loop 48-66********/ + movq mm0,mm6 /*50b Start with 1* in front two steps*/ + paddd mm2,mm4 /*52 Add 2* same place*/ + pslld mm4,1 /*53*/ + paddd mm2,mm4 /*54 Add 4* same place*/ + pshufw mm4,mm4,4Eh /*55 Swap the two double-words using bitmask 01001110=4Eh*/ + paddd mm2,mm4 /*56 Add 4* swapped*/ + movq mm3,mm4 /*57 Copy*/ + pand mm4,mm7 /*58 Get low double-word only*/ + /*Stall*/ + paddd mm0,mm4 /*59 Add 4* in front one step*/ + pxor mm4,mm3 /*60 Get high double-word only*/ + paddd mm1,mm4 /*61 Add 4* behind one step*/ + add eax,16 /*65*/ + dec esi /*66*/ + /*Stall*/ + movq [eax-8],mm1 /*62 Store result two steps behind*/ + movq mm1,mm0 /*63 Shift along*/ + movq mm0,mm2 /*64 Shift along*/ + jnz loopstart + + emms + } + +#else + int c; + + for(c=0;c<nc-4;c++) + { + 
g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4]; + } +#endif /*DB_USE_MMX*/ +} + +/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s +of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought. +s should be 16 byte aligned*/ +inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc) +{ + float k; + + k=0.06f; + + db_Filter14641_128_i(gxx,nc); + db_Filter14641_128_i(gxy,nc); + db_Filter14641_128_i(gyy,nc); + +#ifdef DB_USE_SIMD + + + _asm + { + mov esi,15 + mov eax,gxx + mov ebx,gxy + mov ecx,gyy + mov edx,s + + /*broadcast k to all positions of xmm7*/ + movss xmm7,k + shufps xmm7,xmm7,0 + + /*****Warm up 1-10**************************************/ + cvtpi2ps xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/ + /*Stall*/ + cvtpi2ps xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/ + movlhps xmm0,xmm0 /*2 Move them to the high double-word*/ + cvtpi2ps xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/ + movlhps xmm1,xmm1 /*5 Move them to the high double-word*/ + cvtpi2ps xmm0,[eax] /*3 Convert two integers into floating point of low double-word*/ + movlhps xmm2,xmm2 /*8 Move them to the high double-word*/ + cvtpi2ps xmm1,[ebx] /*6 Convert two integers into floating point of low double-word*/ + movaps xmm3,xmm0 /*10 Copy Cxx*/ + cvtpi2ps xmm2,[ecx] /*9 Convert two integers into floating point of low double-word*/ + /*Stall*/ +loopstart: + /*****First part of loop 11-18***********************/ + mulps xmm0,xmm2 /*11 Multiply to get Gxx*Gyy*/ + addps xmm2,xmm3 /*12 Add to get Gxx+Gyy*/ + cvtpi2ps xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/ + mulps xmm1,xmm1 /*13 Multiply to get Gxy*Gxy*/ + mulps xmm2,xmm2 /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ + movlhps xmm4,xmm4 /*20 Move them to the high double-word*/ + cvtpi2ps xmm4,[eax+16] /*21 
Convert two integers into floating point of low double-word*/ + /*Stall*/ + subps xmm0,xmm1 /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/ + mulps xmm2,xmm7 /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ + cvtpi2ps xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/ + /*Stall*/ + movlhps xmm5,xmm5 /*23 Move them to the high double-word*/ + /*Stall*/ + cvtpi2ps xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/ + subps xmm0,xmm2 /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ + cvtpi2ps xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/ + /*Stall*/ + movaps [edx],xmm0 /*18 Store*/ + /*****Second part of loop 26-40***********************/ + movlhps xmm6,xmm6 /*26 Move them to the high double-word*/ + cvtpi2ps xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/ + movaps xmm3,xmm4 /*28 Copy Cxx*/ + mulps xmm4,xmm6 /*29 Multiply to get Gxx*Gyy*/ + addps xmm6,xmm3 /*30 Add to get Gxx+Gyy*/ + cvtpi2ps xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/ + mulps xmm5,xmm5 /*31 Multiply to get Gxy*Gxy*/ + cvtpi2ps xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/ + mulps xmm6,xmm6 /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ + cvtpi2ps xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/ + movlhps xmm0,xmm0 /*(2 Next) Move them to the high double-word*/ + subps xmm4,xmm5 /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/ + movlhps xmm1,xmm1 /*(5 Next) Move them to the high double-word*/ + cvtpi2ps xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/ + mulps xmm6,xmm7 /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ + cvtpi2ps xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/ + movlhps xmm2,xmm2 /*(8 Next) Move them to the high double-word*/ + movaps xmm3,xmm0 /*(10 Next) Copy Cxx*/ + add eax,32 /*37*/ 
+ subps xmm4,xmm6 /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ + add ebx,32 /*38*/ + cvtpi2ps xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/ + /*Stall*/ + movaps [edx+16],xmm4 /*36 Store*/ + /*Stall*/ + add ecx,32 /*39*/ + add edx,32 /*40*/ + dec esi /*41*/ + jnz loopstart + + /****Cool down***************/ + mulps xmm0,xmm2 /*Multiply to get Gxx*Gyy*/ + addps xmm2,xmm3 /*Add to get Gxx+Gyy*/ + mulps xmm1,xmm1 /*Multiply to get Gxy*Gxy*/ + mulps xmm2,xmm2 /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ + subps xmm0,xmm1 /*Subtract to get Gxx*Gyy-Gxy*Gxy*/ + mulps xmm2,xmm7 /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ + subps xmm0,xmm2 /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ + movaps [edx],xmm0 /*Store*/ + } + +#else + float Gxx,Gxy,Gyy,det,trc; + int c; + + //for(c=0;c<124;c++) + for(c=0;c<nc-4;c++) + { + Gxx=(float)gxx[c]; + Gxy=(float)gxy[c]; + Gyy=(float)gyy[c]; + + det=Gxx*Gyy-Gxy*Gxy; + trc=Gxx+Gyy; + s[c]=det-k*trc*trc; + } +#endif /*DB_USE_SIMD*/ +} + +/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and +store it into the corresponding region of s. 
left and top have to be at least 3 and
right and bottom have to be at most width-4,height-4*/
inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
                                     /*temp should point to at least
                                     13*(right-left+5) of allocated memory*/
                                     float *temp)
{
    float *Ix[5],*Iy[5];
    float *gxx,*gxy,*gyy;
    int row,width,padded_width;

    /* The working rows are four pixels wider than the chunk because the
       horizontal 1-4-6-4-1 filter needs two extra pixels on each side */
    width=right-left+1;
    padded_width=width+4;

    /* Carve the scratch memory into three vertical-filter output rows
       followed by five wrap-around (Ix,Iy) derivative row pairs:
       3*padded_width + 5*2*padded_width = 13*padded_width floats in total */
    gxx=temp;
    gxy=gxx+padded_width;
    gyy=gxy+padded_width;
    for(row=0;row<5;row++)
    {
        Ix[row]=gyy+padded_width*(1+2*row);
        Iy[row]=Ix[row]+padded_width;
    }

    /*Fill four rows of the wrap-around derivative buffers*/
    for(row=top-2;row<top+2;row++) db_IxIyRow_f(Ix[row%5],Iy[row%5],img,row,left-2,padded_width);

    /*For each output row*/
    for(row=top;row<=bottom;row++)
    {
        /*Step the derivative buffers*/
        db_IxIyRow_f(Ix[(row+2)%5],Iy[(row+2)%5],img,(row+2),left-2,padded_width);

        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,padded_width,
                             Ix[(row-2)%5],Ix[(row-1)%5],Ix[row%5],Ix[(row+1)%5],Ix[(row+2)%5],
                             Iy[(row-2)%5],Iy[(row-1)%5],Iy[row%5],Iy[(row+1)%5],Iy[(row+2)%5]);

        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
        db_HarrisStrength_row_f(s,gxx,gxy,gyy,row,left,width);
    }
}

/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
store it into the corresponding region of s. left and top have to be at least 3 and
right and bottom have to be at most width-4,height-4. The left of the region in s should
The left of the region in s should +be 16 byte aligned*/ +inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom, + /*temp should point to at least + 18*128 of allocated memory*/ + int *temp, int nc) +{ + int *Ixx[5],*Ixy[5],*Iyy[5]; + int *gxx,*gxy,*gyy; + int i; + + gxx=temp; + gxy=gxx+128; + gyy=gxy+128; + for(i=0;i<5;i++) + { + Ixx[i]=gyy+(3*i+1)*128; + Ixy[i]=gyy+(3*i+2)*128; + Iyy[i]=gyy+(3*i+3)*128; + } + + /*Fill four rows of the wrap-around derivative buffers*/ + for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc); + + /*For each output row*/ + for(i=top;i<=bottom;i++) + { + /*Step the derivative buffers*/ + db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc); + + /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/ + db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc); + + /*Filter gxx,gxy,gyy horizontally and compute corner response s*/ + db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc); + } + +} + +/*Compute Harris corner strength of img. Strength is returned for the region +with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the +same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high +for a meaningful result*/ +void db_HarrisStrength_f(float **s,const float * const *img,int w,int h, + /*temp should point to at least + 13*(chunk_width+4) of allocated memory*/ + float *temp, + int chunk_width) +{ + int x,next_x,last,right; + + last=w-4; + for(x=3;x<=last;x=next_x) + { + next_x=x+chunk_width; + right=next_x-1; + if(right>last) right=last; + /*Compute the Harris strength of a chunk*/ + db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp); + } +} + +/*Compute Harris corner strength of img. Strength is returned for the region +with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the +same place in s. 
In other words,image should be at least 7 pixels wide and 7 pixels high +for a meaningful result.Moreover, the image should be overallocated by 256 bytes. +s[i][3] should by 16 byte aligned for any i*/ +void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h, + /*temp should point to at least + 18*128 of allocated memory*/ + int *temp) +{ + int x,next_x,last; + int nc; + + last=w-4; + for(x=3;x<=last;x=next_x) + { + next_x=x+124; + + // mayban: to revert to the original full chunks state, change the line below to: nc = 128; + nc = db_mini(128,last-x+1); + //nc = 128; + + /*Compute the Harris strength of a chunk*/ + db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc); + } +} + +inline float db_Max_128Aligned16_f(float *v) +{ +#ifdef DB_USE_SIMD + float back; + + _asm + { + mov eax,v + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + movaps xmm2,[eax+32] + movaps xmm3,[eax+48] + movaps xmm4,[eax+64] + movaps xmm5,[eax+80] + movaps xmm6,[eax+96] + movaps xmm7,[eax+112] + + /*Chunk2*/ + maxps xmm0,[eax+128] + maxps xmm1,[eax+144] + maxps xmm2,[eax+160] + maxps xmm3,[eax+176] + maxps xmm4,[eax+192] + maxps xmm5,[eax+208] + maxps xmm6,[eax+224] + maxps xmm7,[eax+240] + + /*Chunk3*/ + maxps xmm0,[eax+256] + maxps xmm1,[eax+272] + maxps xmm2,[eax+288] + maxps xmm3,[eax+304] + maxps xmm4,[eax+320] + maxps xmm5,[eax+336] + maxps xmm6,[eax+352] + maxps xmm7,[eax+368] + + /*Chunk4*/ + maxps xmm0,[eax+384] + maxps xmm1,[eax+400] + maxps xmm2,[eax+416] + maxps xmm3,[eax+432] + maxps xmm4,[eax+448] + maxps xmm5,[eax+464] + maxps xmm6,[eax+480] + maxps xmm7,[eax+496] + + /*Collect*/ + maxps xmm0,xmm1 + maxps xmm2,xmm3 + maxps xmm4,xmm5 + maxps xmm6,xmm7 + maxps xmm0,xmm2 + maxps xmm4,xmm6 + maxps xmm0,xmm4 + movhlps xmm1,xmm0 + maxps xmm0,xmm1 + shufps xmm1,xmm0,1 + maxps xmm0,xmm1 + movss back,xmm0 + } + + return(back); +#else + float val,max_val; + float *p,*stop_p; + max_val=v[0]; + for(p=v+1,stop_p=v+128;p!=stop_p;) + { + val= *p++; + 
if(val>max_val) max_val=val; + } + return(max_val); +#endif /*DB_USE_SIMD*/ +} + +inline float db_Max_64Aligned16_f(float *v) +{ +#ifdef DB_USE_SIMD + float back; + + _asm + { + mov eax,v + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + movaps xmm2,[eax+32] + movaps xmm3,[eax+48] + movaps xmm4,[eax+64] + movaps xmm5,[eax+80] + movaps xmm6,[eax+96] + movaps xmm7,[eax+112] + + /*Chunk2*/ + maxps xmm0,[eax+128] + maxps xmm1,[eax+144] + maxps xmm2,[eax+160] + maxps xmm3,[eax+176] + maxps xmm4,[eax+192] + maxps xmm5,[eax+208] + maxps xmm6,[eax+224] + maxps xmm7,[eax+240] + + /*Collect*/ + maxps xmm0,xmm1 + maxps xmm2,xmm3 + maxps xmm4,xmm5 + maxps xmm6,xmm7 + maxps xmm0,xmm2 + maxps xmm4,xmm6 + maxps xmm0,xmm4 + movhlps xmm1,xmm0 + maxps xmm0,xmm1 + shufps xmm1,xmm0,1 + maxps xmm0,xmm1 + movss back,xmm0 + } + + return(back); +#else + float val,max_val; + float *p,*stop_p; + max_val=v[0]; + for(p=v+1,stop_p=v+64;p!=stop_p;) + { + val= *p++; + if(val>max_val) max_val=val; + } + return(max_val); +#endif /*DB_USE_SIMD*/ +} + +inline float db_Max_32Aligned16_f(float *v) +{ +#ifdef DB_USE_SIMD + float back; + + _asm + { + mov eax,v + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + movaps xmm2,[eax+32] + movaps xmm3,[eax+48] + movaps xmm4,[eax+64] + movaps xmm5,[eax+80] + movaps xmm6,[eax+96] + movaps xmm7,[eax+112] + + /*Collect*/ + maxps xmm0,xmm1 + maxps xmm2,xmm3 + maxps xmm4,xmm5 + maxps xmm6,xmm7 + maxps xmm0,xmm2 + maxps xmm4,xmm6 + maxps xmm0,xmm4 + movhlps xmm1,xmm0 + maxps xmm0,xmm1 + shufps xmm1,xmm0,1 + maxps xmm0,xmm1 + movss back,xmm0 + } + + return(back); +#else + float val,max_val; + float *p,*stop_p; + max_val=v[0]; + for(p=v+1,stop_p=v+32;p!=stop_p;) + { + val= *p++; + if(val>max_val) max_val=val; + } + return(max_val); +#endif /*DB_USE_SIMD*/ +} + +inline float db_Max_16Aligned16_f(float *v) +{ +#ifdef DB_USE_SIMD + float back; + + _asm + { + mov eax,v + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + movaps xmm2,[eax+32] + 
movaps xmm3,[eax+48] + + /*Collect*/ + maxps xmm0,xmm1 + maxps xmm2,xmm3 + maxps xmm0,xmm2 + movhlps xmm1,xmm0 + maxps xmm0,xmm1 + shufps xmm1,xmm0,1 + maxps xmm0,xmm1 + movss back,xmm0 + } + + return(back); +#else + float val,max_val; + float *p,*stop_p; + max_val=v[0]; + for(p=v+1,stop_p=v+16;p!=stop_p;) + { + val= *p++; + if(val>max_val) max_val=val; + } + return(max_val); +#endif /*DB_USE_SIMD*/ +} + +inline float db_Max_8Aligned16_f(float *v) +{ +#ifdef DB_USE_SIMD + float back; + + _asm + { + mov eax,v + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + + /*Collect*/ + maxps xmm0,xmm1 + movhlps xmm1,xmm0 + maxps xmm0,xmm1 + shufps xmm1,xmm0,1 + maxps xmm0,xmm1 + movss back,xmm0 + } + + return(back); +#else + float val,max_val; + float *p,*stop_p; + max_val=v[0]; + for(p=v+1,stop_p=v+8;p!=stop_p;) + { + val= *p++; + if(val>max_val) max_val=val; + } + return(max_val); +#endif /*DB_USE_SIMD*/ +} + +inline float db_Max_Aligned16_f(float *v,int size) +{ + float val,max_val; + float *stop_v; + + max_val=v[0]; + for(;size>=128;size-=128) + { + val=db_Max_128Aligned16_f(v); + v+=128; + if(val>max_val) max_val=val; + } + if(size&64) + { + val=db_Max_64Aligned16_f(v); + v+=64; + if(val>max_val) max_val=val; + } + if(size&32) + { + val=db_Max_32Aligned16_f(v); + v+=32; + if(val>max_val) max_val=val; + } + if(size&16) + { + val=db_Max_16Aligned16_f(v); + v+=16; + if(val>max_val) max_val=val; + } + if(size&8) + { + val=db_Max_8Aligned16_f(v); + v+=8; + if(val>max_val) max_val=val; + } + if(size&7) + { + for(stop_v=v+(size&7);v!=stop_v;) + { + val= *v++; + if(val>max_val) max_val=val; + } + } + + return(max_val); +} + +/*Find maximum value of img in the region starting at (left,top) +and with width w and height h. 
img[left] should be 16 byte aligned*/ +float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h) +{ + float val,max_val; + int i,stop_i; + + if(w && h) + { + stop_i=top+h; + max_val=img[top][left]; + + for(i=top;i<stop_i;i++) + { + val=db_Max_Aligned16_f(img[i]+left,w); + if(val>max_val) max_val=val; + } + return(max_val); + } + return(0.0); +} + +inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2) +{ +#ifdef DB_USE_SIMD + _asm + { + mov eax,v1 + mov ebx,v2 + mov ecx,m + + /*Chunk1*/ + movaps xmm0,[eax] + movaps xmm1,[eax+16] + movaps xmm2,[eax+32] + movaps xmm3,[eax+48] + movaps xmm4,[eax+64] + movaps xmm5,[eax+80] + movaps xmm6,[eax+96] + movaps xmm7,[eax+112] + maxps xmm0,[ebx] + maxps xmm1,[ebx+16] + maxps xmm2,[ebx+32] + maxps xmm3,[ebx+48] + maxps xmm4,[ebx+64] + maxps xmm5,[ebx+80] + maxps xmm6,[ebx+96] + maxps xmm7,[ebx+112] + movaps [ecx],xmm0 + movaps [ecx+16],xmm1 + movaps [ecx+32],xmm2 + movaps [ecx+48],xmm3 + movaps [ecx+64],xmm4 + movaps [ecx+80],xmm5 + movaps [ecx+96],xmm6 + movaps [ecx+112],xmm7 + + /*Chunk2*/ + movaps xmm0,[eax+128] + movaps xmm1,[eax+144] + movaps xmm2,[eax+160] + movaps xmm3,[eax+176] + movaps xmm4,[eax+192] + movaps xmm5,[eax+208] + movaps xmm6,[eax+224] + movaps xmm7,[eax+240] + maxps xmm0,[ebx+128] + maxps xmm1,[ebx+144] + maxps xmm2,[ebx+160] + maxps xmm3,[ebx+176] + maxps xmm4,[ebx+192] + maxps xmm5,[ebx+208] + maxps xmm6,[ebx+224] + maxps xmm7,[ebx+240] + movaps [ecx+128],xmm0 + movaps [ecx+144],xmm1 + movaps [ecx+160],xmm2 + movaps [ecx+176],xmm3 + movaps [ecx+192],xmm4 + movaps [ecx+208],xmm5 + movaps [ecx+224],xmm6 + movaps [ecx+240],xmm7 + + /*Chunk3*/ + movaps xmm0,[eax+256] + movaps xmm1,[eax+272] + movaps xmm2,[eax+288] + movaps xmm3,[eax+304] + movaps xmm4,[eax+320] + movaps xmm5,[eax+336] + movaps xmm6,[eax+352] + movaps xmm7,[eax+368] + maxps xmm0,[ebx+256] + maxps xmm1,[ebx+272] + maxps xmm2,[ebx+288] + maxps xmm3,[ebx+304] + maxps xmm4,[ebx+320] + maxps xmm5,[ebx+336] + maxps 
xmm6,[ebx+352] + maxps xmm7,[ebx+368] + movaps [ecx+256],xmm0 + movaps [ecx+272],xmm1 + movaps [ecx+288],xmm2 + movaps [ecx+304],xmm3 + movaps [ecx+320],xmm4 + movaps [ecx+336],xmm5 + movaps [ecx+352],xmm6 + movaps [ecx+368],xmm7 + + /*Chunk4*/ + movaps xmm0,[eax+384] + movaps xmm1,[eax+400] + movaps xmm2,[eax+416] + movaps xmm3,[eax+432] + movaps xmm4,[eax+448] + movaps xmm5,[eax+464] + movaps xmm6,[eax+480] + movaps xmm7,[eax+496] + maxps xmm0,[ebx+384] + maxps xmm1,[ebx+400] + maxps xmm2,[ebx+416] + maxps xmm3,[ebx+432] + maxps xmm4,[ebx+448] + maxps xmm5,[ebx+464] + maxps xmm6,[ebx+480] + maxps xmm7,[ebx+496] + movaps [ecx+384],xmm0 + movaps [ecx+400],xmm1 + movaps [ecx+416],xmm2 + movaps [ecx+432],xmm3 + movaps [ecx+448],xmm4 + movaps [ecx+464],xmm5 + movaps [ecx+480],xmm6 + movaps [ecx+496],xmm7 + } +#else + int i; + float a,b; + for(i=0;i<128;i++) + { + a=v1[i]; + b=v2[i]; + if(a>=b) m[i]=a; + else m[i]=b; + } +#endif /*DB_USE_SIMD*/ +} + +inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2) +{ +#ifdef DB_USE_SIMD + _asm + { + mov eax,v1 + mov ebx,v2 + mov ecx,m + + /*Chunk1*/ + movups xmm0,[eax] + movups xmm1,[eax+16] + movups xmm2,[eax+32] + movups xmm3,[eax+48] + movups xmm4,[eax+64] + movups xmm5,[eax+80] + movups xmm6,[eax+96] + movups xmm7,[eax+112] + maxps xmm0,[ebx] + maxps xmm1,[ebx+16] + maxps xmm2,[ebx+32] + maxps xmm3,[ebx+48] + maxps xmm4,[ebx+64] + maxps xmm5,[ebx+80] + maxps xmm6,[ebx+96] + maxps xmm7,[ebx+112] + movaps [ecx],xmm0 + movaps [ecx+16],xmm1 + movaps [ecx+32],xmm2 + movaps [ecx+48],xmm3 + movaps [ecx+64],xmm4 + movaps [ecx+80],xmm5 + movaps [ecx+96],xmm6 + movaps [ecx+112],xmm7 + + /*Chunk2*/ + movups xmm0,[eax+128] + movups xmm1,[eax+144] + movups xmm2,[eax+160] + movups xmm3,[eax+176] + movups xmm4,[eax+192] + movups xmm5,[eax+208] + movups xmm6,[eax+224] + movups xmm7,[eax+240] + maxps xmm0,[ebx+128] + maxps xmm1,[ebx+144] + maxps xmm2,[ebx+160] + maxps xmm3,[ebx+176] + maxps xmm4,[ebx+192] + 
maxps xmm5,[ebx+208] + maxps xmm6,[ebx+224] + maxps xmm7,[ebx+240] + movaps [ecx+128],xmm0 + movaps [ecx+144],xmm1 + movaps [ecx+160],xmm2 + movaps [ecx+176],xmm3 + movaps [ecx+192],xmm4 + movaps [ecx+208],xmm5 + movaps [ecx+224],xmm6 + movaps [ecx+240],xmm7 + + /*Chunk3*/ + movups xmm0,[eax+256] + movups xmm1,[eax+272] + movups xmm2,[eax+288] + movups xmm3,[eax+304] + movups xmm4,[eax+320] + movups xmm5,[eax+336] + movups xmm6,[eax+352] + movups xmm7,[eax+368] + maxps xmm0,[ebx+256] + maxps xmm1,[ebx+272] + maxps xmm2,[ebx+288] + maxps xmm3,[ebx+304] + maxps xmm4,[ebx+320] + maxps xmm5,[ebx+336] + maxps xmm6,[ebx+352] + maxps xmm7,[ebx+368] + movaps [ecx+256],xmm0 + movaps [ecx+272],xmm1 + movaps [ecx+288],xmm2 + movaps [ecx+304],xmm3 + movaps [ecx+320],xmm4 + movaps [ecx+336],xmm5 + movaps [ecx+352],xmm6 + movaps [ecx+368],xmm7 + + /*Chunk4*/ + movups xmm0,[eax+384] + movups xmm1,[eax+400] + movups xmm2,[eax+416] + movups xmm3,[eax+432] + movups xmm4,[eax+448] + movups xmm5,[eax+464] + movups xmm6,[eax+480] + movups xmm7,[eax+496] + maxps xmm0,[ebx+384] + maxps xmm1,[ebx+400] + maxps xmm2,[ebx+416] + maxps xmm3,[ebx+432] + maxps xmm4,[ebx+448] + maxps xmm5,[ebx+464] + maxps xmm6,[ebx+480] + maxps xmm7,[ebx+496] + movaps [ecx+384],xmm0 + movaps [ecx+400],xmm1 + movaps [ecx+416],xmm2 + movaps [ecx+432],xmm3 + movaps [ecx+448],xmm4 + movaps [ecx+464],xmm5 + movaps [ecx+480],xmm6 + movaps [ecx+496],xmm7 + } +#else + int i; + float a,b; + for(i=0;i<128;i++) + { + a=v1[i]; + b=v2[i]; + if(a>=b) m[i]=a; + else m[i]=b; + } +#endif /*DB_USE_SIMD*/ +} + +/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and +stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row. +The input s should be of width at least 128, and exist for 2 pixels outside the specified region. +s[i][left-2] and sf[i][left-2] should be 16 byte aligned. 
Top must be at least 3*/
inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
                                                      /*temp should point to at least
                                                      6*132 floats of 16-byte-aligned allocated memory*/
                                                      float *temp)
{
#ifdef DB_USE_SIMD
    int i,lm2;
    /*Wrap-around ring of four row-pair max buffers, indexed by i&3.
    Each holds, for 128 columns, the max of two vertically adjacent source rows*/
    float *two[4];
    float *four,*five;

    /*Output is shifted two steps left, so all reads/writes start at column left-2*/
    lm2=left-2;

    /*Set pointers to pre-allocated memory*/
    four=temp;
    five=four+132;
    for(i=0;i<4;i++)
    {
        two[i]=five+(i+1)*132;
    }

    /*Set rests of four and five to zero to avoid
    floating point exceptions*/
    for(i=129;i<132;i++)
    {
        four[i]=0.0;
        five[i]=0.0;
    }

    /*Fill three rows of the wrap-around max buffers*/
    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);

    /*For each output row*/
    for(;i<=bottom;i++)
    {
        /*Compute max of the lowest pair of rows in the five row window*/
        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
        /*Compute max of the lowest and highest pair of rows in the five row window*/
        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
        /*Compute max of all rows*/
        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
        /*Compute max of 2x5 chunks*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
        /*Compute max of pairs of 2x5 chunks*/
        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
        /*Compute max of pairs of 5x5 except middle*/
        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
    }

#else
    /*Scalar fallback. NOTE(review): unlike the SIMD path, this writes 0.0 at
    strict 5x5 local maxima and the pixel value elsewhere; downstream code
    appears to only compare s against its neighborhood, so both encodings
    suppress non-maxima equivalently for positive strengths — verify if the
    scalar path is ever enabled*/
    int i,j,right;
    float sv;

    right=left+128;
    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
    {
        sv=s[i][j];

        /*Strict greater-than against all 24 neighbors in the 5x5 window
        (center excluded)*/
        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
            sv>s[ i][j-2] && sv>s[ i][j-1] &&                  sv>s[ i][j+1] && sv>s[ i][j+2] &&
            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
        {
            sf[i][j-2]=0.0;
        }
        else sf[i][j-2]=sv;
    }
#endif /*DB_USE_SIMD*/
}

/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
Top must be at least 3. Reading and writing from and to the input and output images is done
as if the region had a width equal to a multiple of 124. If this is not the case, the images
should be over-allocated and the input cleared for a sufficient region*/
void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
                                          /*temp should point to at least
                                          6*132 floats of 16-byte-aligned allocated memory*/
                                          float *temp)
{
    int x,next_x;

    /*Process the region in strides of 124 output columns; each chunk call
    touches 128 floats per row, hence the over-allocation requirement above*/
    for(x=left;x<=right;x=next_x)
    {
        next_x=x+124;
        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
    }
}

/*Extract corners from the chunk (left,top) to (right,bottom).
Store in x_temp,y_temp and s_temp
which should point to space of at least as many positions as there are pixels in the chunk*/
inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
{
    int i,j,nr;
    float s;

    nr=0;
    for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
    {
        s=strength[i][j];

        /*Keep a pixel only if it passes the absolute threshold AND is a
        strict 5x5 local maximum (center excluded from the comparison).
        The strength image must exist 2 pixels outside the chunk*/
        if(s>=threshold &&
            s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
            s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
            s>strength[ i][j-2] && s>strength[ i][j-1] &&                        s>strength[ i][j+1] && s>strength[ i][j+2] &&
            s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
            s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
        {
            x_temp[nr]=(double) j;
            y_temp[nr]=(double) i;
            s_temp[nr]=(double) s;
            nr++;
        }
    }
    return(nr);
}


//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
//Fits a quadratic surface to the 3x3 strength neighborhood via central
//differences and takes one Newton step toward its extremum; (xs,ys) is left
//at (xd,yd) when the system is near-singular or the step exceeds one pixel.
inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
{
    int x = (int) xd;
    int y = (int) yd;

    /*Second derivatives by central differences*/
    float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
    float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
    float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;

    /*2*det(Hessian); the factor 2 cancels the doubled central-difference gradient below*/
    float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;

    xs = xd;
    ys = yd;

    if ( db_absf(denom) <= FLT_EPSILON )
    {
        /*Degenerate curvature: keep the integer-pixel location*/
        return;
    }
    else
    {
        /*First derivatives (times 2) by central differences*/
        float fx = strength[y][x+1] - strength[y][x-1];
        float fy = strength[y+1][x] - strength[y-1][x];

        float dx = (fyy * fx - fxy * fy) / denom;
        float dy = (fxx * fy - fxy * fx) / denom;

        if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
        {
            /*Refinement left the pixel: distrust it and keep the original*/
            return;
        }
        else
        {
            xs -= dx;
            ys -= dy;
        }
    }

    return;
}

/*Extract corners from the image part from (left,top) to (right,bottom).
Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
The pointer temp_d should point to at least 5*bw*bh positions.
area_factor holds how many corners max to extract per 10000 pixels*/
void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
                          int bw,int bh,unsigned long area_factor,
                          float threshold,double *temp_d,
                          double *x_coord,double *y_coord,int *nr_corners)
{
    /*temp_d is carved into four bw*bh scratch arrays*/
    double *x_temp,*y_temp,*s_temp,*select_temp;
    double loc_thresh;
    unsigned long bwbh,area,saturation;
    int x,next_x,last_x;
    int y,next_y,last_y;
    int nr,nr_points,i,stop;

    bwbh=bw*bh;
    x_temp=temp_d;
    y_temp=x_temp+bwbh;
    s_temp=y_temp+bwbh;
    select_temp=s_temp+bwbh;

#ifdef DB_SUB_PIXEL
    // subpixel processing may sometimes push the corner outside the real border
    // increasing border size:
    left++;
    top++;
    bottom--;
    right--;
#endif /*DB_SUB_PIXEL*/

    nr_points=0;
    /*Walk the region in (bw x bh) blocks, clamping the last block to the border*/
    for(y=top;y<=bottom;y=next_y)
    {
        next_y=y+bh;
        last_y=next_y-1;
        if(last_y>bottom) last_y=bottom;
        for(x=left;x<=right;x=next_x)
        {
            next_x=x+bw;
            last_x=next_x-1;
            if(last_x>right) last_x=right;

            /*Per-block corner budget, proportional to block area*/
            area=(last_x-x+1)*(last_y-y+1);
            saturation=(area*area_factor)/10000;
            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
            if(nr)
            {
                /*If the block is over budget, raise the local threshold to the
                strength of the (nr-saturation)-th weakest candidate*/
                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
                else loc_thresh=threshold;

                stop=nr_points+saturation;
                for(i=0;(i<nr)&&(nr_points<stop);i++)
                {
                    if(s_temp[i]>=loc_thresh)
                    {
                        #ifdef DB_SUB_PIXEL
                        db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
                        #else
                        x_coord[nr_points]=x_temp[i];
                        y_coord[nr_points]=y_temp[i];
                        #endif

                        nr_points++;
                    }
                }
            }
        }
    }
    *nr_corners=nr_points;
}

db_CornerDetector_f::db_CornerDetector_f()
{
    /*Zero dimensions mark the detector as unallocated; Clean() keys off m_w*/
    m_w=0; m_h=0;
}

db_CornerDetector_f::~db_CornerDetector_f()
{
    Clean();
}

/*Release all scratch and strength-image memory; safe to call repeatedly
because m_w==0 marks the unallocated state*/
void db_CornerDetector_f::Clean()
{
    if(m_w!=0)
    {
        delete [] m_temp_f;
        delete [] m_temp_d;
        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
    }
    m_w=0; m_h=0;
}

/*Derive block layout and per-area corner budget from the requested totals,
then allocate via Start(). Returns the max number of corners per frame*/
unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
                          int nr_horizontal_blocks,int nr_vertical_blocks,
                          double absolute_threshold,double relative_threshold)
{
    int chunkwidth=208;
    int block_width,block_height;
    unsigned long area_factor;
    int active_width,active_height;

    /*10-pixel border is excluded from the active detection area*/
    active_width=db_maxi(1,im_width-10);
    active_height=db_maxi(1,im_height-10);
    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
    block_height=db_maxi(1,active_height/nr_vertical_blocks);

    /*Corners per 10000 pixels, clamped to [1,1000]*/
    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
        (((double)active_width)*((double)active_height)))));

    return(Start(im_width,im_height,block_width,block_height,area_factor,
        absolute_threshold,relative_threshold,chunkwidth));
}

/*Store parameters and allocate scratch plus the strength image.
Returns the upper bound on corners reported by DetectCorners()*/
unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
                           int block_width,int block_height,unsigned long area_factor,
                           double absolute_threshold,double relative_threshold,int chunkwidth)
{
    Clean();

    m_w=im_width;
    m_h=im_height;
    m_cw=chunkwidth;
    m_bw=block_width;
    m_bh=block_height;
    m_area_factor=area_factor;
    m_r_thresh=relative_threshold;
    m_a_thresh=absolute_threshold;
    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);

    m_temp_f=new float[13*(m_cw+4)];
    m_temp_d=new double[5*m_bw*m_bh];
    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);

    return(m_max_nr);
}

/*Compute the Harris strength image, choose the threshold (relative to the
image maximum when m_r_thresh is set, otherwise absolute), then extract
saturated corners. x_coord/y_coord must hold at least Init()'s return value*/
void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
{
    float max_val,threshold;

    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);

    if(m_r_thresh)
    {
        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
        threshold= (float)
db_maxd(m_a_thresh,max_val*m_r_thresh); + } + else threshold= (float) m_a_thresh; + + db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold, + m_temp_d,x_coord,y_coord,nr_corners); +} + +db_CornerDetector_u::db_CornerDetector_u() +{ + m_w=0; m_h=0; +} + +db_CornerDetector_u::~db_CornerDetector_u() +{ + Clean(); +} + +db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd) +{ + Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, + cd.m_a_thresh, cd.m_r_thresh); +} + +db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd) +{ + if ( this == &cd ) return *this; + + Clean(); + + Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, + cd.m_a_thresh, cd.m_r_thresh); + + return *this; +} + +void db_CornerDetector_u::Clean() +{ + if(m_w!=0) + { + delete [] m_temp_i; + delete [] m_temp_d; + db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h); + } + m_w=0; m_h=0; +} + +unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners, + int nr_horizontal_blocks,int nr_vertical_blocks, + double absolute_threshold,double relative_threshold) +{ + int block_width,block_height; + unsigned long area_factor; + int active_width,active_height; + + active_width=db_maxi(1,im_width-10); + active_height=db_maxi(1,im_height-10); + block_width=db_maxi(1,active_width/nr_horizontal_blocks); + block_height=db_maxi(1,active_height/nr_vertical_blocks); + + area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/ + (((double)active_width)*((double)active_height))))); + + return(Start(im_width,im_height,block_width,block_height,area_factor, + 16.0*absolute_threshold,relative_threshold)); +} + +unsigned long db_CornerDetector_u::Start(int im_width,int im_height, + int block_width,int block_height,unsigned long area_factor, + double absolute_threshold,double relative_threshold) +{ + Clean(); + + m_w=im_width; + m_h=im_height; + 
    m_bw=block_width;
    m_bh=block_height;
    m_area_factor=area_factor;
    m_r_thresh=relative_threshold;
    m_a_thresh=absolute_threshold;
    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);

    m_temp_i=new int[18*128];
    m_temp_d=new double[5*m_bw*m_bh];
    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);

    return(m_max_nr);
}

/*Compute the Harris strength image from the byte image, choose the threshold
(relative to the image maximum when m_r_thresh is set, otherwise absolute),
extract saturated corners, then optionally keep only corners whose rounded
location lies on mask pixels equal to fgnd. x_coord/y_coord must hold at
least Init()'s return value entries*/
void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
                                        const unsigned char * const *msk, unsigned char fgnd) const
{
    float max_val,threshold;

    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);


    if(m_r_thresh)
    {
        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
    }
    else threshold= (float) m_a_thresh;

    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
        m_temp_d,x_coord,y_coord,nr_corners);


    if ( msk )
    {
        /*Compact the coordinate arrays in place, keeping foreground corners only*/
        int nr_corners_mask=0;

        for ( int i = 0; i < *nr_corners; ++i)
        {
            int cor_x = db_roundi(*(x_coord+i));
            int cor_y = db_roundi(*(y_coord+i));
            if ( msk[cor_y][cor_x] == fgnd )
            {
                x_coord[nr_corners_mask] = x_coord[i];
                y_coord[nr_corners_mask] = y_coord[i];
                nr_corners_mask++;
            }
        }
        *nr_corners = nr_corners_mask;
    }
}

/*Extract corners from a caller-supplied, pre-computed strength image using
the stored block layout and absolute threshold; no-op until Start() has run*/
void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
    if ( m_w!=0 )
        db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
            m_temp_d,x_coord,y_coord,nr_corners);
}

diff --git a/jni/feature_stab/db_vlvm/db_feature_detection.h b/jni/feature_stab/db_vlvm/db_feature_detection.h
new file mode 100644
index 000000000..68ffcc9ad
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_detection.h
@@ -0,0 +1,179 @@
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*$Id: db_feature_detection.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/ + +#ifndef DB_FEATURE_DETECTION_H +#define DB_FEATURE_DETECTION_H + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup FeatureDetection Feature Detection + */ +#include "db_utilities.h" +#include "db_utilities_constants.h" +#include <stdlib.h> //for NULL + +/*! + * \class db_CornerDetector_f + * \ingroup FeatureDetection + * \brief Harris corner detector for float images. + * + * This class performs Harris corner extraction on *float* images managed + * with functions in \ref LMImageBasicUtilities. + */ +class DB_API db_CornerDetector_f +{ +public: + db_CornerDetector_f(); + ~db_CornerDetector_f(); + + /*! + * Set parameters and pre-allocate memory. Return an upper bound + * on the number of corners detected in one frame. 
+ * \param im_width width + * \param im_height height + * \param target_nr_corners + * \param nr_horizontal_blocks + * \param nr_vertical_blocks + * \param absolute_threshold + * \param relative_threshold + */ + unsigned long Init(int im_width,int im_height, + int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS, + int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS, + int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS, + double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD, + double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD); + + /*! + * Detect the corners. + * x_coord and y_coord should be pre-allocated arrays of length returned by Init(). + * \param img row array pointer + * \param x_coord corner locations + * \param y_coord corner locations + * \param nr_corners actual number of corners computed + */ + void DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const; + void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; }; + void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; }; +protected: + void Clean(); + unsigned long Start(int im_width,int im_height, + int block_width,int block_height,unsigned long area_factor, + double absolute_threshold,double relative_threshold,int chunkwidth); + + int m_w,m_h,m_cw,m_bw,m_bh; + /*Area factor holds the maximum number of corners to detect + per 10000 pixels*/ + unsigned long m_area_factor,m_max_nr; + double m_a_thresh,m_r_thresh; + float *m_temp_f; + double *m_temp_d; + float **m_strength,*m_strength_mem; +}; +/*! + * \class db_CornerDetector_u + * \ingroup FeatureDetection + * \brief Harris corner detector for byte images. + * + * This class performs Harris corner extraction on *byte* images managed + * with functions in \ref LMImageBasicUtilities. + */ +class DB_API db_CornerDetector_u +{ +public: + db_CornerDetector_u(); + virtual ~db_CornerDetector_u(); + + /*! + Copy ctor duplicates settings. + Memory is not copied. 
+ */ + db_CornerDetector_u(const db_CornerDetector_u& cd); + /*! + Assignment optor duplicates settings. + Memory not copied. + */ + db_CornerDetector_u& operator=(const db_CornerDetector_u& cd); + + /*! + * Set parameters and pre-allocate memory. Return an upper bound + * on the number of corners detected in one frame + */ + virtual unsigned long Init(int im_width,int im_height, + int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS, + int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS, + int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS, + double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD, + double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD); + + /*! + * Detect the corners. + * Observe that the image should be overallocated by at least 256 bytes + * at the end. + * x_coord and y_coord should be pre-allocated arrays of length returned by Init(). + * Specifying image mask will restrict corner output to foreground regions. + * Foreground value can be specified using fgnd. By default any >0 mask value + * is considered to be foreground + * \param img row array pointer + * \param x_coord corner locations + * \param y_coord corner locations + * \param nr_corners actual number of corners computed + * \param msk row array pointer to mask image + * \param fgnd foreground value in the mask + */ + virtual void DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners, + const unsigned char * const * msk=NULL, unsigned char fgnd=255) const; + + /*! + Set absolute feature threshold + */ + virtual void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; }; + /*! + Set relative feature threshold + */ + virtual void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; }; + + /*! + Extract corners from a pre-computed strength image. 
+ \param strength Harris strength image + \param x_coord corner locations + \param y_coord corner locations + \param nr_corners actual number of corners computed + */ + virtual void ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners); +protected: + virtual void Clean(); + /*The absolute threshold to this function should be 16.0 times + normal*/ + unsigned long Start(int im_width,int im_height, + int block_width,int block_height,unsigned long area_factor, + double absolute_threshold,double relative_threshold); + + int m_w,m_h,m_bw,m_bh; + /*Area factor holds the maximum number of corners to detect + per 10000 pixels*/ + unsigned long m_area_factor,m_max_nr; + double m_a_thresh,m_r_thresh; + int *m_temp_i; + double *m_temp_d; + float **m_strength,*m_strength_mem; +}; + +#endif /*DB_FEATURE_DETECTION_H*/ diff --git a/jni/feature_stab/db_vlvm/db_feature_matching.cpp b/jni/feature_stab/db_vlvm/db_feature_matching.cpp new file mode 100644 index 000000000..d278d0cf6 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_feature_matching.cpp @@ -0,0 +1,3410 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*$Id: db_feature_matching.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/ + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_utilities.h" +#include "db_feature_matching.h" +#ifdef _VERBOSE_ +#include <iostream> +#endif + + +int AffineWarpPoint_NN_LUT_x[11][11]; +int AffineWarpPoint_NN_LUT_y[11][11]; + +float AffineWarpPoint_BL_LUT_x[11][11]; +float AffineWarpPoint_BL_LUT_y[11][11]; + + +inline float db_SignedSquareNormCorr7x7_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g) +{ + unsigned char *pf,*pg; + float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-3; + xm_g=x_g-3; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + 
pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + 
f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=49.0f*fgsum-fsum*gsum; + den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline float db_SignedSquareNormCorr9x9_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g) +{ + unsigned char *pf,*pg; + float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-4; + xm_g=x_g-4; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; 
fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; 
fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; 
fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=81.0f*fgsum-fsum*gsum; + 
den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline float db_SignedSquareNormCorr11x11_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g) +{ + unsigned char *pf,*pg; + float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-5; + xm_g=x_g-5; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; 
fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g; + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; 
f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; 
g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; 
fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=121.0f*fgsum-fsum*gsum; + den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline void db_SignedSquareNormCorr11x11_Pre_u(unsigned char **f_img,int x_f,int y_f,float *sum,float *recip) +{ + unsigned char *pf; + float den; + int f,f2sum,fsum; + int xm_f; + + xm_f=x_f-5; + + pf=f_img[y_f-5]+xm_f; + f= *pf++; f2sum=f*f; fsum=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= 
*pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= 
*pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+5]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + *sum= (float) fsum; + den=(121.0f*f2sum-fsum*fsum); + *recip=(float)(((den!=0.0)?1.0/den:0.0)); +} + +inline void db_SignedSquareNormCorr5x5_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip) +{ + float den; + int f2sum,fsum; + int xm_f=x_f-2; + +#ifndef DB_USE_SSE2 + const unsigned char *pf; + short f; + + pf=f_img[y_f-2]+xm_f; + f= *pf++; f2sum=f*f; fsum=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+1]+xm_f; + f= 
*pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + //int xwi; + //int ywi; + //f2sum=0; + //fsum=0; + //for (int r=-5;r<=5;r++){ + // ywi=y_f+r; + // for (int c=-5;c<=5;c++){ + // xwi=x_f+c; + // f=f_img[ywi][xwi]; + // f2sum+=f*f; + // fsum+=f; + // (*patch++)=f; + // } + //} + (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; + (*patch++)=0; (*patch++)=0; +#endif /* DB_USE_SSE2 */ + + *sum= (float) fsum; + den=(25.0f*f2sum-fsum*fsum); + *recip= (float)((den!=0.0)?1.0/den:0.0); +} + +inline void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip) +{ + float den; + int f2sum,fsum; + int xm_f=x_f-10; + short f; + + int xwi; + int ywi; + f2sum=0; + fsum=0; + for (int r=-10;r<=10;r++){ + ywi=y_f+r; + for (int c=-10;c<=10;c++){ + xwi=x_f+c; + f=f_img[ywi][xwi]; + f2sum+=f*f; + fsum+=f; + (*patch++)=f; + } + } + + for(int i=442; i<512; i++) + (*patch++)=0; + + *sum= (float) fsum; + den=(441.0f*f2sum-fsum*fsum); + *recip= (float)((den!=0.0)?1.0/den:0.0); + + +} + +/* Lay out the image in the patch, computing norm and +*/ +inline void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip) +{ + float den; + int f2sum,fsum; + int xm_f=x_f-5; + +#ifndef DB_USE_SSE2 + const unsigned char *pf; + short f; + + pf=f_img[y_f-5]+xm_f; + f= *pf++; f2sum=f*f; fsum=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; 
f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + 
pf=f_img[y_f-1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; 
fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+5]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + //int xwi; + //int ywi; + //f2sum=0; + //fsum=0; + //for (int r=-5;r<=5;r++){ + // ywi=y_f+r; + // for (int c=-5;c<=5;c++){ + // xwi=x_f+c; + // f=f_img[ywi][xwi]; + // f2sum+=f*f; + // fsum+=f; + // 
(*patch++)=f; + // } + //} + + (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; + (*patch++)=0; (*patch++)=0; +#else + const unsigned char *pf0 =f_img[y_f-5]+xm_f; + const unsigned char *pf1 =f_img[y_f-4]+xm_f; + const unsigned char *pf2 =f_img[y_f-3]+xm_f; + const unsigned char *pf3 =f_img[y_f-2]+xm_f; + const unsigned char *pf4 =f_img[y_f-1]+xm_f; + const unsigned char *pf5 =f_img[y_f ]+xm_f; + const unsigned char *pf6 =f_img[y_f+1]+xm_f; + const unsigned char *pf7 =f_img[y_f+2]+xm_f; + const unsigned char *pf8 =f_img[y_f+3]+xm_f; + const unsigned char *pf9 =f_img[y_f+4]+xm_f; + const unsigned char *pf10=f_img[y_f+5]+xm_f; + + /* pixel mask */ + const unsigned char pm[16] = { + 0xFF,0xFF, + 0xFF,0xFF, + 0xFF,0xFF, + 0,0,0,0,0, + 0,0,0,0,0}; + const unsigned char * pm_p = pm; + + _asm + { + mov ecx,patch /* load patch pointer */ + mov ebx, pm_p /* load pixel mask pointer */ + movdqu xmm1,[ebx] /* load pixel mask */ + + pxor xmm5,xmm5 /* set xmm5 to 0 accumulator for sum squares */ + pxor xmm4,xmm4 /* set xmm4 to 0 accumulator for sum */ + pxor xmm0,xmm0 /* set xmm0 to 0 */ + + /* row 0 */ + mov eax,pf0 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqa [ecx+0*22],xmm7 /* move short values to patch */ + movdqa [ecx+0*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 1 */ + mov eax,pf1 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + 
punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+1*22],xmm7 /* move short values to patch */ + movdqu [ecx+1*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 2 */ + mov eax,pf2 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+2*22],xmm7 /* move short values to patch */ + movdqu [ecx+2*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 3 */ + mov eax,pf3 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+3*22],xmm7 /* move short values to patch */ + movdqu [ecx+3*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 4 */ + mov eax,pf4 /* 
load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+4*22],xmm7 /* move short values to patch */ + movdqu [ecx+4*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 5 */ + mov eax,pf5 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+5*22],xmm7 /* move short values to patch */ + movdqu [ecx+5*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 6 */ + mov eax,pf6 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+6*22],xmm7 /* move short values to patch */ + movdqu [ecx+6*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + 
pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 7 */ + mov eax,pf7 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+7*22],xmm7 /* move short values to patch */ + movdqu [ecx+7*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 8 */ + mov eax,pf8 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqa [ecx+8*22],xmm7 /* move short values to patch */ + movdqa [ecx+8*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 9 */ + mov eax,pf9 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+9*22],xmm7 /* move short values to patch */ + movdqu [ecx+9*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* 
multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit uints into 16 bit uints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* row 10 */ + mov eax,pf10 /* load image pointer */ + movdqu xmm7,[eax] /* load 16 pixels */ + movdqa xmm6,xmm7 + + punpcklbw xmm7,xmm0 /* unpack low pixels (first 8)*/ + punpckhbw xmm6,xmm0 /* unpack high pixels (last 8)*/ + + pand xmm6,xmm1 /* mask out pixels 12-16 */ + + movdqu [ecx+10*22],xmm7 /* move short values to patch */ + movdqu [ecx+10*22+16],xmm6 /* move short values to patch */ + + paddusw xmm4,xmm7 /* accumulate sums */ + pmaddwd xmm7,xmm7 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm7 /* accumulate sum squares */ + + paddw xmm4,xmm6 /* accumulate sums */ + pmaddwd xmm6,xmm6 /* multiply 16 bit ints and add into 32 bit ints */ + paddd xmm5,xmm6 /* accumulate sum squares */ + + /* add up the sum squares */ + movhlps xmm0,xmm5 /* high half to low half */ + paddd xmm5,xmm0 /* add high to low */ + pshuflw xmm0,xmm5, 0xE /* reshuffle */ + paddd xmm5,xmm0 /* add remaining */ + movd f2sum,xmm5 + + /* add up the sum */ + movhlps xmm0,xmm4 + paddw xmm4,xmm0 /* halves added */ + pshuflw xmm0,xmm4,0xE + paddw xmm4,xmm0 /* quarters added */ + pshuflw xmm0,xmm4,0x1 + paddw xmm4,xmm0 /* eighth added */ + movd fsum, xmm4 + + emms + } + + fsum = fsum & 0xFFFF; + + patch[126] = 0; + patch[127] = 0; +#endif /* DB_USE_SSE2 */ + + *sum= (float) fsum; + den=(121.0f*f2sum-fsum*fsum); + *recip= (float)((den!=0.0)?1.0/den:0.0); +} + +void AffineWarpPointOffset(float &r_w,float &c_w,double Hinv[9],int r,int c) +{ + r_w=(float)(Hinv[3]*c+Hinv[4]*r); + c_w=(float)(Hinv[0]*c+Hinv[1]*r); +} + + + +/*! +Prewarp the patches with given affine transform. 
For a given homogeneous point "x", "H*x" is +the warped point and for any displacement "d" in the warped image resulting in point "y", the +corresponding point in the original image is given by "Hinv*y", which can be simplified for affine H. +If "affine" is 1, then nearest neighbor method is used, else if it is 2, then +bilinear method is used. + */ +inline void db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(short *patch,const unsigned char * const *f_img, + int xi,int yi,float *sum,float *recip, + const double Hinv[9],int affine) +{ + float den; + short f; + int f2sum,fsum; + + f2sum=0; + fsum=0; + + if (affine==1) + { + for (int r=0;r<11;r++){ + for (int c=0;c<11;c++){ + f=f_img[yi+AffineWarpPoint_NN_LUT_y[r][c]][xi+AffineWarpPoint_NN_LUT_x[r][c]]; + f2sum+=f*f; + fsum+=f; + (*patch++)=f; + } + } + } + else if (affine==2) + { + for (int r=0;r<11;r++){ + for (int c=0;c<11;c++){ + f=db_BilinearInterpolation(yi+AffineWarpPoint_BL_LUT_y[r][c] + ,xi+AffineWarpPoint_BL_LUT_x[r][c],f_img); + f2sum+=f*f; + fsum+=f; + (*patch++)=f; + } + } + } + + + + (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; + (*patch++)=0; (*patch++)=0; + + *sum= (float) fsum; + den=(121.0f*f2sum-fsum*fsum); + *recip= (float)((den!=0.0)?1.0/den:0.0); +} + + +inline float db_SignedSquareNormCorr11x11_Post_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g, + float fsum_gsum,float f_recip_g_recip) +{ + unsigned char *pf,*pg; + int fgsum; + float fg_corr; + int xm_f,xm_g; + + xm_f=x_f-5; + xm_g=x_g-5; + + pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g; + fgsum=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + 
fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); 
fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + fg_corr=121.0f*fgsum-fsum_gsum; + if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip); + return(-fg_corr*fg_corr*f_recip_g_recip); +} + +float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip) +{ + float fgsum,fg_corr; + + fgsum= (float) db_ScalarProduct512_s(f_patch,g_patch); + + fg_corr=441.0f*fgsum-fsum_gsum; + if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip); + return(-fg_corr*fg_corr*f_recip_g_recip); +} + + +float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip) +{ + float fgsum,fg_corr; + + fgsum= (float) db_ScalarProduct128_s(f_patch,g_patch); + + fg_corr=121.0f*fgsum-fsum_gsum; + if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip); + return(-fg_corr*fg_corr*f_recip_g_recip); +} + +float db_SignedSquareNormCorr5x5Aligned_Post_s(const short *f_patch,const short 
/*!
Signed squared normalized cross-correlation between the 15x15 unsigned-char
windows centered at (x_f,y_f) in f_img and (x_g,y_g) in g_img. Returns the
squared correlation coefficient carrying the sign of the correlation, or 0
when either window has zero variance. Accumulation is row-major in float,
matching the original unrolled evaluation order so rounding is identical.
*/
inline float db_SignedSquareNormCorr15x15_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
{
    float fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den,magnitude;
    int left_f,left_g;

    left_f=x_f-7;
    left_g=x_g-7;
    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;

    /* Gather all first- and second-order moments over the window */
    for(int dr=-7;dr<=7;dr++)
    {
        const unsigned char *pf=f_img[y_f+dr]+left_f;
        const unsigned char *pg=g_img[y_g+dr]+left_g;
        for(int i=0;i<15;i++)
        {
            float a=(float)pf[i];
            float b=(float)pg[i];
            fgsum+=a*b; f2sum+=a*a; g2sum+=b*b; fsum+=a; gsum+=b;
        }
    }

    fg_corr=225.0f*fgsum-fsum*gsum;
    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        magnitude=fg_corr*fg_corr/den;
        return (fg_corr>=0.0)?magnitude:(-magnitude);
    }
    return(0.0);
}

/*!
Signed squared normalized cross-correlation between the 7x7 float windows
centered at (x_f,y_f) in f_img and (x_g,y_g) in g_img. Returns the squared
correlation coefficient carrying the sign of the correlation, or 0 when
either window has zero variance.
*/
inline float db_SignedSquareNormCorr7x7_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
{
    float fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den,magnitude;
    int left_f,left_g;

    left_f=x_f-3;
    left_g=x_g-3;
    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;

    /* Gather all first- and second-order moments over the window */
    for(int dr=-3;dr<=3;dr++)
    {
        const float *pf=f_img[y_f+dr]+left_f;
        const float *pg=g_img[y_g+dr]+left_g;
        for(int i=0;i<7;i++)
        {
            float a=pf[i];
            float b=pg[i];
            fgsum+=a*b; f2sum+=a*a; g2sum+=b*b; fsum+=a; gsum+=b;
        }
    }

    fg_corr=49.0f*fgsum-fsum*gsum;
    den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        magnitude=fg_corr*fg_corr/den;
        return (fg_corr>=0.0)?magnitude:(-magnitude);
    }
    return(0.0);
}
gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=49.0f*fgsum-fsum*gsum; + den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline float db_SignedSquareNormCorr9x9_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g) +{ + float f,g,*pf,*pg,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-4; + xm_g=x_g-4; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; 
g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; 
g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; 
fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=81.0f*fgsum-fsum*gsum; + 
den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline float db_SignedSquareNormCorr11x11_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g) +{ + float *pf,*pg; + float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-5; + xm_g=x_g-5; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= 
*pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + 
pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; 
g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; 
fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + fg_corr=121.0f*fgsum-fsum*gsum; + den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum); + if(den!=0.0) + { + if(fg_corr>=0.0) return(fg_corr*fg_corr/den); + return(-fg_corr*fg_corr/den); + } + return(0.0); +} + +inline void db_SignedSquareNormCorr11x11_Pre_f(float **f_img,int x_f,int y_f,float *sum,float *recip) +{ + float *pf,den; + float f,f2sum,fsum; + int xm_f; + + xm_f=x_f-5; + + pf=f_img[y_f-5]+xm_f; + f= *pf++; f2sum=f*f; fsum=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= 
*pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f-1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= 
*pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + pf=f_img[y_f+5]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf++; f2sum+=f*f; fsum+=f; + f= *pf; f2sum+=f*f; fsum+=f; + + *sum=fsum; + den=(121.0f*f2sum-fsum*fsum); + *recip= (float) ((den!=0.0)?1.0/den:0.0); +} + +inline void db_SignedSquareNormCorr11x11_PreAlign_f(float *patch,const float * const *f_img,int x_f,int y_f,float *sum,float *recip) +{ + const float *pf; + float den,f,f2sum,fsum; + int xm_f; + + xm_f=x_f-5; + + pf=f_img[y_f-5]+xm_f; + f= *pf++; f2sum=f*f; fsum=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-4]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; 
+ f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f-1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; 
fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+1]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+2]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+3]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+4]+xm_f; 
+ f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + pf=f_img[y_f+5]+xm_f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f; + f= *pf; f2sum+=f*f; fsum+=f; (*patch++)=f; + + (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; + (*patch++)=0.0; (*patch++)=0.0; + + *sum=fsum; + den=(121.0f*f2sum-fsum*fsum); + *recip= (float) ((den!=0.0)?1.0/den:0.0); +} + +inline float db_SignedSquareNormCorr11x11_Post_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g, + float fsum_gsum,float f_recip_g_recip) +{ + float *pf,*pg; + float fgsum,fg_corr; + int xm_f,xm_g; + + xm_f=x_f-5; + xm_g=x_g-5; + + pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g; + fgsum=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); 
fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + 
fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g; + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); + fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg); + + fg_corr=121.0f*fgsum-fsum_gsum; + if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip); + return(-fg_corr*fg_corr*f_recip_g_recip); +} + +inline float db_SignedSquareNormCorr11x11Aligned_Post_f(const float *f_patch,const float *g_patch,float fsum_gsum,float f_recip_g_recip) +{ + float fgsum,fg_corr; + + fgsum=db_ScalarProduct128Aligned16_f(f_patch,g_patch); + + fg_corr=121.0f*fgsum-fsum_gsum; + if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip); + return(-fg_corr*fg_corr*f_recip_g_recip); +} + +inline float db_SignedSquareNormCorr15x15_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g) +{ + float *pf,*pg; + float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den; + int xm_f,xm_g; + + xm_f=x_f-7; + xm_g=x_g-7; + fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0; + + pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; 
g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + 
f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; 
g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= 
*pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= 
*pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g; + + pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + + pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g; + f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; 
gsum+=g;
    /* Remaining taps of row y+6 of the unrolled 15x15 correlation window
       (continued from the accumulation above). */
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;

    /* Last row (y+7) of the window. */
    pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    f= *pf; g= *pg; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;

    /* Signed squared normalized correlation over the N=225 (15x15) taps:
       fg_corr = N*sum(fg) - sum(f)*sum(g)
       den     = (N*sum(f^2)-sum(f)^2) * (N*sum(g^2)-sum(g)^2) */
    fg_corr=225.0f*fgsum-fsum*gsum;
    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
    if(den!=0.0)
    {
        /* Return the squared correlation, carrying the sign of fg_corr. */
        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
        return(-fg_corr*fg_corr/den);
    }
    return(0.0);  /* degenerate (constant) patch: no correlation defined */
}

/* Allocate an (nr_h x nr_v) grid of float-image buckets with a one-bucket
   border on every side, so neighbor lookups at indices -1..nr_h / -1..nr_v
   need no bounds checks. Each bucket gets room for bd points.
   Free with db_FreeBuckets_f. */
db_Bucket_f** db_AllocBuckets_f(int nr_h,int nr_v,int bd)
{
    int i,j;
    db_Bucket_f **bp,*b;

    b=new db_Bucket_f [(nr_h+2)*(nr_v+2)];
    bp=new db_Bucket_f* [(nr_v+2)];
    bp=bp+1;  /* shift so bp[-1] addresses the top border row */
    for(i= -1;i<=nr_v;i++)
    {
        bp[i]=b+1+(nr_h+2)*(i+1);  /* shift so bp[i][-1] addresses the left border column */
        for(j= -1;j<=nr_h;j++)
        {
            bp[i][j].ptr=new db_PointInfo_f [bd];
        }
    }

    return(bp);
}

/* Byte-image variant of db_AllocBuckets_f; same bordered layout.
   Free with db_FreeBuckets_u. */
db_Bucket_u** db_AllocBuckets_u(int nr_h,int nr_v,int bd)
{
    int i,j;
    db_Bucket_u **bp,*b;

    b=new db_Bucket_u [(nr_h+2)*(nr_v+2)];
    bp=new db_Bucket_u* [(nr_v+2)];
    bp=bp+1;
    for(i= -1;i<=nr_v;i++)
    {
        bp[i]=b+1+(nr_h+2)*(i+1);
        for(j= -1;j<=nr_h;j++)
        {
            bp[i][j].ptr=new db_PointInfo_u [bd];
        }
    }

    return(bp);
}

/* Release a bucket grid created by db_AllocBuckets_f. The -1 offsets undo
   the border shifts applied at allocation time. */
void db_FreeBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
{
    int i,j;

    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
    {
        delete [] bp[i][j].ptr;
    }
    delete [] (bp[-1]-1);  /* bp[-1]-1 recovers the original bucket array base */
    delete [] (bp-1);      /* bp-1 recovers the original row-pointer array base */
}

/* Release a bucket grid created by db_AllocBuckets_u. */
void db_FreeBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
{
    int i,j;

    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
    {
        delete [] bp[i][j].ptr;
    }
    delete [] (bp[-1]-1);
    delete [] (bp-1);
}

/* Reset the point count of every bucket (including the border) to zero. */
void db_EmptyBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
{
    int i,j;
    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
}

/* Byte-image variant of db_EmptyBuckets_f. */
void db_EmptyBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
{
    int i,j;
    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
}

/* Distribute corners (x[i],y[i]) into their buckets and precompute each
   point's aligned 11x11 correlation patch into patch_space (128 floats per
   point). Points outside the grid or in a full bucket are dropped.
   Returns the advanced patch_space pointer (first unused slot). */
float* db_FillBuckets_f(float *patch_space,const float * const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners)
{
    int i,xi,yi,xpos,ypos,nr;
    db_Bucket_f *br;
    db_PointInfo_f *pir;

    db_EmptyBuckets_f(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xi=(int) x[i];  /* NOTE(review): truncates; the _u variant rounds via db_roundi — confirm intended */
        yi=(int) y[i];
        xpos=xi/bw;
        ypos=yi/bh;
        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)  /* silently drop the corner if the bucket is full */
            {
                pir=&(br->ptr[nr]);
                pir->x=xi;
                pir->y=yi;
                pir->id=i;
                pir->pir=0;  /* no match candidate yet */
                pir->patch=patch_space;
                br->nr=nr+1;

                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                patch_space+=128;  /* 128-float stride keeps patches 16-byte aligned */
            }
        }
    }
    return(patch_space);
}

/* Byte-image bucket fill; patch size depends on flags: 21x21 (512 shorts),
   11x11 (128 shorts) or 5x5 (32 shorts) per point. */
short* db_FillBuckets_u(short *patch_space,const unsigned char * const
*f_img,db_Bucket_u **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,int use_smaller_matching_window, int use_21)
{
    int i,xi,yi,xpos,ypos,nr;
    db_Bucket_u *br;
    db_PointInfo_u *pir;

    db_EmptyBuckets_u(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xi=(int)db_roundi(x[i]);
        yi=(int)db_roundi(y[i]);
        xpos=xi/bw;
        ypos=yi/bh;
        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)  /* silently drop the corner if the bucket is full */
            {
                pir=&(br->ptr[nr]);
                pir->x=xi;
                pir->y=yi;
                pir->id=i;
                pir->pir=0;  /* no match candidate yet */
                pir->patch=patch_space;
                br->nr=nr+1;

                /* Patch stride picks the alignment unit: 512/128/32 shorts. */
                if(use_21)
                {
                    db_SignedSquareNormCorr21x21_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                    patch_space+=512;
                }
                else
                {
                    if(!use_smaller_matching_window)
                    {
                        db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                        patch_space+=128;
                    }
                    else
                    {
                        db_SignedSquareNormCorr5x5_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                        patch_space+=32;
                    }
                }
            }
        }
    }
    return(patch_space);
}



/* As db_FillBuckets_f, but each corner is first mapped through homography H
   and bucketed at its warped location (wxi,wyi); the correlation patch is
   still sampled at the original (xi,yi). The +bw/+bh bias before division
   makes the bucket index round correctly for the -1 border. */
float* db_FillBucketsPrewarped_f(float *patch_space,const float *const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,const double H[9])
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_f *br;
    db_PointInfo_f *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_f(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) xd[0];
        yi=(int) xd[1];
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;
        /* Border buckets (-1 and nr_h/nr_v) are valid thanks to the padded grid. */
        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)
            {
                pir=&(br->ptr[nr]);
                pir->x=wxi;  /* store warped coordinates for disparity gating */
                pir->y=wyi;
                pir->id=i;
                pir->pir=0;
                pir->patch=patch_space;
                br->nr=nr+1;

                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                patch_space+=128;
            }
        }
    }
    return(patch_space);
}

/* Byte-image variant of db_FillBucketsPrewarped_f (rounds coordinates). */
short* db_FillBucketsPrewarped_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
                                 int nr_corners,const double H[9])
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_u *br;
    db_PointInfo_u *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_u(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) db_roundi(xd[0]);
        yi=(int) db_roundi(xd[1]);
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;
        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            br=&bp[ypos][xpos];
            nr=br->nr;
            if(nr<bd)
            {
                pir=&(br->ptr[nr]);
                pir->x=wxi;
                pir->y=wyi;
                pir->id=i;
                pir->pir=0;
                pir->patch=patch_space;
                br->nr=nr+1;

                db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
                patch_space+=128;
            }
        }
    }
    return(patch_space);
}



/* As db_FillBucketsPrewarped_u, but the patch itself is sampled through the
   affine warp Hinv; warpboundsp clips corners whose warped patch would read
   outside the image. */
short* db_FillBucketsPrewarpedAffine_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
                                       int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
                                       int nr_corners,const double H[9],const double Hinv[9],const int warpboundsp[4],
                                       int affine)
{
    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
    db_Bucket_u *br;
    db_PointInfo_u *pir;
    double xd[2],wx[2];

    db_EmptyBuckets_u(bp,nr_h,nr_v);
    for(i=0;i<nr_corners;i++)
    {
        xd[0]=x[i];
        xd[1]=y[i];
        xi=(int) db_roundi(xd[0]);
        yi=(int) db_roundi(xd[1]);
        db_ImageHomographyInhomogenous(wx,H,xd);
        wxi=(int) wx[0];
        wyi=(int) wx[1];

        xpos=((wxi+bw)/bw)-1;
        ypos=((wyi+bh)/bh)-1;


        if (xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
        {
            /* Only accept corners whose warped patch stays inside the image. */
            if( xi>warpboundsp[0] && xi<warpboundsp[1] && yi>warpboundsp[2] && yi<warpboundsp[3])
            {

                br=&bp[ypos][xpos];
                nr=br->nr;
                if(nr<bd)
                {
                    pir=&(br->ptr[nr]);
                    pir->x=wxi;
                    pir->y=wyi;
                    pir->id=i;
                    pir->pir=0;
pir->patch=patch_space;
                    br->nr=nr+1;

                    db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip),Hinv,affine);
                    patch_space+=128;
                }
            }
        }
    }
    return(patch_space);
}



/* Score one left/right point pair (float images). If the pair passes the
   elliptical disparity gate, correlate the prestored patches and let each
   point keep a pointer to its best-scoring partner so far. */
inline void db_MatchPointPair_f(db_PointInfo_f *pir_l,db_PointInfo_f *pir_r,
                                unsigned long kA,unsigned long kB)
{
    int x_l,y_l,x_r,y_r,xm,ym;
    double score;

    x_l=pir_l->x;
    y_l=pir_l->y;
    x_r=pir_r->x;
    y_r=pir_r->y;
    xm=x_l-x_r;
    ym=y_l-y_r;
    /*Check if disparity is within the maximum disparity
    with the formula xm^2*256+ym^2*kA<kB
    where kA=256*w^2/h^2
    and kB=256*max_disp^2*w^2*/
    /* NOTE(review): signed int products compared against unsigned long kA/kB —
       relies on disparities being small enough not to overflow; confirm. */
    if(((xm*xm)<<8)+ym*ym*kA<kB)
    {
        /*Correlate*/
        score=db_SignedSquareNormCorr11x11Aligned_Post_f(pir_l->patch,pir_r->patch,
            (pir_l->sum)*(pir_r->sum),
            (pir_l->recip)*(pir_r->recip));

        if((!(pir_l->pir)) || (score>pir_l->s))
        {
            /*Update left corner*/
            pir_l->s=score;
            pir_l->pir=pir_r;
        }
        if((!(pir_r->pir)) || (score>pir_r->s))
        {
            /*Update right corner*/
            pir_r->s=score;
            pir_r->pir=pir_l;
        }
    }
}

/* Byte-image pair scoring. rect_window selects a rectangular disparity gate
   (|dx|<kA, |dy|<kB) instead of the elliptical one; the patch size used for
   correlation is chosen by use_21 / use_smaller_matching_window and must
   match how the buckets were filled. */
inline void db_MatchPointPair_u(db_PointInfo_u *pir_l,db_PointInfo_u *pir_r,
                                unsigned long kA,unsigned long kB, unsigned int rect_window,bool use_smaller_matching_window, int use_21)
{
    int xm,ym;
    double score;
    bool compute_score;


    if( rect_window )
        compute_score = ((unsigned)db_absi(pir_l->x - pir_r->x)<kA && (unsigned)db_absi(pir_l->y - pir_r->y)<kB);
    else
    {   /*Check if disparity is within the maximum disparity
        with the formula xm^2*256+ym^2*kA<kB
        where kA=256*w^2/h^2
        and kB=256*max_disp^2*w^2*/
        xm= pir_l->x - pir_r->x;
        ym= pir_l->y - pir_r->y;
        compute_score = ((xm*xm)<<8)+ym*ym*kA < kB;
    }

    if ( compute_score )
    {
        if(use_21)
        {
            score=db_SignedSquareNormCorr21x21Aligned_Post_s(pir_l->patch,pir_r->patch,
                (pir_l->sum)*(pir_r->sum),
                (pir_l->recip)*(pir_r->recip));
        }
        else
        {
            /*Correlate*/
            if(!use_smaller_matching_window)
            {
                score=db_SignedSquareNormCorr11x11Aligned_Post_s(pir_l->patch,pir_r->patch,
                    (pir_l->sum)*(pir_r->sum),
                    (pir_l->recip)*(pir_r->recip));
            }
            else
            {
                score=db_SignedSquareNormCorr5x5Aligned_Post_s(pir_l->patch,pir_r->patch,
                    (pir_l->sum)*(pir_r->sum),
                    (pir_l->recip)*(pir_r->recip));
            }
        }

        if((!(pir_l->pir)) || (score>pir_l->s))
        {
            /*Update left corner*/
            pir_l->s=score;
            pir_l->pir=pir_r;
        }
        if((!(pir_r->pir)) || (score>pir_r->s))
        {
            /*Update right corner*/
            pir_r->s=score;
            pir_r->pir=pir_l;
        }
    }
}

/* Score one left point against every point in a right bucket. */
inline void db_MatchPointAgainstBucket_f(db_PointInfo_f *pir_l,db_Bucket_f *b_r,
                                         unsigned long kA,unsigned long kB)
{
    int p_r,nr;
    db_PointInfo_f *pir_r;

    nr=b_r->nr;
    pir_r=b_r->ptr;
    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_f(pir_l,pir_r+p_r,kA,kB);
}

/* Byte-image variant of db_MatchPointAgainstBucket_f. */
inline void db_MatchPointAgainstBucket_u(db_PointInfo_u *pir_l,db_Bucket_u *b_r,
                                         unsigned long kA,unsigned long kB,int rect_window, bool use_smaller_matching_window, int use_21)
{
    int p_r,nr;
    db_PointInfo_u *pir_r;

    nr=b_r->nr;
    pir_r=b_r->ptr;

    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_u(pir_l,pir_r+p_r,kA,kB, rect_window, use_smaller_matching_window, use_21);

}

/* Score every left point against its own and the 8 neighboring right
   buckets (the border row/column makes a-1..a+1 always in range). */
void db_MatchBuckets_f(db_Bucket_f **bp_l,db_Bucket_f **bp_r,int nr_h,int nr_v,
                       unsigned long kA,unsigned long kB)
{
    int i,j,k,a,b,br_nr;
    db_Bucket_f *br;
    db_PointInfo_f *pir_l;

    /*For all buckets*/
    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
    {
        br=&bp_l[i][j];
        br_nr=br->nr;
        /*For all points in bucket*/
        for(k=0;k<br_nr;k++)
        {
            pir_l=br->ptr+k;
            for(a=i-1;a<=i+1;a++)
            {
                for(b=j-1;b<=j+1;b++)
                {
                    db_MatchPointAgainstBucket_f(pir_l,&bp_r[a][b],kA,kB);
                }
            }
        }
    }
}

/* Byte-image variant of db_MatchBuckets_f. */
void db_MatchBuckets_u(db_Bucket_u **bp_l,db_Bucket_u **bp_r,int nr_h,int nr_v,
                       unsigned long kA,unsigned long kB,int rect_window,bool use_smaller_matching_window, int use_21)
{
    int i,j,k,a,b,br_nr;
    db_Bucket_u *br;
    db_PointInfo_u *pir_l;

    /*For all buckets*/
    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
    {
        br=&bp_l[i][j];
        br_nr=br->nr;
        /*For all points in bucket*/
        for(k=0;k<br_nr;k++)
        {
            pir_l=br->ptr+k;
            for(a=i-1;a<=i+1;a++)
            {
                for(b=j-1;b<=j+1;b++)
                {
                    db_MatchPointAgainstBucket_u(pir_l,&bp_r[a][b],kA,kB,rect_window,use_smaller_matching_window, use_21);
                }
            }
        }
    }
}

/* Walk all left buckets and keep only mutually-best pairs (left's best match
   points back at the left point). Writes at most 'target' id pairs. */
void db_CollectMatches_f(db_Bucket_f **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
{
    int i,j,k,br_nr;
    unsigned long count;
    db_Bucket_f *br;
    db_PointInfo_f *pir,*pir2;

    count=0;
    /*For all buckets*/
    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
    {
        br=&bp_l[i][j];
        br_nr=br->nr;
        /*For all points in bucket*/
        for(k=0;k<br_nr;k++)
        {
            pir=br->ptr+k;
            pir2=pir->pir;
            if(pir2)
            {
                /*This point has a best match*/
                if((pir2->pir)==pir)
                {
                    /*We have a mutually consistent match*/
                    if(count<target)
                    {
                        id_l[count]=pir->id;
                        id_r[count]=pir2->id;
                        count++;
                    }
                }
            }
        }
    }
    *nr_matches=count;
}

/* Byte-image variant of db_CollectMatches_f. */
void db_CollectMatches_u(db_Bucket_u **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
{
    int i,j,k,br_nr;
    unsigned long count;
    db_Bucket_u *br;
    db_PointInfo_u *pir,*pir2;

    count=0;
    /*For all buckets*/
    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
    {
        br=&bp_l[i][j];
        br_nr=br->nr;
        /*For all points in bucket*/
        for(k=0;k<br_nr;k++)
        {
            pir=br->ptr+k;
            pir2=pir->pir;
            if(pir2)
            {
                /*This point has a best match*/
                if((pir2->pir)==pir)
                {
                    /*We have a mutually consistent match*/
                    if(count<target)
                    {
                        id_l[count]=pir->id;
                        id_r[count]=pir2->id;
                        count++;
                    }
                }
            }
        }
    }
    *nr_matches=count;
}

/* m_w==0 marks the matcher as unallocated; Clean() keys off it.
   NOTE(review): other members (m_bp_l, m_patch_space, ...) are left
   uninitialized here, unlike db_Matcher_u's default ctor — harmless while
   Clean() guards on m_w, but worth confirming. */
db_Matcher_f::db_Matcher_f()
{
    m_w=0; m_h=0;
}

db_Matcher_f::~db_Matcher_f()
{
    Clean();
}

/* Release buckets and patch memory if Init() was run; reset to empty. */
void db_Matcher_f::Clean()
{
    if(m_w)
    {
        /*Free buckets*/
        db_FreeBuckets_f(m_bp_l,m_nr_h,m_nr_v);
        db_FreeBuckets_f(m_bp_r,m_nr_h,m_nr_v);
        /*Free space for patch layouts*/
        delete [] m_patch_space;
    }
    m_w=0; m_h=0;
}

unsigned
long db_Matcher_f::Init(int im_width,int im_height,double max_disparity,int target_nr_corners)
{
    Clean();
    m_w=im_width;
    m_h=im_height;
    /* Bucket size is the maximum disparity in pixels; one bucket per
       disparity radius so matching only needs the 3x3 bucket neighborhood. */
    m_bw=db_maxi(1,(int) (max_disparity*((double)im_width)));
    m_bh=db_maxi(1,(int) (max_disparity*((double)im_height)));
    m_nr_h=1+(im_width-1)/m_bw;
    m_nr_v=1+(im_height-1)/m_bh;
    m_bd=db_maxi(1,(int)(((double)target_nr_corners)*
        max_disparity*max_disparity));
    m_target=target_nr_corners;
    /* Constants for the elliptical disparity gate in db_MatchPointPair_f. */
    m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
    m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));

    /*Alloc bucket structure*/
    m_bp_l=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
    m_bp_r=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);

    /*Alloc 16byte-aligned space for patch layouts*/
    m_patch_space=new float [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
    m_aligned_patch_space=db_AlignPointer_f(m_patch_space,16);

    return(m_target);
}

/* Fill both bucket grids (optionally prewarping the right features by H),
   score all candidate pairs, and emit mutually-best matches into
   id_l/id_r/nr_matches. */
void db_Matcher_f::Match(const float * const *l_img,const float * const *r_img,
                         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
                         int *id_l,int *id_r,int *nr_matches,const double H[9])
{
    float *ps;

    /*Insert the corners into bucket structure*/
    ps=db_FillBuckets_f(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l);
    if(H==0) db_FillBuckets_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r);
    else db_FillBucketsPrewarped_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);

    /*Compute all the necessary match scores*/
    db_MatchBuckets_f(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB);

    /*Collect the correspondences*/
    db_CollectMatches_f(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
}

/* Default-construct as unallocated; Clean()/IsAllocated() key off m_w. */
db_Matcher_u::db_Matcher_u()
{
    m_w=0; m_h=0;
    m_rect_window = 0;
    m_bw=m_bh=m_nr_h=m_nr_v=m_bd=m_target=0;
    m_bp_l=m_bp_r=0;
    m_patch_space=m_aligned_patch_space=0;
}

/* Copy-construct by re-running Init with the source's parameters.
   NOTE(review): m_use_smaller_matching_window / m_use_21 are not forwarded,
   so the copy falls back to Init's defaults — confirm intended. */
db_Matcher_u::db_Matcher_u(const db_Matcher_u& cm)
{
    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
}

/* Assignment mirrors the copy constructor (same caveat applies). */
db_Matcher_u& db_Matcher_u::operator= (const db_Matcher_u& cm)
{
    if ( this == &cm ) return *this;
    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
    return *this;
}


db_Matcher_u::~db_Matcher_u()
{
    Clean();
}

/* Release buckets and patch memory if Init() was run; reset to empty. */
void db_Matcher_u::Clean()
{
    if(m_w)
    {
        /*Free buckets*/
        db_FreeBuckets_u(m_bp_l,m_nr_h,m_nr_v);
        db_FreeBuckets_u(m_bp_r,m_nr_h,m_nr_v);
        /*Free space for patch layouts*/
        delete [] m_patch_space;
    }
    m_w=0; m_h=0;
}


/* Configure bucket grid, disparity gate and patch storage. A non-default
   max_disparity_v enables the rectangular gate (m_rect_window); otherwise
   the elliptical gate constants m_kA/m_kB are computed as in the _f matcher.
   Returns the match-count upper bound (target_nr_corners). */
unsigned long db_Matcher_u::Init(int im_width,int im_height,double max_disparity,int target_nr_corners,
                                 double max_disparity_v, bool use_smaller_matching_window, int use_21)
{
    Clean();
    m_w=im_width;
    m_h=im_height;
    m_max_disparity=max_disparity;
    m_max_disparity_v=max_disparity_v;

    if ( max_disparity_v != DB_DEFAULT_NO_DISPARITY )
    {
        m_rect_window = 1;

        m_bw=db_maxi(1,(int)(max_disparity*((double)im_width)));
        m_bh=db_maxi(1,(int)(max_disparity_v*((double)im_height)));

        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity_v));

        /* Rectangular gate: |dx|<kA, |dy|<kB in pixels. */
        m_kA=(int)(max_disparity*m_w);
        m_kB=(int)(max_disparity_v*m_h);

    } else
    {
        m_bw=(int)db_maxi(1,(int)(max_disparity*((double)im_width)));
        m_bh=(int)db_maxi(1,(int)(max_disparity*((double)im_height)));

        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity));

        /* Elliptical gate constants, as in db_Matcher_f::Init. */
        m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
        m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
    }

    m_nr_h=1+(im_width-1)/m_bw;
    m_nr_v=1+(im_height-1)/m_bh;

    m_target=target_nr_corners;

    /*Alloc bucket structure*/
    m_bp_l=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
    m_bp_r=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);

    m_use_smaller_matching_window = use_smaller_matching_window;
    m_use_21 = use_21;

    if(m_use_21)
    {
        /*Alloc 64byte-aligned space for patch layouts*/
        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*512+64];
        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,64);
    }
    else
    {
        if(!m_use_smaller_matching_window)
        {
            /*Alloc 16byte-aligned space for patch layouts*/
            m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
            m_aligned_patch_space=db_AlignPointer_s(m_patch_space,16);
        }
        else
        {
            /*Alloc 4byte-aligned space for patch layouts*/
            m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*32+4];
            m_aligned_patch_space=db_AlignPointer_s(m_patch_space,4);
        }
    }

    return(m_target);
}

/* Match byte-image features. With a prewarp H: affine==0 warps only the
   feature positions; affine!=0 additionally warps the right patches through
   Hinv, precomputing per-offset warp lookup tables and clip bounds first. */
void db_Matcher_u::Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
                         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
                         int *id_l,int *id_r,int *nr_matches,const double H[9],int affine)
{
    short *ps;

    /*Insert the corners into bucket structure*/
    ps=db_FillBuckets_u(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l,m_use_smaller_matching_window,m_use_21);
    if(H==0)
        db_FillBuckets_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,m_use_smaller_matching_window,m_use_21);
    else
    {
        if (affine)
        {
            double Hinv[9];
            db_InvertAffineTransform(Hinv,H);
            float r_w, c_w;
            float stretch_x[2];
            float stretch_y[2];
            /* Measure how far the warp displaces the two extreme patch
               corners to bound the warped 11x11 patch footprint. */
            AffineWarpPointOffset(r_w,c_w,Hinv, 5,5);
            stretch_x[0]=db_absf(c_w);stretch_y[0]=db_absf(r_w);
            AffineWarpPointOffset(r_w,c_w,Hinv, 5,-5);
            stretch_x[1]=db_absf(c_w);stretch_y[1]=db_absf(r_w);
            int max_stretxh_x=(int) (db_maxd(stretch_x[0],stretch_x[1]));
            int max_stretxh_y=(int) (db_maxd(stretch_y[0],stretch_y[1]));
            int warpbounds[4]={max_stretxh_x,m_w-1-max_stretxh_x,max_stretxh_y,m_h-1-max_stretxh_y};

            /* Precompute warped-offset lookup tables for every (r,c) in the
               11x11 patch, in bilinear (float) and nearest-neighbor (int)
               flavors. */
            for (int r=-5;r<=5;r++){
                for (int c=-5;c<=5;c++){
                    AffineWarpPointOffset(r_w,c_w,Hinv,r,c);
                    AffineWarpPoint_BL_LUT_y[r+5][c+5]=r_w;
                    AffineWarpPoint_BL_LUT_x[r+5][c+5]=c_w;

                    AffineWarpPoint_NN_LUT_y[r+5][c+5]=db_roundi(r_w);
                    AffineWarpPoint_NN_LUT_x[r+5][c+5]=db_roundi(c_w);

                }
            }


            db_FillBucketsPrewarpedAffine_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,
                x_r,y_r,nr_r,H,Hinv,warpbounds,affine);
        }
        else
            db_FillBucketsPrewarped_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
    }


    /*Compute all the necessary match scores*/
    db_MatchBuckets_u(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB, m_rect_window,m_use_smaller_matching_window,m_use_21);

    /*Collect the correspondences*/
    db_CollectMatches_u(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
}

/* Nonzero once Init() has run (m_w doubles as the allocated flag). */
int db_Matcher_u::IsAllocated()
{
    return (int)(m_w != 0);
}
diff --git a/jni/feature_stab/db_vlvm/db_feature_matching.h b/jni/feature_stab/db_vlvm/db_feature_matching.h
new file mode 100644
index 000000000..6c056b9a3
--- /dev/null
+++ b/jni/feature_stab/db_vlvm/db_feature_matching.h
@@ -0,0 +1,260 @@
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*$Id: db_feature_matching.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/

#ifndef DB_FEATURE_MATCHING_H
#define DB_FEATURE_MATCHING_H

/*****************************************************************
* Lean and mean begins here *
*****************************************************************/
/*!
+ * \defgroup FeatureMatching Feature Matching + */ +#include "db_utilities.h" +#include "db_utilities_constants.h" + +DB_API void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip); +DB_API void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip); +float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip); +float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip); + +class db_PointInfo_f +{ +public: + /*Coordinates of point*/ + int x; + int y; + /*Id nr of point*/ + int id; + /*Best match score*/ + double s; + /*Best match candidate*/ + db_PointInfo_f *pir; + /*Precomputed coefficients + of image patch*/ + float sum; + float recip; + /*Pointer to patch layout*/ + const float *patch; +}; + +class db_Bucket_f +{ +public: + db_PointInfo_f *ptr; + int nr; +}; + +class db_PointInfo_u +{ +public: + /*Coordinates of point*/ + int x; + int y; + /*Id nr of point*/ + int id; + /*Best match score*/ + double s; + /*Best match candidate*/ + db_PointInfo_u *pir; + /*Precomputed coefficients + of image patch*/ + float sum; + float recip; + /*Pointer to patch layout*/ + const short *patch; +}; + +class db_Bucket_u +{ +public: + db_PointInfo_u *ptr; + int nr; +}; +/*! + * \class db_Matcher_f + * \ingroup FeatureMatching + * \brief Feature matcher for float images. + * + * Normalized correlation feature matcher for <b>float</b> images. + * Correlation window size is constant and set to 11x11. + * See \ref FeatureDetection to detect Harris corners. + * Images are managed with functions in \ref LMImageBasicUtilities. + */ +class DB_API db_Matcher_f +{ +public: + db_Matcher_f(); + ~db_Matcher_f(); + + /*! + * Set parameters and pre-allocate memory. 
Return an upper bound
+ * on the number of matches.
+ * \param im_width width
+ * \param im_height height
+ * \param max_disparity maximum distance (as fraction of image size) between matches
+ * \param target_nr_corners maximum number of matches
+ * \return maximum number of matches
+ */
+ unsigned long Init(int im_width,int im_height,
+ double max_disparity=DB_DEFAULT_MAX_DISPARITY,
+ int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS);
+
+ /*!
+ * Match two sets of features.
+ * If the prewarp H is not NULL it will be applied to the features
+ * in the right image before matching.
+ * Parameters id_l and id_r must point to arrays of size target_nr_corners
+ * (returned by Init()).
+ * The results of matching are in id_l and id_r.
+ * Interpretation of results: if id_l[i] = m and id_r[i] = n,
+ * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
+ * \param l_img left image
+ * \param r_img right image
+ * \param x_l left x coordinates of features
+ * \param y_l left y coordinates of features
+ * \param nr_l number of features in left image
+ * \param x_r right x coordinates of features
+ * \param y_r right y coordinates of features
+ * \param nr_r number of features in right image
+ * \param id_l indices of left features that matched
+ * \param id_r indices of right features that matched
+ * \param nr_matches number of features actually matched
+ * \param H image homography (prewarp) to be applied to right image features
+ */
+ void Match(const float * const *l_img,const float * const *r_img,
+ const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+ int *id_l,int *id_r,int *nr_matches,const double H[9]=0);
+
+protected:
+ void Clean();
+
+ int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target;
+ unsigned long m_kA,m_kB;
+ db_Bucket_f **m_bp_l;
+ db_Bucket_f **m_bp_r;
+ float *m_patch_space,*m_aligned_patch_space;
+};
+/*!
+ * \class db_Matcher_u
+ * \ingroup FeatureMatching
+ * \brief Feature matcher for byte images. 
+ *
+ * Normalized correlation feature matcher for <b>byte</b> images.
+ * Correlation window size is constant and set to 11x11.
+ * See \ref FeatureDetection to detect Harris corners.
+ * Images are managed with functions in \ref LMImageBasicUtilities.
+ *
+ * If the prewarp matrix H is supplied, the feature coordinates are warped by H before being placed in
+ * appropriate buckets. If H is an affine transform and the "affine" parameter is set to 1 or 2,
+ * then the correlation patches themselves are warped before being placed in the patch space.
+ */
+class DB_API db_Matcher_u
+{
+public:
+ db_Matcher_u();
+
+ int GetPatchSize(){return 11;};
+
+ virtual ~db_Matcher_u();
+
+ /*!
+ Copy ctor duplicates settings.
+ Memory not copied.
+ */
+ db_Matcher_u(const db_Matcher_u& cm);
+
+ /*!
+ Assignment operator duplicates settings.
+ Memory not copied.
+ */
+ db_Matcher_u& operator= (const db_Matcher_u& cm);
+
+ /*!
+ * Set parameters and pre-allocate memory. Return an upper bound
+ * on the number of matches.
+ * If max_disparity_v is DB_DEFAULT_NO_DISPARITY, look for matches
+ * in an ellipse around a feature of radius max_disparity*im_width by max_disparity*im_height.
+ * If max_disparity_v is specified, use a rectangle max_disparity*im_width by max_disparity_v*im_height.
+ * \param im_width width
+ * \param im_height height
+ * \param max_disparity maximum distance (as fraction of image size) between matches
+ * \param target_nr_corners maximum number of matches
+ * \param max_disparity_v maximum vertical disparity (distance between matches)
+ * \param use_smaller_matching_window if set to true, uses a correlation window of 5x5 instead of the default 11x11
+ * \return maximum number of matches
+ */
+ virtual unsigned long Init(int im_width,int im_height,
+ double max_disparity=DB_DEFAULT_MAX_DISPARITY,
+ int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
+ double max_disparity_v=DB_DEFAULT_NO_DISPARITY,
+ bool use_smaller_matching_window=false, int use_21=0);
+
+ /*! 
+ * Match two sets of features.
+ * If the prewarp H is not NULL it will be applied to the features
+ * in the right image before matching.
+ * Parameters id_l and id_r must point to arrays of size target_nr_corners
+ * (returned by Init()).
+ * The results of matching are in id_l and id_r.
+ * Interpretation of results: if id_l[i] = m and id_r[i] = n,
+ * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
+ * \param l_img left image
+ * \param r_img right image
+ * \param x_l left x coordinates of features
+ * \param y_l left y coordinates of features
+ * \param nr_l number of features in left image
+ * \param x_r right x coordinates of features
+ * \param y_r right y coordinates of features
+ * \param nr_r number of features in right image
+ * \param id_l indices of left features that matched
+ * \param id_r indices of right features that matched
+ * \param nr_matches number of features actually matched
+ * \param H image homography (prewarp) to be applied to right image features
+ * \param affine prewarp the 11x11 patches by given affine transform. 0 means no warping,
+ 1 means nearest neighbor, 2 means bilinear warping.
+ */
+ virtual void Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
+ const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+ int *id_l,int *id_r,int *nr_matches,const double H[9]=0,int affine=0);
+
+ /*!
+ * Checks if Init() was called.
+ * \return 1 if Init() was called, 0 otherwise. 
+ */ + int IsAllocated(); + +protected: + virtual void Clean(); + + + int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target; + unsigned long m_kA,m_kB; + db_Bucket_u **m_bp_l; + db_Bucket_u **m_bp_r; + short *m_patch_space,*m_aligned_patch_space; + + double m_max_disparity, m_max_disparity_v; + int m_rect_window; + bool m_use_smaller_matching_window; + int m_use_21; +}; + + + +#endif /*DB_FEATURE_MATCHING_H*/ diff --git a/jni/feature_stab/db_vlvm/db_framestitching.cpp b/jni/feature_stab/db_vlvm/db_framestitching.cpp new file mode 100644 index 000000000..b574f7a04 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_framestitching.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id: db_framestitching.cpp,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */ + +#include "db_utilities.h" +#include "db_framestitching.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +inline void db_RotationFromMOuterProductSum(double R[9],double *score,double M[9]) +{ + double N[16],q[4],lambda[4],lambda_max; + double y[4]; + int nr_roots; + + N[0]= M[0]+M[4]+M[8]; + N[5]= M[0]-M[4]-M[8]; + N[10]= -M[0]+M[4]-M[8]; + N[15]= -M[0]-M[4]+M[8]; + N[1] =N[4] =M[5]-M[7]; + N[2] =N[8] =M[6]-M[2]; + N[3] =N[12]=M[1]-M[3]; + N[6] =N[9] =M[1]+M[3]; + N[7] =N[13]=M[6]+M[2]; + N[11]=N[14]=M[5]+M[7]; + + /*get the quaternion representing the rotation + by finding the eigenvector corresponding to the most + positive eigenvalue. Force eigenvalue solutions, since the matrix + is symmetric and solutions might otherwise be lost + when the data is planar*/ + db_RealEigenvalues4x4(lambda,&nr_roots,N,1); + if(nr_roots) + { + lambda_max=lambda[0]; + if(nr_roots>=2) + { + if(lambda[1]>lambda_max) lambda_max=lambda[1]; + if(nr_roots>=3) + { + if(lambda[2]>lambda_max) lambda_max=lambda[2]; + { + if(nr_roots>=4) if(lambda[3]>lambda_max) lambda_max=lambda[3]; + } + } + } + } + else lambda_max=1.0; + db_EigenVector4x4(q,lambda_max,N); + + /*Compute the rotation matrix*/ + db_QuaternionToRotation(R,q); + + if(score) + { + /*Compute score=transpose(q)*N*q */ + db_Multiply4x4_4x1(y,N,q); + *score=db_ScalarProduct4(q,y); + } +} + +void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3], + double **Xp,double **X,int nr_points,int orientation_preserving, + int allow_scaling,int allow_rotation,int allow_translation) +{ + int i; + double c[3],cp[3],r[3],rp[3],M[9],s,sp,sc; + double Rr[9],score_p,score_r; + double *temp,*temp_p; + + if(allow_translation) + { + db_PointCentroid3D(c,X,nr_points); + db_PointCentroid3D(cp,Xp,nr_points); + } + else + { + 
db_Zero3(c); + db_Zero3(cp); + } + + db_Zero9(M); + s=sp=0; + for(i=0;i<nr_points;i++) + { + temp= *X++; + temp_p= *Xp++; + r[0]=(*temp++)-c[0]; + r[1]=(*temp++)-c[1]; + r[2]=(*temp++)-c[2]; + rp[0]=(*temp_p++)-cp[0]; + rp[1]=(*temp_p++)-cp[1]; + rp[2]=(*temp_p++)-cp[2]; + + M[0]+=r[0]*rp[0]; + M[1]+=r[0]*rp[1]; + M[2]+=r[0]*rp[2]; + M[3]+=r[1]*rp[0]; + M[4]+=r[1]*rp[1]; + M[5]+=r[1]*rp[2]; + M[6]+=r[2]*rp[0]; + M[7]+=r[2]*rp[1]; + M[8]+=r[2]*rp[2]; + + s+=db_sqr(r[0])+db_sqr(r[1])+db_sqr(r[2]); + sp+=db_sqr(rp[0])+db_sqr(rp[1])+db_sqr(rp[2]); + } + + /*Compute scale*/ + if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s)); + else sc=1.0; + *scale=sc; + + /*Compute rotation*/ + if(allow_rotation) + { + if(orientation_preserving) + { + db_RotationFromMOuterProductSum(R,0,M); + } + else + { + /*Try preserving*/ + db_RotationFromMOuterProductSum(R,&score_p,M); + /*Try reversing*/ + M[6]= -M[6]; + M[7]= -M[7]; + M[8]= -M[8]; + db_RotationFromMOuterProductSum(Rr,&score_r,M); + if(score_r>score_p) + { + /*Reverse is better*/ + R[0]=Rr[0]; R[1]=Rr[1]; R[2]= -Rr[2]; + R[3]=Rr[3]; R[4]=Rr[4]; R[5]= -Rr[5]; + R[6]=Rr[6]; R[7]=Rr[7]; R[8]= -Rr[8]; + } + } + } + else db_Identity3x3(R); + + /*Compute translation*/ + if(allow_translation) + { + t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]+R[2]*c[2]); + t[1]=cp[1]-sc*(R[3]*c[0]+R[4]*c[1]+R[5]*c[2]); + t[2]=cp[2]-sc*(R[6]*c[0]+R[7]*c[1]+R[8]*c[2]); + } + else db_Zero3(t); +} + + diff --git a/jni/feature_stab/db_vlvm/db_framestitching.h b/jni/feature_stab/db_vlvm/db_framestitching.h new file mode 100644 index 000000000..5fef5f37e --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_framestitching.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_framestitching.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_FRAMESTITCHING_H +#define DB_FRAMESTITCHING_H +/*! + * \defgroup FrameStitching Frame Stitching (2D and 3D homography estimation) + */ +/*\{*/ + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMFrameStitching (LM) Frame Stitching (2D and 3D homography estimation) + */ +/*\{*/ + +/*! +Find scale, rotation and translation of the similarity that +takes the nr_points inhomogenous 3D points X to Xp +(left to right according to Horn), i.e. for the homogenous equivalents +Xp and X we would have +\code + Xp~ + [sR t]*X + [0 1] +\endcode +If orientation_preserving is true, R is restricted such that det(R)>0. 
+allow_scaling, allow_rotation and allow_translation allow s,R and t
+to differ from 1,Identity and 0
+
+Full similarity takes the following on 550MHz:
+\code
+4.5 microseconds with 3 points
+4.7 microseconds with 4 points
+5.0 microseconds with 5 points
+5.2 microseconds with 6 points
+5.8 microseconds with 10 points
+20 microseconds with 100 points
+205 microseconds with 1000 points
+2.9 milliseconds with 10000 points
+50 milliseconds with 100000 points
+0.5 seconds with 1000000 points
+\endcode
+Without orientation_preserving:
+\code
+4 points is minimal for (s,R,t) (R,t)
+3 points is minimal for (s,R) (R)
+2 points is minimal for (s,t)
+1 point is minimal for (s) (t)
+\endcode
+With orientation_preserving:
+\code
+3 points is minimal for (s,R,t) (R,t)
+2 points is minimal for (s,R) (s,t) (R)
+1 point is minimal for (s) (t)
+\endcode
+
+\param scale scale
+\param R rotation
+\param t translation
+\param Xp inhomogeneous 3D points in first coordinate system
+\param X inhomogeneous 3D points in second coordinate system
+\param nr_points number of points
+\param orientation_preserving if true, R is restricted such that det(R)>0.
+\param allow_scaling estimate scale
+\param allow_rotation estimate rotation
+\param allow_translation estimate translation
+*/
+DB_API void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3],
+ double **Xp,double **X,int nr_points,int orientation_preserving=1,
+ int allow_scaling=1,int allow_rotation=1,int allow_translation=1);
+
+
+/*\}*/
+
+#endif /* DB_FRAMESTITCHING_H */ diff --git a/jni/feature_stab/db_vlvm/db_image_homography.cpp b/jni/feature_stab/db_vlvm/db_image_homography.cpp new file mode 100644 index 000000000..aaad7f85e --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_image_homography.cpp @@ -0,0 +1,332 @@ +/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */ + +#include "db_utilities.h" +#include "db_image_homography.h" +#include "db_framestitching.h" +#include "db_metrics.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +/*Compute the linear constraint on H obtained by requiring that the +ratio between coordinate i_num and i_den of xp is equal to the ratio +between coordinate i_num and i_den of Hx. i_zero should be set to +the coordinate not equal to i_num or i_den. No normalization is used*/ +inline void db_SProjImagePointPointConstraint(double c[9],int i_num,int i_den,int i_zero, + double xp[3],double x[3]) +{ + db_MultiplyScalarCopy3(c+3*i_den,x, xp[i_num]); + db_MultiplyScalarCopy3(c+3*i_num,x, -xp[i_den]); + db_Zero3(c+3*i_zero); +} + +/*Compute two constraints on H generated by the correspondence (Xp,X), +assuming that Xp ~= H*X. 
No normalization is used*/ +inline void db_SProjImagePointPointConstraints(double c1[9],double c2[9],double xp[3],double x[3]) +{ + int ma_ind; + + /*Find index of coordinate of Xp with largest absolute value*/ + ma_ind=db_MaxAbsIndex3(xp); + + /*Generate 2 constraints, + each constraint is generated by considering the ratio between a + coordinate and the largest absolute value coordinate*/ + switch(ma_ind) + { + case 0: + db_SProjImagePointPointConstraint(c1,1,0,2,xp,x); + db_SProjImagePointPointConstraint(c2,2,0,1,xp,x); + break; + case 1: + db_SProjImagePointPointConstraint(c1,0,1,2,xp,x); + db_SProjImagePointPointConstraint(c2,2,1,0,xp,x); + break; + default: + db_SProjImagePointPointConstraint(c1,0,2,1,xp,x); + db_SProjImagePointPointConstraint(c2,1,2,0,xp,x); + } +} + +inline void db_SAffineImagePointPointConstraints(double c1[7],double c2[7],double xp[3],double x[3]) +{ + double ct1[9],ct2[9]; + + db_SProjImagePointPointConstraints(ct1,ct2,xp,x); + db_Copy6(c1,ct1); c1[6]=ct1[8]; + db_Copy6(c2,ct2); c2[6]=ct2[8]; +} + +void db_StitchProjective2D_4Points(double H[9], + double x1[3],double x2[3],double x3[3],double x4[3], + double xp1[3],double xp2[3],double xp3[3],double xp4[3]) +{ + double c[72]; + + /*Collect the constraints*/ + db_SProjImagePointPointConstraints(c ,c+9 ,xp1,x1); + db_SProjImagePointPointConstraints(c+18,c+27,xp2,x2); + db_SProjImagePointPointConstraints(c+36,c+45,xp3,x3); + db_SProjImagePointPointConstraints(c+54,c+63,xp4,x4); + /*Solve for the nullvector*/ + db_NullVector8x9Destructive(H,c); +} + +void db_StitchAffine2D_3Points(double H[9], + double x1[3],double x2[3],double x3[3], + double xp1[3],double xp2[3],double xp3[3]) +{ + double c[42]; + + /*Collect the constraints*/ + db_SAffineImagePointPointConstraints(c ,c+7 ,xp1,x1); + db_SAffineImagePointPointConstraints(c+14,c+21,xp2,x2); + db_SAffineImagePointPointConstraints(c+28,c+35,xp3,x3); + /*Solve for the nullvector*/ + db_NullVector6x7Destructive(H,c); + 
db_MultiplyScalar6(H,db_SafeReciprocal(H[6])); + H[6]=H[7]=0; H[8]=1.0; +} + +/*Compute up to three solutions for the focal length given two point correspondences +generated by a rotation with a common unknown focal length. No specific normalization +of the input points is required. If signed_disambiguation is true, the points are +required to be in front of the camera*/ +inline void db_CommonFocalLengthFromRotation_2Point(double fsol[3],int *nr_sols,double x1[3],double x2[3],double xp1[3],double xp2[3],int signed_disambiguation=1) +{ + double m,ax,ay,apx,apy,bx,by,bpx,bpy; + double p1[2],p2[2],p3[2],p4[2],p5[2],p6[2]; + double p7[3],p8[4],p9[5],p10[3],p11[4]; + double roots[3]; + int nr_roots,i,j; + + /*Solve for focal length using the equation + <a,b>^2*<ap,ap><bp,bp>=<ap,bp>^2*<a,a><b,b> + where a and ap are the homogenous vectors in the first image + after focal length scaling and b,bp are the vectors in the + second image*/ + + /*Normalize homogenous coordinates so that last coordinate is one*/ + m=db_SafeReciprocal(x1[2]); + ax=x1[0]*m; + ay=x1[1]*m; + m=db_SafeReciprocal(xp1[2]); + apx=xp1[0]*m; + apy=xp1[1]*m; + m=db_SafeReciprocal(x2[2]); + bx=x2[0]*m; + by=x2[1]*m; + m=db_SafeReciprocal(xp2[2]); + bpx=xp2[0]*m; + bpy=xp2[1]*m; + + /*Compute cubic in l=1/(f^2) + by dividing out the root l=0 from the equation + (l(ax*bx+ay*by)+1)^2*(l(apx^2+apy^2)+1)*(l(bpx^2+bpy^2)+1)= + (l(apx*bpx+apy*bpy)+1)^2*(l(ax^2+ay^2)+1)*(l(bx^2+by^2)+1)*/ + p1[1]=ax*bx+ay*by; + p2[1]=db_sqr(apx)+db_sqr(apy); + p3[1]=db_sqr(bpx)+db_sqr(bpy); + p4[1]=apx*bpx+apy*bpy; + p5[1]=db_sqr(ax)+db_sqr(ay); + p6[1]=db_sqr(bx)+db_sqr(by); + p1[0]=p2[0]=p3[0]=p4[0]=p5[0]=p6[0]=1; + + db_MultiplyPoly1_1(p7,p1,p1); + db_MultiplyPoly1_2(p8,p2,p7); + db_MultiplyPoly1_3(p9,p3,p8); + + db_MultiplyPoly1_1(p10,p4,p4); + db_MultiplyPoly1_2(p11,p5,p10); + db_SubtractPolyProduct1_3(p9,p6,p11); + /*Cubic starts at p9[1]*/ + db_SolveCubic(roots,&nr_roots,p9[4],p9[3],p9[2],p9[1]); + + 
for(j=0,i=0;i<nr_roots;i++) + { + if(roots[i]>0) + { + if((!signed_disambiguation) || (db_PolyEval1(p1,roots[i])*db_PolyEval1(p4,roots[i])>0)) + { + fsol[j++]=db_SafeSqrtReciprocal(roots[i]); + } + } + } + *nr_sols=j; +} + +int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3],double xp1[3],double xp2[3],double xp3[3],double *f,int signed_disambiguation) +{ + double fsol[3]; + int nr_sols,i,best_sol,done; + double cost,best_cost; + double m,hyp[27],x1_temp[3],x2_temp[3],xp1_temp[3],xp2_temp[3]; + double *hyp_point,ft; + double y[2]; + + db_CommonFocalLengthFromRotation_2Point(fsol,&nr_sols,x1,x2,xp1,xp2,signed_disambiguation); + if(nr_sols) + { + db_DeHomogenizeImagePoint(y,xp3); + done=0; + for(i=0;i<nr_sols;i++) + { + ft=fsol[i]; + m=db_SafeReciprocal(ft); + x1_temp[0]=x1[0]*m; + x1_temp[1]=x1[1]*m; + x1_temp[2]=x1[2]; + x2_temp[0]=x2[0]*m; + x2_temp[1]=x2[1]*m; + x2_temp[2]=x2[2]; + xp1_temp[0]=xp1[0]*m; + xp1_temp[1]=xp1[1]*m; + xp1_temp[2]=xp1[2]; + xp2_temp[0]=xp2[0]*m; + xp2_temp[1]=xp2[1]*m; + xp2_temp[2]=xp2[2]; + + hyp_point=hyp+9*i; + db_StitchCameraRotation_2Points(hyp_point,x1_temp,x2_temp,xp1_temp,xp2_temp); + hyp_point[2]*=ft; + hyp_point[5]*=ft; + hyp_point[6]*=m; + hyp_point[7]*=m; + cost=db_SquaredReprojectionErrorHomography(y,hyp_point,x3); + + if(!done || cost<best_cost) + { + done=1; + best_cost=cost; + best_sol=i; + } + } + + if(f) *f=fsol[best_sol]; + db_Copy9(H,hyp+9*best_sol); + return(1); + } + else + { + db_Identity3x3(H); + if(f) *f=1.0; + return(0); + } +} + +void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2], + double **Xp,double **X,int nr_points,int orientation_preserving, + int allow_scaling,int allow_rotation,int allow_translation) +{ + int i; + double c[2],cp[2],r[2],rp[2],M[4],s,sp,sc; + double *temp,*temp_p; + double Aacc,Bacc,Aacc2,Bacc2,divisor,divisor2,m,Am,Bm; + + if(allow_translation) + { + db_PointCentroid2D(c,X,nr_points); + db_PointCentroid2D(cp,Xp,nr_points); 
+ } + else + { + db_Zero2(c); + db_Zero2(cp); + } + + db_Zero4(M); + s=sp=0; + for(i=0;i<nr_points;i++) + { + temp= *X++; + temp_p= *Xp++; + r[0]=(*temp++)-c[0]; + r[1]=(*temp++)-c[1]; + rp[0]=(*temp_p++)-cp[0]; + rp[1]=(*temp_p++)-cp[1]; + + M[0]+=r[0]*rp[0]; + M[1]+=r[0]*rp[1]; + M[2]+=r[1]*rp[0]; + M[3]+=r[1]*rp[1]; + + s+=db_sqr(r[0])+db_sqr(r[1]); + sp+=db_sqr(rp[0])+db_sqr(rp[1]); + } + + /*Compute scale*/ + if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s)); + else sc=1.0; + *scale=sc; + + /*Compute rotation*/ + if(allow_rotation) + { + /*orientation preserving*/ + Aacc=M[0]+M[3]; + Bacc=M[2]-M[1]; + /*orientation reversing*/ + Aacc2=M[0]-M[3]; + Bacc2=M[2]+M[1]; + if(Aacc!=0.0 || Bacc!=0.0) + { + divisor=sqrt(Aacc*Aacc+Bacc*Bacc); + m=db_SafeReciprocal(divisor); + Am=Aacc*m; + Bm=Bacc*m; + R[0]= Am; + R[1]= Bm; + R[2]= -Bm; + R[3]= Am; + } + else + { + db_Identity2x2(R); + divisor=0.0; + } + if(!orientation_preserving && (Aacc2!=0.0 || Bacc2!=0.0)) + { + divisor2=sqrt(Aacc2*Aacc2+Bacc2*Bacc2); + if(divisor2>divisor) + { + m=db_SafeReciprocal(divisor2); + Am=Aacc2*m; + Bm=Bacc2*m; + R[0]= Am; + R[1]= Bm; + R[2]= Bm; + R[3]= -Am; + } + } + } + else db_Identity2x2(R); + + /*Compute translation*/ + if(allow_translation) + { + t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]); + t[1]=cp[1]-sc*(R[2]*c[0]+R[3]*c[1]); + } + else db_Zero2(t); +} + + diff --git a/jni/feature_stab/db_vlvm/db_image_homography.h b/jni/feature_stab/db_vlvm/db_image_homography.h new file mode 100644 index 000000000..165447dd7 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_image_homography.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_image_homography.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_IMAGE_HOMOGRAPHY +#define DB_IMAGE_HOMOGRAPHY + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_framestitching.h" +/*! + * \defgroup LMImageHomography (LM) Image Homography Estimation (feature based) + */ +/*\{*/ +/*! +Solve for projective H such that xp~Hx. Prior normalization is not necessary, +although desirable for numerical conditioning +\param H image projective (out) +\param x1 image 1 point 1 +\param x2 image 1 point 2 +\param x3 image 1 point 3 +\param x4 image 1 point 4 +\param xp1 image 2 point 1 +\param xp2 image 2 point 2 +\param xp3 image 2 point 3 +\param xp4 image 2 point 4 +*/ +DB_API void db_StitchProjective2D_4Points(double H[9], + double x1[3],double x2[3],double x3[3],double x4[3], + double xp1[3],double xp2[3],double xp3[3],double xp4[3]); + +/*! +Solve for affine H such that xp~Hx. Prior normalization is not necessary, +although desirable for numerical conditioning +\param H image projective (out) +\param x1 image 1 point 1 +\param x2 image 1 point 2 +\param x3 image 1 point 3 +\param xp1 image 2 point 1 +\param xp2 image 2 point 2 +\param xp3 image 2 point 3 +*/ +DB_API void db_StitchAffine2D_3Points(double H[9], + double x1[3],double x2[3],double x3[3], + double xp1[3],double xp2[3],double xp3[3]); + +/*! +Solve for rotation R such that xp~Rx. 
+Image points have to be of unit norm for the least squares to be meaningful. +\param R image rotation (out) +\param x1 image 1 point 1 +\param x2 image 1 point 2 +\param xp1 image 2 point 1 +\param xp2 image 2 point 2 +*/ +inline void db_StitchCameraRotation_2Points(double R[9], + /*Image points have to be of unit norm + for the least squares to be meaningful*/ + double x1[3],double x2[3], + double xp1[3],double xp2[3]) +{ + double* x[2]; + double* xp[2]; + double scale,t[3]; + + x[0]=x1; + x[1]=x2; + xp[0]=xp1; + xp[1]=xp2; + db_StitchSimilarity3DRaw(&scale,R,t,xp,x,2,1,0,1,0); +} + +/*! +Solve for a homography H generated by a rotation R with a common unknown focal length f, i.e. +H=diag(f,f,1)*R*diag(1/f,1/f,1) such that xp~Hx. +If signed_disambiguation is true, the points are +required to be in front of the camera. No specific normalization of the homogenous points +is required, although it could be desirable to keep x1,x2,xp1 and xp2 of reasonable magnitude. +If a solution is obtained the function returns 1, otherwise 0. If the focal length is desired +a valid pointer should be passed in f +*/ +DB_API int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3], + double xp1[3],double xp2[3],double xp3[3],double *f=0,int signed_disambiguation=1); + +/*! +Find scale, rotation and translation of the similarity that +takes the nr_points inhomogenous 2D points X to Xp, +i.e. for the homogenous equivalents +Xp and X we would have +\code +Xp~ +[sR t]*X +[0 1] +\endcode +If orientation_preserving is true, R is restricted such that det(R)>0. 
+allow_scaling, allow_rotation and allow_translation allow s,R and t +to differ from 1,Identity and 0 + +Full similarity takes the following on 550MHz: +\code +0.9 microseconds with 2 points +1.0 microseconds with 3 points +1.1 microseconds with 4 points +1.3 microseconds with 5 points +1.4 microseconds with 6 points +1.7 microseconds with 10 points +9 microseconds with 100 points +130 microseconds with 1000 points +1.3 milliseconds with 10000 points +35 milliseconds with 100000 points +350 milliseconds with 1000000 points +\endcode + +Without orientation_preserving: +\code +3 points is minimal for (s,R,t) (R,t) +2 points is minimal for (s,t) (s,R) (R) +1 point is minimal for (s) (t) +\endcode + +With orientation_preserving: +\code +2 points is minimal for (s,R,t) (R,t) (s,t) +1 point is minimal for (s,R) (R) (s) (t) +\endcode +\param scale (out) +\param R 2D rotation (out) +\param t 2D translation (out) +\param Xp (nr_points x 2) pointer to array of image points +\param X (nr_points x 2 ) pointer to array of image points +\param nr_points number of points +\param orientation_preserving +\param allow_scaling compute scale (if 0, scale=1) +\param allow_rotation compute rotation (if 0, R=[I]) +\param allow_translation compute translation (if 0 t = [0,0]') +*/ +DB_API void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2], + double **Xp,double **X,int nr_points,int orientation_preserving=1, + int allow_scaling=1,int allow_rotation=1,int allow_translation=1); +/*! +See db_StitchRotationCommonFocalLength_3Points(). 
+\param H Image similarity transformation (out) +\param Xp (nr_points x 2) pointer to array of image points +\param X (nr_points x 2) pointer to array of image points +\param nr_points number of points +\param orientation_preserving +\param allow_scaling compute scale (if 0, scale=1) +\param allow_rotation compute rotation (if 0, R=[I]) +\param allow_translation compute translation (if 0 t = [0,0]') +*/ +inline void db_StitchSimilarity2D(double H[9],double **Xp,double **X,int nr_points,int orientation_preserving=1, + int allow_scaling=1,int allow_rotation=1,int allow_translation=1) +{ + double s,R[4],t[2]; + + db_StitchSimilarity2DRaw(&s,R,t,Xp,X,nr_points,orientation_preserving, + allow_scaling,allow_rotation,allow_translation); + + H[0]=s*R[0]; H[1]=s*R[1]; H[2]=t[0]; + H[3]=s*R[2]; H[4]=s*R[3]; H[5]=t[1]; + db_Zero2(H+6); + H[8]=1.0; +} +/*\}*/ +#endif /* DB_IMAGE_HOMOGRAPHY */ diff --git a/jni/feature_stab/db_vlvm/db_metrics.h b/jni/feature_stab/db_vlvm/db_metrics.h new file mode 100644 index 000000000..6b95458f1 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_metrics.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id: db_metrics.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_METRICS +#define DB_METRICS + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_utilities.h" +/*! + * \defgroup LMMetrics (LM) Metrics + */ +/*\{*/ + + + + +/*! +Compute function value fp and Jacobian J of robustifier given input value f*/ +inline void db_CauchyDerivative(double J[4],double fp[2],const double f[2],double one_over_scale2) +{ + double x2,y2,r,r2,r2s,one_over_r2,fu,r_fu,one_over_r_fu; + double one_plus_r2s,half_dfu_dx,half_dfu_dy,coeff,coeff2,coeff3; + int at_zero; + + /*The robustifier takes the input (x,y) and makes a new + vector (xp,yp) where + xp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*x/sqrt(x^2+y^2) + yp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*y/sqrt(x^2+y^2) + The new vector has the property + xp^2+yp^2=log(1+(x^2+y^2)*one_over_scale2) + i.e. when it is square-summed it gives the robust + reprojection error + Define + r2=(x^2+y^2) and + r2s=r2*one_over_scale2 + fu=log(1+r2s)/r2 + then + xp=sqrt(fu)*x + yp=sqrt(fu)*y + and + d(r2)/dx=2x + d(r2)/dy=2y + and + dfu/dx=d(r2)/dx*(r2s/(1+r2s)-log(1+r2s))/(r2*r2) + dfu/dy=d(r2)/dy*(r2s/(1+r2s)-log(1+r2s))/(r2*r2) + and + d(xp)/dx=1/(2sqrt(fu))*(dfu/dx)*x+sqrt(fu) + d(xp)/dy=1/(2sqrt(fu))*(dfu/dy)*x + d(yp)/dx=1/(2sqrt(fu))*(dfu/dx)*y + d(yp)/dy=1/(2sqrt(fu))*(dfu/dy)*y+sqrt(fu) + */ + + x2=db_sqr(f[0]); + y2=db_sqr(f[1]); + r2=x2+y2; + r=sqrt(r2); + + if(r2<=0.0) at_zero=1; + else + { + one_over_r2=1.0/r2; + r2s=r2*one_over_scale2; + one_plus_r2s=1.0+r2s; + fu=log(one_plus_r2s)*one_over_r2; + r_fu=sqrt(fu); + if(r_fu<=0.0) at_zero=1; + else + { + one_over_r_fu=1.0/r_fu; + fp[0]=r_fu*f[0]; + fp[1]=r_fu*f[1]; + /*r2s is always >= 0*/ + coeff=(r2s/one_plus_r2s*one_over_r2-fu)*one_over_r2; + half_dfu_dx=f[0]*coeff; + half_dfu_dy=f[1]*coeff; + coeff2=one_over_r_fu*half_dfu_dx; + 
coeff3=one_over_r_fu*half_dfu_dy; + + J[0]=coeff2*f[0]+r_fu; + J[1]=coeff3*f[0]; + J[2]=coeff2*f[1]; + J[3]=coeff3*f[1]+r_fu; + at_zero=0; + } + } + if(at_zero) + { + /*Close to zero the robustifying mapping + becomes identity*sqrt(one_over_scale2)*/ + fp[0]=0.0; + fp[1]=0.0; + J[0]=sqrt(one_over_scale2); + J[1]=0.0; + J[2]=0.0; + J[3]=J[0]; + } +} + +inline double db_SquaredReprojectionErrorHomography(const double y[2],const double H[9],const double x[3]) +{ + double x0,x1,x2,mult; + double sd; + + x0=H[0]*x[0]+H[1]*x[1]+H[2]*x[2]; + x1=H[3]*x[0]+H[4]*x[1]+H[5]*x[2]; + x2=H[6]*x[0]+H[7]*x[1]+H[8]*x[2]; + mult=1.0/((x2!=0.0)?x2:1.0); + sd=db_sqr((y[0]-x0*mult))+db_sqr((y[1]-x1*mult)); + + return(sd); +} + +inline double db_SquaredInhomogenousHomographyError(const double y[2],const double H[9],const double x[2]) +{ + double x0,x1,x2,mult; + double sd; + + x0=H[0]*x[0]+H[1]*x[1]+H[2]; + x1=H[3]*x[0]+H[4]*x[1]+H[5]; + x2=H[6]*x[0]+H[7]*x[1]+H[8]; + mult=1.0/((x2!=0.0)?x2:1.0); + sd=db_sqr((y[0]-x0*mult))+db_sqr((y[1]-x1*mult)); + + return(sd); +} + +/*! +Return a constant divided by likelihood of a Cauchy distributed +reprojection error given the image point y, homography H, image point +point x and the squared scale coefficient one_over_scale2=1.0/(scale*scale) +where scale is the half width at half maximum (hWahM) of the +Cauchy distribution*/ +inline double db_ExpCauchyInhomogenousHomographyError(const double y[2],const double H[9],const double x[2], + double one_over_scale2) +{ + double sd; + sd=db_SquaredInhomogenousHomographyError(y,H,x); + return(1.0+sd*one_over_scale2); +} + +/*! +Compute residual vector f between image point y and homography Hx of +image point x. 
Also compute Jacobian of f with respect +to an update dx of H*/ +inline void db_DerivativeInhomHomographyError(double Jf_dx[18],double f[2],const double y[2],const double H[9], + const double x[2]) +{ + double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2; + /*The Jacobian of the inhomogenous coordinates with respect to + the homogenous is + [1/zh 0 -xh/(zh*zh)] + [ 0 1/zh -yh/(zh*zh)] + The Jacobian of the homogenous coordinates with respect to dH is + [x0 x1 1 0 0 0 0 0 0] + [ 0 0 0 x0 x1 1 0 0 0] + [ 0 0 0 0 0 0 x0 x1 1] + The output Jacobian is minus their product, i.e. + [-x0/zh -x1/zh -1/zh 0 0 0 x0*xh/(zh*zh) x1*xh/(zh*zh) xh/(zh*zh)] + [ 0 0 0 -x0/zh -x1/zh -1/zh x0*yh/(zh*zh) x1*yh/(zh*zh) yh/(zh*zh)]*/ + + /*Compute warped point, which is the same as + homogenous coordinates of reprojection*/ + xh=H[0]*x[0]+H[1]*x[1]+H[2]; + yh=H[3]*x[0]+H[4]*x[1]+H[5]; + zh=H[6]*x[0]+H[7]*x[1]+H[8]; + mult=1.0/((zh!=0.0)?zh:1.0); + /*Compute inhomogenous residual*/ + f[0]=y[0]-xh*mult; + f[1]=y[1]-yh*mult; + /*Compute Jacobian*/ + mult2=mult*mult; + xh_mult2=xh*mult2; + yh_mult2=yh*mult2; + Jf_dx[0]= -x[0]*mult; + Jf_dx[1]= -x[1]*mult; + Jf_dx[2]= -mult; + Jf_dx[3]=0; + Jf_dx[4]=0; + Jf_dx[5]=0; + Jf_dx[6]=x[0]*xh_mult2; + Jf_dx[7]=x[1]*xh_mult2; + Jf_dx[8]=xh_mult2; + Jf_dx[9]=0; + Jf_dx[10]=0; + Jf_dx[11]=0; + Jf_dx[12]=Jf_dx[0]; + Jf_dx[13]=Jf_dx[1]; + Jf_dx[14]=Jf_dx[2]; + Jf_dx[15]=x[0]*yh_mult2; + Jf_dx[16]=x[1]*yh_mult2; + Jf_dx[17]=yh_mult2; +} + +/*! +Compute robust residual vector f between image point y and homography Hx of +image point x. 
Also compute Jacobian of f with respect +to an update dH of H*/ +inline void db_DerivativeCauchyInhomHomographyReprojection(double Jf_dx[18],double f[2],const double y[2],const double H[9], + const double x[2],double one_over_scale2) +{ + double Jf_dx_loc[18],f_loc[2]; + double J[4],J0,J1,J2,J3; + + /*Compute reprojection Jacobian*/ + db_DerivativeInhomHomographyError(Jf_dx_loc,f_loc,y,H,x); + /*Compute robustifier Jacobian*/ + db_CauchyDerivative(J,f,f_loc,one_over_scale2); + + /*Multiply the robustifier Jacobian with + the reprojection Jacobian*/ + J0=J[0];J1=J[1];J2=J[2];J3=J[3]; + Jf_dx[0]=J0*Jf_dx_loc[0]; + Jf_dx[1]=J0*Jf_dx_loc[1]; + Jf_dx[2]=J0*Jf_dx_loc[2]; + Jf_dx[3]= J1*Jf_dx_loc[12]; + Jf_dx[4]= J1*Jf_dx_loc[13]; + Jf_dx[5]= J1*Jf_dx_loc[14]; + Jf_dx[6]=J0*Jf_dx_loc[6]+J1*Jf_dx_loc[15]; + Jf_dx[7]=J0*Jf_dx_loc[7]+J1*Jf_dx_loc[16]; + Jf_dx[8]=J0*Jf_dx_loc[8]+J1*Jf_dx_loc[17]; + Jf_dx[9]= J2*Jf_dx_loc[0]; + Jf_dx[10]=J2*Jf_dx_loc[1]; + Jf_dx[11]=J2*Jf_dx_loc[2]; + Jf_dx[12]= J3*Jf_dx_loc[12]; + Jf_dx[13]= J3*Jf_dx_loc[13]; + Jf_dx[14]= J3*Jf_dx_loc[14]; + Jf_dx[15]=J2*Jf_dx_loc[6]+J3*Jf_dx_loc[15]; + Jf_dx[16]=J2*Jf_dx_loc[7]+J3*Jf_dx_loc[16]; + Jf_dx[17]=J2*Jf_dx_loc[8]+J3*Jf_dx_loc[17]; +} +/*! +Compute residual vector f between image point y and rotation of +image point x by R. Also compute Jacobian of f with respect +to an update dx of R*/ +inline void db_DerivativeInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9], + const double x[2]) +{ + double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2; + /*The Jacobian of the inhomogenous coordinates with respect to + the homogenous is + [1/zh 0 -xh/(zh*zh)] + [ 0 1/zh -yh/(zh*zh)] + The Jacobian at zero of the homogenous coordinates with respect to + [sin(phi) sin(ohm) sin(kap)] is + [-rx2 0 rx1 ] + [ 0 rx2 -rx0 ] + [ rx0 -rx1 0 ] + The output Jacobian is minus their product, i.e. 
+ [1+xh*xh/(zh*zh) -xh*yh/(zh*zh) -yh/zh] + [xh*yh/(zh*zh) -1-yh*yh/(zh*zh) xh/zh]*/ + + /*Compute rotated point, which is the same as + homogenous coordinates of reprojection*/ + xh=R[0]*x[0]+R[1]*x[1]+R[2]; + yh=R[3]*x[0]+R[4]*x[1]+R[5]; + zh=R[6]*x[0]+R[7]*x[1]+R[8]; + mult=1.0/((zh!=0.0)?zh:1.0); + /*Compute inhomogenous residual*/ + f[0]=y[0]-xh*mult; + f[1]=y[1]-yh*mult; + /*Compute Jacobian*/ + mult2=mult*mult; + xh_mult2=xh*mult2; + yh_mult2=yh*mult2; + Jf_dx[0]= 1.0+xh*xh_mult2; + Jf_dx[1]= -yh*xh_mult2; + Jf_dx[2]= -yh*mult; + Jf_dx[3]= -Jf_dx[1]; + Jf_dx[4]= -1-yh*yh_mult2; + Jf_dx[5]= xh*mult; +} + +/*! +Compute robust residual vector f between image point y and rotation of +image point x by R. Also compute Jacobian of f with respect +to an update dx of R*/ +inline void db_DerivativeCauchyInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9], + const double x[2],double one_over_scale2) +{ + double Jf_dx_loc[6],f_loc[2]; + double J[4],J0,J1,J2,J3; + + /*Compute reprojection Jacobian*/ + db_DerivativeInhomRotationReprojection(Jf_dx_loc,f_loc,y,R,x); + /*Compute robustifier Jacobian*/ + db_CauchyDerivative(J,f,f_loc,one_over_scale2); + + /*Multiply the robustifier Jacobian with + the reprojection Jacobian*/ + J0=J[0];J1=J[1];J2=J[2];J3=J[3]; + Jf_dx[0]=J0*Jf_dx_loc[0]+J1*Jf_dx_loc[3]; + Jf_dx[1]=J0*Jf_dx_loc[1]+J1*Jf_dx_loc[4]; + Jf_dx[2]=J0*Jf_dx_loc[2]+J1*Jf_dx_loc[5]; + Jf_dx[3]=J2*Jf_dx_loc[0]+J3*Jf_dx_loc[3]; + Jf_dx[4]=J2*Jf_dx_loc[1]+J3*Jf_dx_loc[4]; + Jf_dx[5]=J2*Jf_dx_loc[2]+J3*Jf_dx_loc[5]; +} + + + +/*! 
+// remove the outliers whose projection error is larger than pre-defined +*/ +inline int db_RemoveOutliers_Homography(const double H[9], double *x_i,double *xp_i, double *wp,double *im, double *im_p, double *im_r, double *im_raw,double *im_raw_p,int point_count,double scale, double thresh=DB_OUTLIER_THRESHOLD) +{ + double temp_valueE, t2; + int c; + int k1=0; + int k2=0; + int k3=0; + int numinliers=0; + int ind1; + int ind2; + int ind3; + int isinlier; + + // experimentally determined + t2=1.0/(thresh*thresh*thresh*thresh); + + // count the inliers + for(c=0;c<point_count;c++) + { + ind1=c<<1; + ind2=c<<2; + ind3=3*c; + + temp_valueE=db_SquaredInhomogenousHomographyError(im_p+ind3,H,im+ind3); + + isinlier=((temp_valueE<=t2)?1:0); + + // if it is inlier, then copy the 3d and 2d correspondences + if (isinlier) + { + numinliers++; + + x_i[k1]=x_i[ind1]; + x_i[k1+1]=x_i[ind1+1]; + + xp_i[k1]=xp_i[ind1]; + xp_i[k1+1]=xp_i[ind1+1]; + + k1=k1+2; + + // original normalized pixel coordinates + im[k3]=im[ind3]; + im[k3+1]=im[ind3+1]; + im[k3+2]=im[ind3+2]; + + im_r[k3]=im_r[ind3]; + im_r[k3+1]=im_r[ind3+1]; + im_r[k3+2]=im_r[ind3+2]; + + im_p[k3]=im_p[ind3]; + im_p[k3+1]=im_p[ind3+1]; + im_p[k3+2]=im_p[ind3+2]; + + // left and right raw pixel coordinates + im_raw[k3] = im_raw[ind3]; + im_raw[k3+1] = im_raw[ind3+1]; + im_raw[k3+2] = im_raw[ind3+2]; // the index + + im_raw_p[k3] = im_raw_p[ind3]; + im_raw_p[k3+1] = im_raw_p[ind3+1]; + im_raw_p[k3+2] = im_raw_p[ind3+2]; // the index + + k3=k3+3; + + // 3D coordinates + wp[k2]=wp[ind2]; + wp[k2+1]=wp[ind2+1]; + wp[k2+2]=wp[ind2+2]; + wp[k2+3]=wp[ind2+3]; + + k2=k2+4; + + } + } + + return numinliers; +} + + + + + +/*\}*/ + +#endif /* DB_METRICS */ diff --git a/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp b/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp new file mode 100644 index 000000000..82dec0cbe --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_rob_image_homography.cpp @@ -0,0 +1,1082 @@ +/* + * Copyright (C) 
2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_rob_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */ + +#include "db_utilities.h" +#include "db_rob_image_homography.h" +#include "db_bundle.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_image_homography.h" + +#ifdef _VERBOSE_ +#include <iostream> +using namespace std; +#endif /*VERBOSE*/ + +inline double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2) +{ + int c; + double back,acc,*x_i_temp,*xp_i_temp; + + for(back=0.0,c=0;c<point_count;) + { + /*Take log of product of ten reprojection + errors to reduce nr of expensive log operations*/ + if(c+9<point_count) + { + x_i_temp=x_i+(c<<1); + xp_i_temp=xp_i+(c<<1); + + acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,H,x_i_temp,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,H,x_i_temp+2,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,H,x_i_temp+4,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,H,x_i_temp+6,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,H,x_i_temp+8,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,H,x_i_temp+10,one_over_scale2); + 
acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,H,x_i_temp+12,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,H,x_i_temp+14,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,H,x_i_temp+16,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,H,x_i_temp+18,one_over_scale2); + c+=10; + } + else + { + for(acc=1.0;c<point_count;c++) + { + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1),one_over_scale2); + } + } + back+=log(acc); + } + return(back); +} + +inline double db_RobImageHomography_Statistics(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2,db_Statistics *stat,double thresh=DB_OUTLIER_THRESHOLD) +{ + int c,i; + double t2,frac; + + t2=thresh*thresh; + for(i=0,c=0;c<point_count;c++) + { + i+=(db_SquaredInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1))*one_over_scale2<=t2)?1:0; + } + frac=((double)i)/((double)(db_maxi(point_count,1))); + +#ifdef _VERBOSE_ + std::cout << "Inlier Percentage RobImageHomography: " << frac*100.0 << "% out of " << point_count << " constraints" << std::endl; +#endif /*_VERBOSE_*/ + + if(stat) + { + stat->nr_points=point_count; + stat->one_over_scale2=one_over_scale2; + stat->nr_inliers=i; + stat->inlier_fraction=frac; + + stat->cost=db_RobImageHomography_Cost(H,point_count,x_i,xp_i,one_over_scale2); + stat->model_dimension=0; + /*stat->nr_parameters=;*/ + + stat->lambda1=log(4.0); + stat->lambda2=log(4.0*((double)db_maxi(1,stat->nr_points))); + stat->lambda3=10.0; + stat->gric=stat->cost+stat->lambda1*stat->model_dimension*((double)stat->nr_points)+stat->lambda2*((double)stat->nr_parameters); + stat->inlier_evidence=((double)stat->nr_inliers)-stat->lambda3*((double)stat->nr_parameters); + } + + return(frac); +} + +/*Compute min_Jtf and upper right of JtJ. 
Return cost.*/ +inline double db_RobImageHomography_Jacobians(double JtJ[81],double min_Jtf[9],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2) +{ + double back,Jf_dx[18],f[2],temp,temp2; + int i; + + db_Zero(JtJ,81); + db_Zero(min_Jtf,9); + for(back=0.0,i=0;i<point_count;i++) + { + /*Compute reprojection error vector and its Jacobian + for this point*/ + db_DerivativeCauchyInhomHomographyReprojection(Jf_dx,f,xp_i+(i<<1),H,x_i+(i<<1),one_over_scale2); + /*Perform + min_Jtf-=Jf_dx*f[0] and + min_Jtf-=(Jf_dx+9)*f[1] to accumulate -Jt%f*/ + db_RowOperation9(min_Jtf,Jf_dx,f[0]); + db_RowOperation9(min_Jtf,Jf_dx+9,f[1]); + /*Accumulate upper right of JtJ with outer product*/ + temp=Jf_dx[0]; temp2=Jf_dx[9]; + JtJ[0]+=temp*Jf_dx[0]+temp2*Jf_dx[9]; + JtJ[1]+=temp*Jf_dx[1]+temp2*Jf_dx[10]; + JtJ[2]+=temp*Jf_dx[2]+temp2*Jf_dx[11]; + JtJ[3]+=temp*Jf_dx[3]+temp2*Jf_dx[12]; + JtJ[4]+=temp*Jf_dx[4]+temp2*Jf_dx[13]; + JtJ[5]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + JtJ[6]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[7]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[8]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[1]; temp2=Jf_dx[10]; + JtJ[10]+=temp*Jf_dx[1]+temp2*Jf_dx[10]; + JtJ[11]+=temp*Jf_dx[2]+temp2*Jf_dx[11]; + JtJ[12]+=temp*Jf_dx[3]+temp2*Jf_dx[12]; + JtJ[13]+=temp*Jf_dx[4]+temp2*Jf_dx[13]; + JtJ[14]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + JtJ[15]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[16]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[17]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[2]; temp2=Jf_dx[11]; + JtJ[20]+=temp*Jf_dx[2]+temp2*Jf_dx[11]; + JtJ[21]+=temp*Jf_dx[3]+temp2*Jf_dx[12]; + JtJ[22]+=temp*Jf_dx[4]+temp2*Jf_dx[13]; + JtJ[23]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + JtJ[24]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[25]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[26]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[3]; temp2=Jf_dx[12]; + JtJ[30]+=temp*Jf_dx[3]+temp2*Jf_dx[12]; + JtJ[31]+=temp*Jf_dx[4]+temp2*Jf_dx[13]; + JtJ[32]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + 
JtJ[33]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[34]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[35]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[4]; temp2=Jf_dx[13]; + JtJ[40]+=temp*Jf_dx[4]+temp2*Jf_dx[13]; + JtJ[41]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + JtJ[42]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[43]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[44]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[5]; temp2=Jf_dx[14]; + JtJ[50]+=temp*Jf_dx[5]+temp2*Jf_dx[14]; + JtJ[51]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[52]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[53]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[6]; temp2=Jf_dx[15]; + JtJ[60]+=temp*Jf_dx[6]+temp2*Jf_dx[15]; + JtJ[61]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[62]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[7]; temp2=Jf_dx[16]; + JtJ[70]+=temp*Jf_dx[7]+temp2*Jf_dx[16]; + JtJ[71]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + temp=Jf_dx[8]; temp2=Jf_dx[17]; + JtJ[80]+=temp*Jf_dx[8]+temp2*Jf_dx[17]; + + /*Add square-sum to cost*/ + back+=db_sqr(f[0])+db_sqr(f[1]); + } + + return(back); +} + +/*Compute min_Jtf and upper right of JtJ. 
Return cost*/ +inline double db_RobCamRotation_Jacobians(double JtJ[9],double min_Jtf[3],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2) +{ + double back,Jf_dx[6],f[2]; + int i,j; + + db_Zero(JtJ,9); + db_Zero(min_Jtf,3); + for(back=0.0,i=0;i<point_count;i++) + { + /*Compute reprojection error vector and its Jacobian + for this point*/ + j=(i<<1); + db_DerivativeCauchyInhomRotationReprojection(Jf_dx,f,xp_i+j,H,x_i+j,one_over_scale2); + /*Perform + min_Jtf-=Jf_dx*f[0] and + min_Jtf-=(Jf_dx+3)*f[1] to accumulate -Jt%f*/ + db_RowOperation3(min_Jtf,Jf_dx,f[0]); + db_RowOperation3(min_Jtf,Jf_dx+3,f[1]); + /*Accumulate upper right of JtJ with outer product*/ + JtJ[0]+=Jf_dx[0]*Jf_dx[0]+Jf_dx[3]*Jf_dx[3]; + JtJ[1]+=Jf_dx[0]*Jf_dx[1]+Jf_dx[3]*Jf_dx[4]; + JtJ[2]+=Jf_dx[0]*Jf_dx[2]+Jf_dx[3]*Jf_dx[5]; + JtJ[4]+=Jf_dx[1]*Jf_dx[1]+Jf_dx[4]*Jf_dx[4]; + JtJ[5]+=Jf_dx[1]*Jf_dx[2]+Jf_dx[4]*Jf_dx[5]; + JtJ[8]+=Jf_dx[2]*Jf_dx[2]+Jf_dx[5]*Jf_dx[5]; + + /*Add square-sum to cost*/ + back+=db_sqr(f[0])+db_sqr(f[1]); + } + + return(back); +} + +void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2, + int max_iterations,double improvement_requirement) +{ + int i,update,stop; + double lambda,cost,current_cost; + double JtJ[9],min_Jtf[3],dx[3],H_p_dx[9]; + + lambda=0.001; + for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++) + { + /*if first time since improvement, compute Jacobian and residual*/ + if(update) + { + current_cost=db_RobCamRotation_Jacobians(JtJ,min_Jtf,H,point_count,x_i,xp_i,one_over_scale2); + update=0; + } + +#ifdef _VERBOSE_ + /*std::cout << "Cost:" << current_cost << " ";*/ +#endif /*_VERBOSE_*/ + + /*Come up with a hypothesis dx + based on the current lambda*/ + db_Compute_dx_3x3(dx,JtJ,min_Jtf,lambda); + + /*Compute Cost(x+dx)*/ + db_UpdateRotation(H_p_dx,H,dx); + cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2); + + /*Is there an improvement?*/ + 
if(cost<current_cost) + { + /*improvement*/ + if(current_cost-cost<current_cost*improvement_requirement) stop++; + else stop=0; + lambda*=0.1; + /*Move to the hypothesised position x+dx*/ + current_cost=cost; + db_Copy9(H,H_p_dx); + db_OrthonormalizeRotation(H); + update=1; + +#ifdef _VERBOSE_ + std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl; +#endif /*_VERBOSE_*/ + } + else + { + /*no improvement*/ + lambda*=10.0; + stop=0; + } + } +} + +inline void db_RobImageHomographyFetchJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,int n,int *fetch_vector) +{ + int i,j,t; + double *t1,*t2; + + for(i=0;i<n;i++) + { + t=fetch_vector[i]; + min_Jtf[i]=min_Jtf_temp[t]; + t1=JtJ_ref[i]; + t2=JtJ_temp_ref[t]; + for(j=i;j<n;j++) + { + t1[j]=t2[fetch_vector[j]]; + } + } +} + +inline void db_RobImageHomographyMultiplyJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,double **JE_dx_ref,int n) +{ + double JtJ_JE[72],*JtJ_JE_ref[9]; + + db_SetupMatrixRefs(JtJ_JE_ref,9,8,JtJ_JE); + + db_SymmetricExtendUpperToLower(JtJ_temp_ref,9,9); + db_MultiplyMatricesAB(JtJ_JE_ref,JtJ_temp_ref,JE_dx_ref,9,9,n); + db_UpperMultiplyMatricesAtB(JtJ_ref,JE_dx_ref,JtJ_JE_ref,n,9,n); + db_MultiplyMatrixVectorAtb(min_Jtf,JE_dx_ref,min_Jtf_temp,n,9); +} + +inline void db_RobImageHomographyJH_Js(double **JE_dx_ref,int j,double H[9]) +{ + /*Update of upper 2x2 is multiplication by + [s 0][ cos(theta) sin(theta)] + [0 s][-sin(theta) cos(theta)]*/ + JE_dx_ref[0][j]=H[0]; + JE_dx_ref[1][j]=H[1]; + JE_dx_ref[2][j]=0; + JE_dx_ref[3][j]=H[2]; + JE_dx_ref[4][j]=H[3]; + JE_dx_ref[5][j]=0; + JE_dx_ref[6][j]=0; + JE_dx_ref[7][j]=0; + JE_dx_ref[8][j]=0; +} + +inline void db_RobImageHomographyJH_JR(double **JE_dx_ref,int j,double H[9]) +{ + /*Update of upper 2x2 is multiplication by + [s 0][ cos(theta) sin(theta)] + [0 s][-sin(theta) cos(theta)]*/ + JE_dx_ref[0][j]= H[3]; + JE_dx_ref[1][j]= H[4]; + 
JE_dx_ref[2][j]=0; + JE_dx_ref[3][j]= -H[0]; + JE_dx_ref[4][j]= -H[1]; + JE_dx_ref[5][j]=0; + JE_dx_ref[6][j]=0; + JE_dx_ref[7][j]=0; + JE_dx_ref[8][j]=0; +} + +inline void db_RobImageHomographyJH_Jt(double **JE_dx_ref,int j,int k,double H[9]) +{ + JE_dx_ref[0][j]=0; + JE_dx_ref[1][j]=0; + JE_dx_ref[2][j]=1.0; + JE_dx_ref[3][j]=0; + JE_dx_ref[4][j]=0; + JE_dx_ref[5][j]=0; + JE_dx_ref[6][j]=0; + JE_dx_ref[7][j]=0; + JE_dx_ref[8][j]=0; + + JE_dx_ref[0][k]=0; + JE_dx_ref[1][k]=0; + JE_dx_ref[2][k]=0; + JE_dx_ref[3][k]=0; + JE_dx_ref[4][k]=0; + JE_dx_ref[5][k]=1.0; + JE_dx_ref[6][k]=0; + JE_dx_ref[7][k]=0; + JE_dx_ref[8][k]=0; +} + +inline void db_RobImageHomographyJH_dRotFocal(double **JE_dx_ref,int j,int k,int l,int m,double H[9]) +{ + double f,fi,fi2; + double R[9],J[9]; + + /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/ + f=db_FocalAndRotFromCamRotFocalHomography(R,H); + fi=db_SafeReciprocal(f); + fi2=db_sqr(fi); + db_JacobianOfRotatedPointStride(J,R,3); + JE_dx_ref[0][j]= J[0]; + JE_dx_ref[1][j]= J[1]; + JE_dx_ref[2][j]=f* J[2]; + JE_dx_ref[3][j]= J[3]; + JE_dx_ref[4][j]= J[4]; + JE_dx_ref[5][j]=f* J[5]; + JE_dx_ref[6][j]=fi*J[6]; + JE_dx_ref[7][j]=fi*J[7]; + JE_dx_ref[8][j]= J[8]; + db_JacobianOfRotatedPointStride(J,R+1,3); + JE_dx_ref[0][k]= J[0]; + JE_dx_ref[1][k]= J[1]; + JE_dx_ref[2][k]=f* J[2]; + JE_dx_ref[3][k]= J[3]; + JE_dx_ref[4][k]= J[4]; + JE_dx_ref[5][k]=f* J[5]; + JE_dx_ref[6][k]=fi*J[6]; + JE_dx_ref[7][k]=fi*J[7]; + JE_dx_ref[8][k]= J[8]; + db_JacobianOfRotatedPointStride(J,R+2,3); + JE_dx_ref[0][l]= J[0]; + JE_dx_ref[1][l]= J[1]; + JE_dx_ref[2][l]=f* J[2]; + JE_dx_ref[3][l]= J[3]; + JE_dx_ref[4][l]= J[4]; + JE_dx_ref[5][l]=f* J[5]; + JE_dx_ref[6][l]=fi*J[6]; + JE_dx_ref[7][l]=fi*J[7]; + JE_dx_ref[8][l]= J[8]; + + JE_dx_ref[0][m]=0; + JE_dx_ref[1][m]=0; + JE_dx_ref[2][m]=H[2]; + JE_dx_ref[3][m]=0; + JE_dx_ref[4][m]=0; + JE_dx_ref[5][m]=H[5]; + JE_dx_ref[6][m]= -fi2*H[6]; + JE_dx_ref[7][m]= -fi2*H[7]; + JE_dx_ref[8][m]=0; +} + 
+inline double db_RobImageHomography_Jacobians_Generic(double *JtJ_ref[8],double min_Jtf[8],int *num_param,int *frozen_coord,double H[9],int point_count,double *x_i,double *xp_i,int homography_type,double one_over_scale2) +{ + double back; + int i,j,fetch_vector[8],n; + double JtJ_temp[81],min_Jtf_temp[9],JE_dx[72]; + double *JE_dx_ref[9],*JtJ_temp_ref[9]; + + /*Compute cost and JtJ,min_Jtf with respect to H*/ + back=db_RobImageHomography_Jacobians(JtJ_temp,min_Jtf_temp,H,point_count,x_i,xp_i,one_over_scale2); + + /*Compute JtJ,min_Jtf with respect to the right parameters + The formulas are + JtJ=transpose(JE_dx)*JtJ*JE_dx and + min_Jtf=transpose(JE_dx)*min_Jtf, + where the 9xN matrix JE_dx is the Jacobian of H with respect + to the update*/ + db_SetupMatrixRefs(JtJ_temp_ref,9,9,JtJ_temp); + db_SetupMatrixRefs(JE_dx_ref,9,8,JE_dx); + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + n=4; + db_RobImageHomographyJH_Js(JE_dx_ref,0,H); + db_RobImageHomographyJH_JR(JE_dx_ref,1,H); + db_RobImageHomographyJH_Jt(JE_dx_ref,2,3,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + case DB_HOMOGRAPHY_TYPE_ROTATION: + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + n=1; + db_RobImageHomographyJH_JR(JE_dx_ref,0,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + case DB_HOMOGRAPHY_TYPE_SCALING: + n=1; + db_RobImageHomographyJH_Js(JE_dx_ref,0,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + case DB_HOMOGRAPHY_TYPE_S_T: + n=3; + db_RobImageHomographyJH_Js(JE_dx_ref,0,H); + db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + case DB_HOMOGRAPHY_TYPE_R_T: + n=3; + db_RobImageHomographyJH_JR(JE_dx_ref,0,H); + db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H); + 
db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + case DB_HOMOGRAPHY_TYPE_R_S: + n=2; + db_RobImageHomographyJH_Js(JE_dx_ref,0,H); + db_RobImageHomographyJH_JR(JE_dx_ref,1,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + n=2; + fetch_vector[0]=2; + fetch_vector[1]=5; + db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector); + break; + case DB_HOMOGRAPHY_TYPE_AFFINE: + n=6; + fetch_vector[0]=0; + fetch_vector[1]=1; + fetch_vector[2]=2; + fetch_vector[3]=3; + fetch_vector[4]=4; + fetch_vector[5]=5; + db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector); + break; + case DB_HOMOGRAPHY_TYPE_PROJECTIVE: + n=8; + *frozen_coord=db_MaxAbsIndex9(H); + for(j=0,i=0;i<9;i++) if(i!=(*frozen_coord)) + { + fetch_vector[j]=i; + j++; + } + db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector); + break; + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F: + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD: + n=4; + db_RobImageHomographyJH_dRotFocal(JE_dx_ref,0,1,2,3,H); + db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n); + break; + } + *num_param=n; + + return(back); +} + +inline void db_ImageHomographyUpdateGeneric(double H_p_dx[9],double H[9],double *dx,int homography_type,int frozen_coord) +{ + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + db_Copy9(H_p_dx,H); + db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]); + db_MultiplyRotationOntoImageHomography(H,dx[1]); + H_p_dx[2]+=dx[2]; + H_p_dx[5]+=dx[3]; + break; + case DB_HOMOGRAPHY_TYPE_ROTATION: + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + db_MultiplyRotationOntoImageHomography(H,dx[0]); + break; + case DB_HOMOGRAPHY_TYPE_SCALING: + db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]); + 
break; + case DB_HOMOGRAPHY_TYPE_S_T: + db_Copy9(H_p_dx,H); + db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]); + H_p_dx[2]+=dx[1]; + H_p_dx[5]+=dx[2]; + break; + case DB_HOMOGRAPHY_TYPE_R_T: + db_Copy9(H_p_dx,H); + db_MultiplyRotationOntoImageHomography(H,dx[0]); + H_p_dx[2]+=dx[1]; + H_p_dx[5]+=dx[2]; + break; + case DB_HOMOGRAPHY_TYPE_R_S: + db_Copy9(H_p_dx,H); + db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]); + db_MultiplyRotationOntoImageHomography(H,dx[1]); + break; + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + db_Copy9(H_p_dx,H); + H_p_dx[2]+=dx[0]; + H_p_dx[5]+=dx[1]; + break; + case DB_HOMOGRAPHY_TYPE_AFFINE: + db_UpdateImageHomographyAffine(H_p_dx,H,dx); + break; + case DB_HOMOGRAPHY_TYPE_PROJECTIVE: + db_UpdateImageHomographyProjective(H_p_dx,H,dx,frozen_coord); + break; + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F: + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD: + db_UpdateRotFocalHomography(H_p_dx,H,dx); + break; + } +} + +void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type,double *x_i,double *xp_i,double one_over_scale2, + int max_iterations,double improvement_requirement) +{ + int i,update,stop,n; + int frozen_coord = 0; + double lambda,cost,current_cost; + double JtJ[72],min_Jtf[9],dx[8],H_p_dx[9]; + double *JtJ_ref[9],d[8]; + + lambda=0.001; + for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++) + { + /*if first time since improvement, compute Jacobian and residual*/ + if(update) + { + db_SetupMatrixRefs(JtJ_ref,9,8,JtJ); + current_cost=db_RobImageHomography_Jacobians_Generic(JtJ_ref,min_Jtf,&n,&frozen_coord,H,point_count,x_i,xp_i,homography_type,one_over_scale2); + update=0; + } + +#ifdef _VERBOSE_ + /*std::cout << "Cost:" << current_cost << " ";*/ +#endif /*_VERBOSE_*/ + + /*Come up with a hypothesis dx + based on the current lambda*/ + db_Compute_dx(dx,JtJ_ref,min_Jtf,lambda,d,n); + + /*Compute Cost(x+dx)*/ + db_ImageHomographyUpdateGeneric(H_p_dx,H,dx,homography_type,frozen_coord); + 
cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2); + + /*Is there an improvement?*/ + if(cost<current_cost) + { + /*improvement*/ + if(current_cost-cost<current_cost*improvement_requirement) stop++; + else stop=0; + lambda*=0.1; + /*Move to the hypothesised position x+dx*/ + current_cost=cost; + db_Copy9(H,H_p_dx); + update=1; + +#ifdef _VERBOSE_ + std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl; +#endif /*_VERBOSE_*/ + } + else + { + /*no improvement*/ + lambda*=10.0; + stop=0; + } + } +} +void db_RobImageHomography( + /*Best homography*/ + double H[9], + /*2DPoint to 2DPoint constraints + Points are assumed to be given in + homogenous coordinates*/ + double *im, double *im_p, + /*Nr of points in total*/ + int nr_points, + /*Calibration matrices + used to normalize the points*/ + double K[9], + double Kp[9], + /*Pre-allocated space temp_d + should point to at least + 12*nr_samples+10*nr_points + allocated positions*/ + double *temp_d, + /*Pre-allocated space temp_i + should point to at least + max(nr_samples,nr_points) + allocated positions*/ + int *temp_i, + int homography_type, + db_Statistics *stat, + int max_iterations, + int max_points, + double scale, + int nr_samples, + int chunk_size, + ///////////////////////////////////////////// + // regular use: set outlierremoveflagE =0; + // flag for the outlier removal + int outlierremoveflagE, + // if flag is 1, then the following variables + // need the input + ////////////////////////////////////// + // 3D coordinates + double *wp, + // its corresponding stereo pair's points + double *im_r, + // raw image coordinates + double *im_raw, double *im_raw_p, + // final matches + int *finalNumE) +{ + /*Random seed*/ + int r_seed; + + int point_count_new; + /*Counters*/ + int i,j,c,point_count,hyp_count; + int last_hyp,new_last_hyp,last_corr; + int pos,point_pos,last_point; + /*Accumulator*/ + double acc; + /*Hypothesis pointer*/ + double *hyp_point; 
+ /*Random sample*/ + int s[4]; + /*Pivot for hypothesis pruning*/ + double pivot; + /*Best hypothesis position*/ + int best_pos; + /*Best score*/ + double lowest_cost; + /*One over the squared scale of + Cauchy distribution*/ + double one_over_scale2; + /*temporary pointers*/ + double *x_i_temp,*xp_i_temp; + /*Temporary space for inverse calibration matrices*/ + double K_inv[9]; + double Kp_inv[9]; + /*Temporary space for homography*/ + double H_temp[9],H_temp2[9]; + /*Pointers to homogenous coordinates*/ + double *x_h_point,*xp_h_point; + /*Array of pointers to inhomogenous coordinates*/ + double *X[3],*Xp[3]; + /*Similarity parameters*/ + int orientation_preserving,allow_scaling,allow_rotation,allow_translation,sample_size; + + /*Homogenous coordinates of image points in first image*/ + double *x_h; + /*Homogenous coordinates of image points in second image*/ + double *xp_h; + /*Inhomogenous coordinates of image points in first image*/ + double *x_i; + /*Inhomogenous coordinates of image points in second image*/ + double *xp_i; + /*Homography hypotheses*/ + double *hyp_H_array; + /*Cost array*/ + double *hyp_cost_array; + /*Permutation of the hypotheses*/ + int *hyp_perm; + /*Sample of the points*/ + int *point_perm; + /*Temporary space for quick-select + 2*nr_samples*/ + double *temp_select; + + /*Get inverse calibration matrices*/ + db_InvertCalibrationMatrix(K_inv,K); + db_InvertCalibrationMatrix(Kp_inv,Kp); + /*Compute scale coefficient*/ + one_over_scale2=1.0/(scale*scale); + /*Initialize random seed*/ + r_seed=12345; + /*Set pointers to pre-allocated space*/ + hyp_cost_array=temp_d; + hyp_H_array=temp_d+nr_samples; + temp_select=temp_d+10*nr_samples; + x_h=temp_d+12*nr_samples; + xp_h=temp_d+12*nr_samples+3*nr_points; + x_i=temp_d+12*nr_samples+6*nr_points; + xp_i=temp_d+12*nr_samples+8*nr_points; + hyp_perm=temp_i; + point_perm=temp_i; + + /*Prepare a randomly permuted subset of size + point_count from the input points*/ + + 
point_count=db_mini(nr_points,(int)(chunk_size*log((double)nr_samples)/DB_LN2)); + + point_count_new = point_count; + + for(i=0;i<nr_points;i++) point_perm[i]=i; + + for(last_point=nr_points-1,i=0;i<point_count;i++,last_point--) + { + pos=db_RandomInt(r_seed,last_point); + point_pos=point_perm[pos]; + point_perm[pos]=point_perm[last_point]; + + /*Normalize image points with calibration + matrices and move them to x_h and xp_h*/ + c=3*point_pos; + j=3*i; + x_h_point=x_h+j; + xp_h_point=xp_h+j; + db_Multiply3x3_3x1(x_h_point,K_inv,im+c); + db_Multiply3x3_3x1(xp_h_point,Kp_inv,im_p+c); + + db_HomogenousNormalize3(x_h_point); + db_HomogenousNormalize3(xp_h_point); + + /*Dehomogenize image points and move them + to x_i and xp_i*/ + c=(i<<1); + db_DeHomogenizeImagePoint(x_i+c,x_h_point); // 2-dimension + db_DeHomogenizeImagePoint(xp_i+c,xp_h_point); //2-dimension + } + + + /*Generate Hypotheses*/ + hyp_count=0; + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + case DB_HOMOGRAPHY_TYPE_ROTATION: + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + case DB_HOMOGRAPHY_TYPE_SCALING: + case DB_HOMOGRAPHY_TYPE_S_T: + case DB_HOMOGRAPHY_TYPE_R_T: + case DB_HOMOGRAPHY_TYPE_R_S: + + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + orientation_preserving=1; + allow_scaling=1; + allow_rotation=1; + allow_translation=1; + sample_size=2; + break; + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + orientation_preserving=0; + allow_scaling=1; + allow_rotation=1; + allow_translation=1; + sample_size=3; + break; + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + orientation_preserving=1; + allow_scaling=0; + allow_rotation=0; + allow_translation=1; + sample_size=1; + break; + case DB_HOMOGRAPHY_TYPE_ROTATION: + orientation_preserving=1; + allow_scaling=0; + allow_rotation=1; + allow_translation=0; + sample_size=1; + break; + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + orientation_preserving=0; + 
allow_scaling=0; + allow_rotation=1; + allow_translation=0; + sample_size=2; + break; + case DB_HOMOGRAPHY_TYPE_SCALING: + orientation_preserving=1; + allow_scaling=1; + allow_rotation=0; + allow_translation=0; + sample_size=1; + break; + case DB_HOMOGRAPHY_TYPE_S_T: + orientation_preserving=1; + allow_scaling=1; + allow_rotation=0; + allow_translation=1; + sample_size=2; + break; + case DB_HOMOGRAPHY_TYPE_R_T: + orientation_preserving=1; + allow_scaling=0; + allow_rotation=1; + allow_translation=1; + sample_size=2; + break; + case DB_HOMOGRAPHY_TYPE_R_S: + orientation_preserving=1; + allow_scaling=1; + allow_rotation=0; + allow_translation=0; + sample_size=1; + break; + } + + if(point_count>=sample_size) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,3,point_count,r_seed); + X[0]= &x_i[s[0]<<1]; + X[1]= &x_i[s[1]<<1]; + X[2]= &x_i[s[2]<<1]; + Xp[0]= &xp_i[s[0]<<1]; + Xp[1]= &xp_i[s[1]<<1]; + Xp[2]= &xp_i[s[2]<<1]; + db_StitchSimilarity2D(&hyp_H_array[9*hyp_count],Xp,X,sample_size,orientation_preserving, + allow_scaling,allow_rotation,allow_translation); + hyp_count++; + } + break; + + case DB_HOMOGRAPHY_TYPE_CAMROTATION: + if(point_count>=2) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,2,point_count,r_seed); + db_StitchCameraRotation_2Points(&hyp_H_array[9*hyp_count], + &x_h[3*s[0]],&x_h[3*s[1]], + &xp_h[3*s[0]],&xp_h[3*s[1]]); + hyp_count++; + } + break; + + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F: + if(point_count>=3) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,3,point_count,r_seed); + hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count], + &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]], + &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]); + } + break; + + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD: + if(point_count>=3) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,3,point_count,r_seed); + hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count], + &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]], + 
&xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],NULL,0); + } + break; + + case DB_HOMOGRAPHY_TYPE_AFFINE: + if(point_count>=3) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,3,point_count,r_seed); + db_StitchAffine2D_3Points(&hyp_H_array[9*hyp_count], + &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]], + &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]); + hyp_count++; + } + break; + + case DB_HOMOGRAPHY_TYPE_PROJECTIVE: + default: + if(point_count>=4) for(i=0;i<nr_samples;i++) + { + db_RandomSample(s,4,point_count,r_seed); + db_StitchProjective2D_4Points(&hyp_H_array[9*hyp_count], + &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],&x_h[3*s[3]], + &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],&xp_h[3*s[3]]); + hyp_count++; + } + } + + if(hyp_count) + { + /*Count cost in chunks and decimate hypotheses + until only one remains or the correspondences are + exhausted*/ + for(i=0;i<hyp_count;i++) + { + hyp_perm[i]=i; + hyp_cost_array[i]=0.0; + } + for(i=0,last_hyp=hyp_count-1;(last_hyp>0) && (i<point_count);i+=chunk_size) + { + /*Update cost with the next chunk*/ + last_corr=db_mini(i+chunk_size-1,point_count-1); + for(j=0;j<=last_hyp;j++) + { + hyp_point=hyp_H_array+9*hyp_perm[j]; + for(c=i;c<=last_corr;) + { + /*Take log of product of ten reprojection + errors to reduce nr of expensive log operations*/ + if(c+9<=last_corr) + { + x_i_temp=x_i+(c<<1); + xp_i_temp=xp_i+(c<<1); + + acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,hyp_point,x_i_temp,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,hyp_point,x_i_temp+2,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,hyp_point,x_i_temp+4,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,hyp_point,x_i_temp+6,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,hyp_point,x_i_temp+8,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,hyp_point,x_i_temp+10,one_over_scale2); + 
acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,hyp_point,x_i_temp+12,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,hyp_point,x_i_temp+14,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,hyp_point,x_i_temp+16,one_over_scale2); + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,hyp_point,x_i_temp+18,one_over_scale2); + c+=10; + } + else + { + for(acc=1.0;c<=last_corr;c++) + { + acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),hyp_point,x_i+(c<<1),one_over_scale2); + } + } + hyp_cost_array[j]+=log(acc); + } + } + if (chunk_size<point_count){ + /*Prune out half of the hypotheses*/ + new_last_hyp=(last_hyp+1)/2-1; + pivot=db_LeanQuickSelect(hyp_cost_array,last_hyp+1,new_last_hyp,temp_select); + for(j=0,c=0;(j<=last_hyp) && (c<=new_last_hyp);j++) + { + if(hyp_cost_array[j]<=pivot) + { + hyp_cost_array[c]=hyp_cost_array[j]; + hyp_perm[c]=hyp_perm[j]; + c++; + } + } + last_hyp=new_last_hyp; + } + } + /*Find the best hypothesis*/ + lowest_cost=hyp_cost_array[0]; + best_pos=0; + for(j=1;j<=last_hyp;j++) + { + if(hyp_cost_array[j]<lowest_cost) + { + lowest_cost=hyp_cost_array[j]; + best_pos=j; + } + } + + /*Move the best hypothesis*/ + db_Copy9(H_temp,hyp_H_array+9*hyp_perm[best_pos]); + + // outlier removal + if (outlierremoveflagE) // no polishment needed + { + point_count_new = db_RemoveOutliers_Homography(H_temp,x_i,xp_i,wp,im,im_p,im_r,im_raw,im_raw_p,point_count,one_over_scale2); + } + else + { + /*Polish*/ + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + case DB_HOMOGRAPHY_TYPE_ROTATION: + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + case DB_HOMOGRAPHY_TYPE_SCALING: + case DB_HOMOGRAPHY_TYPE_S_T: + case DB_HOMOGRAPHY_TYPE_R_T: + case DB_HOMOGRAPHY_TYPE_R_S: + case DB_HOMOGRAPHY_TYPE_AFFINE: + case DB_HOMOGRAPHY_TYPE_PROJECTIVE: + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F: + case 
DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD: + db_RobCamRotation_Polish_Generic(H_temp,db_mini(point_count,max_points),homography_type,x_i,xp_i,one_over_scale2,max_iterations); + break; + case DB_HOMOGRAPHY_TYPE_CAMROTATION: + db_RobCamRotation_Polish(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,max_iterations); + break; + } + + } + + } + else db_Identity3x3(H_temp); + + switch(homography_type) + { + case DB_HOMOGRAPHY_TYPE_PROJECTIVE: + if(stat) stat->nr_parameters=8; + break; + case DB_HOMOGRAPHY_TYPE_AFFINE: + if(stat) stat->nr_parameters=6; + break; + case DB_HOMOGRAPHY_TYPE_SIMILARITY: + case DB_HOMOGRAPHY_TYPE_SIMILARITY_U: + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F: + case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD: + if(stat) stat->nr_parameters=4; + break; + case DB_HOMOGRAPHY_TYPE_CAMROTATION: + if(stat) stat->nr_parameters=3; + break; + case DB_HOMOGRAPHY_TYPE_TRANSLATION: + case DB_HOMOGRAPHY_TYPE_S_T: + case DB_HOMOGRAPHY_TYPE_R_T: + case DB_HOMOGRAPHY_TYPE_R_S: + if(stat) stat->nr_parameters=2; + break; + case DB_HOMOGRAPHY_TYPE_ROTATION: + case DB_HOMOGRAPHY_TYPE_ROTATION_U: + case DB_HOMOGRAPHY_TYPE_SCALING: + if(stat) stat->nr_parameters=1; + break; + } + + db_RobImageHomography_Statistics(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,stat); + + /*Put on the calibration matrices*/ + db_Multiply3x3_3x3(H_temp2,H_temp,K_inv); + db_Multiply3x3_3x3(H,Kp,H_temp2); + + if (finalNumE) + *finalNumE = point_count_new; + +} diff --git a/jni/feature_stab/db_vlvm/db_rob_image_homography.h b/jni/feature_stab/db_vlvm/db_rob_image_homography.h new file mode 100644 index 000000000..59cde7daa --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_rob_image_homography.h @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_rob_image_homography.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_ROB_IMAGE_HOMOGRAPHY +#define DB_ROB_IMAGE_HOMOGRAPHY + +#include "db_utilities.h" +#include "db_robust.h" +#include "db_metrics.h" + +#include <stdlib.h> // for NULL + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMRobImageHomography (LM) Robust Image Homography + */ +/*\{*/ + +#define DB_HOMOGRAPHY_TYPE_DEFAULT 0 +#define DB_HOMOGRAPHY_TYPE_PROJECTIVE 0 +#define DB_HOMOGRAPHY_TYPE_AFFINE 1 +#define DB_HOMOGRAPHY_TYPE_SIMILARITY 2 +#define DB_HOMOGRAPHY_TYPE_SIMILARITY_U 3 +#define DB_HOMOGRAPHY_TYPE_TRANSLATION 4 +#define DB_HOMOGRAPHY_TYPE_ROTATION 5 +#define DB_HOMOGRAPHY_TYPE_ROTATION_U 6 +#define DB_HOMOGRAPHY_TYPE_SCALING 7 +#define DB_HOMOGRAPHY_TYPE_S_T 8 +#define DB_HOMOGRAPHY_TYPE_R_T 9 +#define DB_HOMOGRAPHY_TYPE_R_S 10 +#define DB_HOMOGRAPHY_TYPE_CAMROTATION 11 +#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F 12 +#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD 13 + +/*! 
+Solve for homography H such that xp~Hx +\param H best homography + +2D point to 2D point constraints: + +\param im first image points +\param im_p second image points +\param nr_points number of points + +Calibration matrices: + +\param K first camera +\param Kp second camera + + Temporary space: + + \param temp_d pre-allocated space of size 12*nr_samples+10*nr_points doubles + \param temp_i pre-allocated space of size max(nr_samples,nr_points) ints + + Statistics for this estimation + + \param stat NULL - do not compute + + \param homography_type see DB_HOMOGRAPHY_TYPE_* definitions above + + Estimation parameters: + + \param max_iterations max number of polishing steps + \param max_points only use this many points + \param scale Cauchy scale coefficient (see db_ExpCauchyReprojectionError() ) + \param nr_samples number of times to compute a hypothesis + \param chunk_size size of cost chunks +*/ +DB_API void db_RobImageHomography( + /*Best homography*/ + double H[9], + /*2DPoint to 2DPoint constraints + Points are assumed to be given in + homogenous coordinates*/ + double *im,double *im_p, + /*Nr of points in total*/ + int nr_points, + /*Calibration matrices + used to normalize the points*/ + double K[9], + double Kp[9], + /*Pre-allocated space temp_d + should point to at least + 12*nr_samples+10*nr_points + allocated positions*/ + double *temp_d, + /*Pre-allocated space temp_i + should point to at least + max(nr_samples,nr_points) + allocated positions*/ + int *temp_i, + int homography_type=DB_HOMOGRAPHY_TYPE_DEFAULT, + db_Statistics *stat=NULL, + int max_iterations=DB_DEFAULT_MAX_ITERATIONS, + int max_points=DB_DEFAULT_MAX_POINTS, + double scale=DB_POINT_STANDARDDEV, + int nr_samples=DB_DEFAULT_NR_SAMPLES, + int chunk_size=DB_DEFAULT_CHUNK_SIZE, + /////////////////////////////////////////////////// + // flag for the outlier removal + int outlierremoveflagE = 0, + // if flag is 1, then the following variables + // need to input + 
/////////////////////////////////////////////////// + // 3D coordinates + double *wp=NULL, + // its corresponding stereo pair's points + double *im_r=NULL, + // raw image coordinates + double *im_raw=NULL, double *im_raw_p=NULL, + // final matches + int *final_NumE=0); + +DB_API double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i, + double *xp_i,double one_over_scale2); + + +DB_API void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i, + double *xp_i, double one_over_scale2, + int max_iterations=DB_DEFAULT_MAX_ITERATIONS, + double improvement_requirement=DB_DEFAULT_IMP_REQ); + + +DB_API void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type, + double *x_i,double *xp_i,double one_over_scale2, + int max_iterations=DB_DEFAULT_MAX_ITERATIONS, + double improvement_requirement=DB_DEFAULT_IMP_REQ); + + +#endif /* DB_ROB_IMAGE_HOMOGRAPHY */ diff --git a/jni/feature_stab/db_vlvm/db_robust.h b/jni/feature_stab/db_vlvm/db_robust.h new file mode 100644 index 000000000..be0794c6e --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_robust.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id: db_robust.h,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_ROBUST +#define DB_ROBUST + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMRobust (LM) Robust Estimation + */ + +/*! + \struct db_Statistics + \ingroup LMRobust + \brief (LnM) Sampling problem statistics + \date Mon Sep 10 10:28:08 EDT 2007 + \par Copyright: 2007 Sarnoff Corporation. All Rights Reserved + */ + struct db_stat_struct + { + int nr_points; + int nr_inliers; + double inlier_fraction; + double cost; + double one_over_scale2; + double lambda1; + double lambda2; + double lambda3; + int nr_parameters; + int model_dimension; + double gric; + double inlier_evidence; + double posestd[6]; + double rotationvecCov[9]; + double translationvecCov[9]; + int posecov_inliercount; + int posecovready; + double median_reprojection_error; + }; + typedef db_stat_struct db_Statistics; + +#endif /* DB_ROBUST */ diff --git a/jni/feature_stab/db_vlvm/db_utilities.cpp b/jni/feature_stab/db_vlvm/db_utilities.cpp new file mode 100644 index 000000000..ce2093b01 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id: db_utilities.cpp,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */ + +#include "db_utilities.h" +#include <string.h> +#include <stdio.h> + +float** db_SetupImageReferences_f(float *im,int w,int h) +{ + int i; + float **img; + assert(im); + img=new float* [h]; + for(i=0;i<h;i++) + { + img[i]=im+w*i; + } + return(img); +} + +unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h) +{ + int i; + unsigned char **img; + + assert(im); + + img=new unsigned char* [h]; + for(i=0;i<h;i++) + { + img[i]=im+w*i; + } + return(img); +} +float** db_AllocImage_f(int w,int h,int over_allocation) +{ + float **img,*im; + + im=new float [w*h+over_allocation]; + img=db_SetupImageReferences_f(im,w,h); + + return(img); +} + +unsigned char** db_AllocImage_u(int w,int h,int over_allocation) +{ + unsigned char **img,*im; + + im=new unsigned char [w*h+over_allocation]; + img=db_SetupImageReferences_u(im,w,h); + + return(img); +} + +void db_FreeImage_f(float **img,int h) +{ + delete [] (img[0]); + delete [] img; +} + +void db_FreeImage_u(unsigned char **img,int h) +{ + delete [] (img[0]); + delete [] img; +} + +// ----------------------------------------------------------------------------------------------------------- ; +// +// copy image (source to destination) +// ---> must be a 2D image array with the same image size +// ---> the size of the input and output images must be same +// +// ------------------------------------------------------------------------------------------------------------ ; +void db_CopyImage_u(unsigned char **d,const unsigned char * const *s, int w, int h, int over_allocation) +{ + int i; + + for (i=0;i<h;i++) + { + memcpy(d[i],s[i],w*sizeof(unsigned char)); + } + + memcpy(&d[h],&d[h],over_allocation); + +} + +inline void db_WarpImageLutFast_u(const unsigned char * const * src, unsigned char ** dst, int w, int h, + const float * const * lut_x, const float * const * lut_y) +{ + assert(src && dst); + int xd=0, yd=0; + + for ( int i = 0; i < w; 
++i ) + for ( int j = 0; j < h; ++j ) + { + //xd = static_cast<unsigned int>(lut_x[j][i]); + //yd = static_cast<unsigned int>(lut_y[j][i]); + xd = (unsigned int)(lut_x[j][i]); + yd = (unsigned int)(lut_y[j][i]); + if ( xd >= w || yd >= h || + xd < 0 || yd < 0) + dst[j][i] = 0; + else + dst[j][i] = src[yd][xd]; + } +} + +inline void db_WarpImageLutBilinear_u(const unsigned char * const * src, unsigned char ** dst, int w, int h, + const float * const * lut_x,const float * const* lut_y) +{ + assert(src && dst); + double xd=0.0, yd=0.0; + + for ( int i = 0; i < w; ++i ) + for ( int j = 0; j < h; ++j ) + { + xd = static_cast<double>(lut_x[j][i]); + yd = static_cast<double>(lut_y[j][i]); + if ( xd > w || yd > h || + xd < 0.0 || yd < 0.0) + dst[j][i] = 0; + else + dst[j][i] = db_BilinearInterpolation(yd, xd, src); + } +} + + +void db_WarpImageLut_u(const unsigned char * const * src, unsigned char ** dst, int w, int h, + const float * const * lut_x,const float * const * lut_y, int type) +{ + switch (type) + { + case DB_WARP_FAST: + db_WarpImageLutFast_u(src,dst,w,h,lut_x,lut_y); + break; + case DB_WARP_BILINEAR: + db_WarpImageLutBilinear_u(src,dst,w,h,lut_x,lut_y); + break; + default: + break; + } +} + + +void db_PrintDoubleVector(double *a,long size) +{ + printf("[ "); + for(long i=0;i<size;i++) printf("%lf ",a[i]); + printf("]"); +} + +void db_PrintDoubleMatrix(double *a,long rows,long cols) +{ + printf("[\n"); + for(long i=0;i<rows;i++) + { + for(long j=0;j<cols;j++) printf("%lf ",a[i*cols+j]); + printf("\n"); + } + printf("]"); +} diff --git a/jni/feature_stab/db_vlvm/db_utilities.h b/jni/feature_stab/db_vlvm/db_utilities.h new file mode 100644 index 000000000..fa9c87745 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities.h @@ -0,0 +1,571 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_utilities.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_UTILITIES_H +#define DB_UTILITIES_H + + +#ifdef _WIN32 +#pragma warning(disable: 4275) +#pragma warning(disable: 4251) +#pragma warning(disable: 4786) +#pragma warning(disable: 4800) +#pragma warning(disable: 4018) /* signed-unsigned mismatch */ +#endif /* _WIN32 */ + +#ifdef _WIN32 + #ifdef DBDYNAMIC_EXPORTS + #define DB_API __declspec(dllexport) + #else + #ifdef DBDYNAMIC_IMPORTS + #define DB_API __declspec(dllimport) + #else + #define DB_API + #endif + #endif +#else + #define DB_API +#endif /* _WIN32 */ + +#ifdef _VERBOSE_ +#include <iostream> +#endif + +#include <math.h> + +#include <assert.h> +#include "db_utilities_constants.h" +/*! + * \defgroup LMBasicUtilities (LM) Utility Functions (basic math, linear algebra and array manipulations) + */ +/*\{*/ + +/*! + * Round double into int using fld and fistp instructions. + */ +inline int db_roundi (double x) { +#ifdef WIN32_ASM + int n; + __asm + { + fld x; + fistp n; + } + return n; +#else + return static_cast<int>(floor(x+0.5)); +#endif +} + +/*! + * Square a double. + */ +inline double db_sqr(double a) +{ + return(a*a); +} + +/*! + * Square a long. + */ +inline long db_sqr(long a) +{ + return(a*a); +} + +/*! + * Square an int. + */ +inline long db_sqr(int a) +{ + return(a*a); +} + +/*! + * Maximum of two doubles. + */ +inline double db_maxd(double a,double b) +{ + if(b>a) return(b); + else return(a); +} +/*! + * Minumum of two doubles. 
+ */ +inline double db_mind(double a,double b) +{ + if(b<a) return(b); + else return(a); +} + + +/*! + * Maximum of two ints. + */ +inline int db_maxi(int a,int b) +{ + if(b>a) return(b); + else return(a); +} + +/*! + * Minimum of two numbers. + */ +inline int db_mini(int a,int b) +{ + if(b<a) return(b); + else return(a); +} +/*! + * Maximum of two numbers. + */ +inline long db_maxl(long a,long b) +{ + if(b>a) return(b); + else return(a); +} + +/*! + * Minimum of two numbers. + */ +inline long db_minl(long a,long b) +{ + if(b<a) return(b); + else return(a); +} + +/*! + * Sign of a number. + * \return -1.0 if negative, 1.0 if positive. + */ +inline double db_sign(double x) +{ + if(x>=0.0) return(1.0); + else return(-1.0); +} +/*! + * Absolute value. + */ +inline int db_absi(int a) +{ + if(a<0) return(-a); + else return(a); +} +/*! + * Absolute value. + */ +inline float db_absf(float a) +{ + if(a<0) return(-a); + else return(a); +} + +/*! + * Absolute value. + */ +inline double db_absd(double a) +{ + if(a<0) return(-a); + else return(a); +} + +/*! + * Reciprocal (1/a). Prevents divide by 0. + * \return 1/a if a != 0. 1.0 otherwise. + */ +inline double db_SafeReciprocal(double a) +{ + return((a!=0.0)?(1.0/a):1.0); +} + +/*! + * Division. Prevents divide by 0. + * \return a/b if b!=0. a otherwise. + */ +inline double db_SafeDivision(double a,double b) +{ + return((b!=0.0)?(a/b):a); +} + +/*! + * Square root. Prevents imaginary output. + * \return sqrt(a) if a > 0.0. 0.0 otherewise. + */ +inline double db_SafeSqrt(double a) +{ + return((a>=0.0)?(sqrt(a)):0.0); +} + +/*! + * Square root of a reciprocal. Prevents divide by 0 and imaginary output. + * \return sqrt(1/a) if a > 0.0. 1.0 otherewise. + */ +inline double db_SafeSqrtReciprocal(double a) +{ + return((a>0.0)?(sqrt(1.0/a)):1.0); +} +/*! + * Cube root. + */ +inline double db_CubRoot(double x) +{ + if(x>=0.0) return(pow(x,1.0/3.0)); + else return(-pow(-x,1.0/3.0)); +} +/*! + * Sum of squares of elements of x. 
+ */ +inline double db_SquareSum3(const double x[3]) +{ + return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2])); +} +/*! + * Sum of squares of elements of x. + */ +inline double db_SquareSum7(double x[7]) +{ + return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2])+ + db_sqr(x[3])+db_sqr(x[4])+db_sqr(x[5])+ + db_sqr(x[6])); +} +/*! + * Sum of squares of elements of x. + */ +inline double db_SquareSum9(double x[9]) +{ + return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2])+ + db_sqr(x[3])+db_sqr(x[4])+db_sqr(x[5])+ + db_sqr(x[6])+db_sqr(x[7])+db_sqr(x[8])); +} +/*! + * Copy a vector. + * \param xd destination + * \param xs source + */ +void inline db_Copy3(double xd[3],const double xs[3]) +{ + xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2]; +} +/*! + * Copy a vector. + * \param xd destination + * \param xs source + */ +void inline db_Copy6(double xd[6],const double xs[6]) +{ + xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2]; + xd[3]=xs[3];xd[4]=xs[4];xd[5]=xs[5]; +} +/*! + * Copy a vector. + * \param xd destination + * \param xs source + */ +void inline db_Copy9(double xd[9],const double xs[9]) +{ + xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2]; + xd[3]=xs[3];xd[4]=xs[4];xd[5]=xs[5]; + xd[6]=xs[6];xd[7]=xs[7];xd[8]=xs[8]; +} + +/*! + * Scalar product: Transpose(A)*B. + */ +inline double db_ScalarProduct4(const double A[4],const double B[4]) +{ + return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+A[3]*B[3]); +} +/*! + * Scalar product: Transpose(A)*B. + */ +inline double db_ScalarProduct7(const double A[7],const double B[7]) +{ + return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+ + A[3]*B[3]+A[4]*B[4]+A[5]*B[5]+ + A[6]*B[6]); +} +/*! + * Scalar product: Transpose(A)*B. + */ +inline double db_ScalarProduct9(const double A[9],const double B[9]) +{ + return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+ + A[3]*B[3]+A[4]*B[4]+A[5]*B[5]+ + A[6]*B[6]+A[7]*B[7]+A[8]*B[8]); +} +/*! + * Vector addition: S=A+B. 
+ */ +inline void db_AddVectors6(double S[6],const double A[6],const double B[6]) +{ + S[0]=A[0]+B[0]; S[1]=A[1]+B[1]; S[2]=A[2]+B[2]; S[3]=A[3]+B[3]; S[4]=A[4]+B[4]; + S[5]=A[5]+B[5]; +} +/*! + * Multiplication: C(3x1)=A(3x3)*B(3x1). + */ +inline void db_Multiply3x3_3x1(double y[3],const double A[9],const double x[3]) +{ + y[0]=A[0]*x[0]+A[1]*x[1]+A[2]*x[2]; + y[1]=A[3]*x[0]+A[4]*x[1]+A[5]*x[2]; + y[2]=A[6]*x[0]+A[7]*x[1]+A[8]*x[2]; +} +inline void db_Multiply3x3_3x3(double C[9], const double A[9],const double B[9]) +{ + C[0]=A[0]*B[0]+A[1]*B[3]+A[2]*B[6]; + C[1]=A[0]*B[1]+A[1]*B[4]+A[2]*B[7]; + C[2]=A[0]*B[2]+A[1]*B[5]+A[2]*B[8]; + + C[3]=A[3]*B[0]+A[4]*B[3]+A[5]*B[6]; + C[4]=A[3]*B[1]+A[4]*B[4]+A[5]*B[7]; + C[5]=A[3]*B[2]+A[4]*B[5]+A[5]*B[8]; + + C[6]=A[6]*B[0]+A[7]*B[3]+A[8]*B[6]; + C[7]=A[6]*B[1]+A[7]*B[4]+A[8]*B[7]; + C[8]=A[6]*B[2]+A[7]*B[5]+A[8]*B[8]; +} +/*! + * Multiplication: C(4x1)=A(4x4)*B(4x1). + */ +inline void db_Multiply4x4_4x1(double y[4],const double A[16],const double x[4]) +{ + y[0]=A[0]*x[0]+A[1]*x[1]+A[2]*x[2]+A[3]*x[3]; + y[1]=A[4]*x[0]+A[5]*x[1]+A[6]*x[2]+A[7]*x[3]; + y[2]=A[8]*x[0]+A[9]*x[1]+A[10]*x[2]+A[11]*x[3]; + y[3]=A[12]*x[0]+A[13]*x[1]+A[14]*x[2]+A[15]*x[3]; +} +/*! + * Scalar multiplication in place: A(3)=mult*A(3). + */ +inline void db_MultiplyScalar3(double *A,double mult) +{ + (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; +} + +/*! + * Scalar multiplication: A(3)=mult*B(3). + */ +inline void db_MultiplyScalarCopy3(double *A,const double *B,double mult) +{ + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; +} + +/*! + * Scalar multiplication: A(4)=mult*B(4). + */ +inline void db_MultiplyScalarCopy4(double *A,const double *B,double mult) +{ + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; +} +/*! + * Scalar multiplication: A(7)=mult*B(7). 
+ */ +inline void db_MultiplyScalarCopy7(double *A,const double *B,double mult) +{ + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; +} +/*! + * Scalar multiplication: A(9)=mult*B(9). + */ +inline void db_MultiplyScalarCopy9(double *A,const double *B,double mult) +{ + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; + (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; +} + +/*! + * \defgroup LMImageBasicUtilities (LM) Basic Image Utility Functions + + Images in db are simply 2D arrays of unsigned char or float types. + Only the very basic operations are supported: allocation/deallocation, +copying, simple pyramid construction and LUT warping. These images are used +by db_CornerDetector_u and db_Matcher_u. The db_Image class is an attempt +to wrap these images. It has not been tested well. + + */ +/*\{*/ +/*! + * Given a float image array, allocates and returns the set of row poiners. + * \param im image pointer + * \param w image width + * \param h image height + */ +DB_API float** db_SetupImageReferences_f(float *im,int w,int h); +/*! + * Allocate a float image. + * Note: for feature detection images must be overallocated by 256 bytes. + * \param w width + * \param h height + * \param over_allocation allocate this many extra bytes at the end + * \return row array pointer + */ +DB_API float** db_AllocImage_f(int w,int h,int over_allocation=256); +/*! + * Free a float image + * \param img row array pointer + * \param h image height (number of rows) + */ +DB_API void db_FreeImage_f(float **img,int h); +/*! + * Given an unsigned char image array, allocates and returns the set of row poiners. + * \param im image pointer + * \param w image width + * \param h image height + */ +DB_API unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h); +/*! + * Allocate an unsigned char image. 
+ * Note: for feature detection images must be overallocated by 256 bytes. + * \param w width + * \param h height + * \param over_allocation allocate this many extra bytes at the end + * \return row array pointer + */ +DB_API unsigned char** db_AllocImage_u(int w,int h,int over_allocation=256); +/*! + * Free an unsigned char image + * \param img row array pointer + * \param h image height (number of rows) + */ +DB_API void db_FreeImage_u(unsigned char **img,int h); + +/*! + Copy an image from s to d. Both s and d must be pre-allocated at of the same size. + Copy is done row by row. + \param s source + \param d destination + \param w width + \param h height + \param over_allocation copy this many bytes after the end of the last line + */ +DB_API void db_CopyImage_u(unsigned char **d,const unsigned char * const *s,int w,int h,int over_allocation=0); + +DB_API inline unsigned char db_BilinearInterpolation(double y, double x, const unsigned char * const * v) +{ + int floor_x=(int) x; + int floor_y=(int) y; + + int ceil_x=floor_x+1; + int ceil_y=floor_y+1; + + unsigned char f00 = v[floor_y][floor_x]; + unsigned char f01 = v[floor_y][ceil_x]; + unsigned char f10 = v[ceil_y][floor_x]; + unsigned char f11 = v[ceil_y][ceil_x]; + + double xl = x-floor_x; + double yl = y-floor_y; + + return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl); +} +/*\}*/ +/*! 
+ * \ingroup LMRotation + * Compute an incremental rotation matrix using the update dx=[sin(phi) sin(ohm) sin(kap)] + */ +inline void db_IncrementalRotationMatrix(double R[9],const double dx[3]) +{ + double sp,so,sk,om_sp2,om_so2,om_sk2,cp,co,ck,sp_so,cp_so; + + /*Store sines*/ + sp=dx[0]; so=dx[1]; sk=dx[2]; + om_sp2=1.0-sp*sp; + om_so2=1.0-so*so; + om_sk2=1.0-sk*sk; + /*Compute cosines*/ + cp=(om_sp2>=0.0)?sqrt(om_sp2):1.0; + co=(om_so2>=0.0)?sqrt(om_so2):1.0; + ck=(om_sk2>=0.0)?sqrt(om_sk2):1.0; + /*Compute matrix*/ + sp_so=sp*so; + cp_so=cp*so; + R[0]=sp_so*sk+cp*ck; R[1]=co*sk; R[2]=cp_so*sk-sp*ck; + R[3]=sp_so*ck-cp*sk; R[4]=co*ck; R[5]=cp_so*ck+sp*sk; + R[6]=sp*co; R[7]= -so; R[8]=cp*co; +} +/*! + * Zero out 2 vector in place. + */ +void inline db_Zero2(double x[2]) +{ + x[0]=x[1]=0; +} +/*! + * Zero out 3 vector in place. + */ +void inline db_Zero3(double x[3]) +{ + x[0]=x[1]=x[2]=0; +} +/*! + * Zero out 4 vector in place. + */ +void inline db_Zero4(double x[4]) +{ + x[0]=x[1]=x[2]=x[3]=0; +} +/*! + * Zero out 9 vector in place. + */ +void inline db_Zero9(double x[9]) +{ + x[0]=x[1]=x[2]=x[3]=x[4]=x[5]=x[6]=x[7]=x[8]=0; +} + +#define DB_WARP_FAST 0 +#define DB_WARP_BILINEAR 1 + +/*! + * Perform a look-up table warp. + * The LUTs must be float images of the same size as source image. 
+ * The source value x_s is determined from destination (x_d,y_d) through lut_x + * and y_s is determined from lut_y: + \code + x_s = lut_x[y_d][x_d]; + y_s = lut_y[y_d][x_d]; + \endcode + + * \param src source image + * \param dst destination image + * \param w width + * \param h height + * \param lut_x LUT for x + * \param lut_y LUT for y + * \param type warp type (DB_WARP_FAST or DB_WARP_BILINEAR) + */ +DB_API void db_WarpImageLut_u(const unsigned char * const * src,unsigned char ** dst, int w, int h, + const float * const * lut_x, const float * const * lut_y, int type=DB_WARP_BILINEAR); + +DB_API void db_PrintDoubleVector(double *a,long size); +DB_API void db_PrintDoubleMatrix(double *a,long rows,long cols); + +#include "db_utilities_constants.h" +#include "db_utilities_algebra.h" +#include "db_utilities_indexing.h" +#include "db_utilities_linalg.h" +#include "db_utilities_poly.h" +#include "db_utilities_geometry.h" +#include "db_utilities_random.h" +#include "db_utilities_rotation.h" +#include "db_utilities_camera.h" + +#define DB_INVALID (-1) + + +#endif /* DB_UTILITIES_H */ diff --git a/jni/feature_stab/db_vlvm/db_utilities_algebra.h b/jni/feature_stab/db_vlvm/db_utilities_algebra.h new file mode 100644 index 000000000..2aedd74d5 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_algebra.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* Declared in db_utilities.h; repeated for readability. */
double db_SquareSum3(const double x[3]);
double db_SafeSqrtReciprocal(double s);
void db_MultiplyScalar3(double x[3],double mult);

/*****************************************************************
*    Lean and mean begins here                                   *
*****************************************************************/
/*!
 * \defgroup LMAlgebra (LM) Algebra utilities
 */
/*\{*/

/*! Scale the homogenous 3-vector x to unit norm in place
    (a zero vector is handled safely by db_SafeSqrtReciprocal). */
inline void db_HomogenousNormalize3(double *x)
{
    const double scale=db_SafeSqrtReciprocal(db_SquareSum3(x));
    db_MultiplyScalar3(x,scale);
}

/*\}*/
/* Defined in db_utilities_camera.h. */
void db_InvertCalibrationMatrix(double Kinv[9],const double K[9]);

/*****************************************************************
*    Lean and mean begins here                                   *
*****************************************************************/

/*!
 Build an approximate intrinsic calibration matrix K (and its inverse)
 for an im_width x im_height image: focal = f_correction*(w+h)/2 and the
 principal point at the image center. For field (interlaced, half-height)
 images the vertical focal/center are compensated by a factor of two.
 */
void db_Approx3DCalMat(double K[9],double Kinv[9],int im_width,int im_height,double f_correction,int field)
{
    const double field_fact=field?2.0:1.0;
    const double iw=(double)im_width;
    const double ih=(double)(im_height*field_fact);
    const double av_size=(iw+ih)/2.0;

    K[0]=f_correction*av_size;
    K[1]=0;
    K[2]=iw/2.0;
    K[3]=0;
    K[4]=f_correction*av_size/field_fact;
    K[5]=ih/2.0/field_fact;
    K[6]=0;
    K[7]=0;
    K[8]=1;

    db_InvertCalibrationMatrix(Kinv,K);
}
/*Radial distortion handling modes*/
#define DB_RADDISTMODE_BOUGEUT   4
#define DB_RADDISTMODE_2NDORDER  5
#define DB_RADDISTMODE_IDENTITY  6

/*!
 Write the 2x2 identity into A.
 */
inline void db_Identity2x2(double A[4])
{
    A[0]=1; A[1]=0;
    A[2]=0; A[3]=1;
}
/*!
 Write the 3x3 identity into A.
 */
inline void db_Identity3x3(double A[9])
{
    A[0]=1; A[1]=0; A[2]=0;
    A[3]=0; A[4]=1; A[5]=0;
    A[6]=0; A[7]=0; A[8]=1;
}
/*!
 Invert the upper-triangular intrinsic calibration matrix
 K = [fx skew px; 0 fy py; 0 0 w]. The result satisfies K*Kinv = w*I
 (identical to the true inverse for the usual w==1).
 If fx or fy is zero the identity is returned instead.
 */
inline void db_InvertCalibrationMatrix(double Kinv[9],const double K[9])
{
    const double fx=K[0],skew=K[1],px=K[2];
    const double fy=K[4],py=K[5],w=K[8];

    if(fx==0.0 || fy==0.0)
    {
        db_Identity3x3(Kinv);
        return;
    }
    const double fx_inv=1.0/fx;
    const double fy_inv=1.0/fy;
    const double fxy_inv=fx_inv*fy_inv;

    Kinv[0]=w*fx_inv;
    Kinv[1]= -skew*w*fxy_inv;
    Kinv[2]=(skew*py-px*fy)*fxy_inv;
    Kinv[3]=0.0;
    Kinv[4]=w*fy_inv;
    Kinv[5]= -py*fy_inv;
    Kinv[6]=0.0;
    Kinv[7]=0.0;
    Kinv[8]=1.0;
}
/* Declared in db_utilities.h; repeated for readability. */
void db_IncrementalRotationMatrix(double R[9],const double dx[3]);
void db_Multiply3x3_3x3(double C[9],const double A[9],const double B[9]);

/*!
 De-homogenize an image point: xd(1:2) = xs(1:2)/xs(3).
 When xs(3)==0 the result is set to (0,0) instead of dividing.
 \param xd destination (inhomogenous) point
 \param xs source (homogenous) point
 */
inline void db_DeHomogenizeImagePoint(double xd[2],const double xs[3])
{
    const double w=xs[2];

    if(w==0.0)
    {
        xd[0]=0.0;
        xd[1]=0.0;
        return;
    }
    const double w_inv=1.0/w;
    xd[0]=xs[0]*w_inv;
    xd[1]=xs[1]*w_inv;
}

/*!
 Orthonormalize the 3D rotation R in place: Gram-Schmidt on the first
 two rows, third row rebuilt as their cross product.
 */
inline void db_OrthonormalizeRotation(double R[9])
{
    /*Normalize row 0 (a zero row falls back to unit scale)*/
    double nrm=R[0]*R[0]+R[1]*R[1]+R[2]*R[2];
    double scale=sqrt(1.0/(nrm?nrm:1));
    R[0]*=scale; R[1]*=scale; R[2]*=scale;
    /*Remove the row-0 component from row 1*/
    const double dot=R[0]*R[3]+R[1]*R[4]+R[2]*R[5];
    R[3]-=dot*R[0]; R[4]-=dot*R[1]; R[5]-=dot*R[2];
    /*Normalize row 1*/
    nrm=R[3]*R[3]+R[4]*R[4]+R[5]*R[5];
    scale=sqrt(1.0/(nrm?nrm:1));
    R[3]*=scale; R[4]*=scale; R[5]*=scale;
    /*Row 2 = row 0 x row 1*/
    R[6]=R[1]*R[5]-R[4]*R[2];
    R[7]=R[2]*R[3]-R[5]*R[0];
    R[8]=R[0]*R[4]-R[3]*R[1];
}
/*!
 Update rotation R with dx=[sin(phi) sin(ohm) sin(kap)]:
 R_p_dx = dR(dx)*R.
 */
inline void db_UpdateRotation(double R_p_dx[9],double R[9],const double dx[3])
{
    double dR[9];
    db_IncrementalRotationMatrix(dR,dx);
    db_Multiply3x3_3x3(R_p_dx,dR,R);
}
/* Declared in db_utilities.h; repeated for readability. */
double db_sqr(double a);
double db_SafeSqrt(double a);
double db_SafeDivision(double a,double b);
double db_SafeReciprocal(double a);

/*!
 Apply homography H to the inhomogenous point x: xp ~ H*(x,1).
 If the resulting homogenous scale is zero, xp is set to (0,0).
 */
inline void db_ImageHomographyInhomogenous(double xp[2],const double H[9],const double x[2])
{
    const double w=H[6]*x[0]+H[7]*x[1]+H[8];

    if(w==0.0)
    {
        xp[0]=0.0;
        xp[1]=0.0;
        return;
    }
    const double w_inv=1.0/w;
    xp[0]=w_inv*(H[0]*x[0]+H[1]*x[1]+H[2]);
    xp[1]=w_inv*(H[3]*x[0]+H[4]*x[1]+H[5]);
}
/*!
 Recover the focal length from a camera-rotation-focal homography
 H = diag(f,f,1)*R*diag(1/f,1/f,1), choosing the better-conditioned of
 the two closed-form estimates.
 */
inline double db_FocalFromCamRotFocalHomography(const double H[9])
{
    const double k1=db_sqr(H[2])+db_sqr(H[5]);
    const double k2=db_sqr(H[6])+db_sqr(H[7]);

    if(k1>=k2) return(db_SafeSqrt(db_SafeDivision(k1,1.0-db_sqr(H[8]))));
    return(db_SafeSqrt(db_SafeDivision(1.0-db_sqr(H[8]),k2)));
}
/*!
 Recover focal length and rotation R from a camera-rotation-focal
 homography; returns the focal length.
 */
inline double db_FocalAndRotFromCamRotFocalHomography(double R[9],const double H[9])
{
    const double f=db_FocalFromCamRotFocalHomography(H);
    const double f_inv=db_SafeReciprocal(f);

    R[0]=H[0];   R[1]=H[1];   R[2]=f_inv*H[2];
    R[3]=H[3];   R[4]=H[4];   R[5]=f_inv*H[5];
    R[6]=f*H[6]; R[7]=f*H[7]; R[8]=H[8];
    return(f);
}
/*!
Jacobian at zero of the three coordinates dR*x with respect to the
update dR([sin(phi) sin(ohm) sin(kap)]) given x (with element stride):
\code
   [-x2   0   x1 ]
   [ 0    x2 -x0 ]
   [ x0  -x1  0  ]
\endcode
*/
inline void db_JacobianOfRotatedPointStride(double J[9],const double x[3],int stride)
{
    const double x0=x[0];
    const double x1=x[stride];
    const double x2=x[stride<<1];

    J[0]= -x2; J[1]=0;    J[2]=  x1;
    J[3]=0;    J[4]= x2;  J[5]= -x0;
    J[6]= x0;  J[7]= -x1; J[8]=0;
}
+ Invert an affine (if possible) + \param Hinv inverted matrix + \param H input matrix + \return true if success and false if matrix is ill-conditioned (det < 1e-7) + */ +inline bool db_InvertAffineTransform(double Hinv[9],const double H[9]) +{ + double det=H[0]*H[4]-H[3]*H[1]; + if (det<1e-7) + { + db_Copy9(Hinv,H); + return false; + } + else + { + Hinv[0]=H[4]/det; + Hinv[1]=-H[1]/det; + Hinv[3]=-H[3]/det; + Hinv[4]=H[0]/det; + Hinv[2]= -Hinv[0]*H[2]-Hinv[1]*H[5]; + Hinv[5]= -Hinv[3]*H[2]-Hinv[4]*H[5]; + } + return true; +} + +/*! +Update of upper 2x2 is multiplication by +\code +[s 0][ cos(theta) sin(theta)] +[0 s][-sin(theta) cos(theta)] +\endcode +*/ +inline void db_MultiplyScaleOntoImageHomography(double H[9],double s) +{ + + H[0]*=s; + H[1]*=s; + H[3]*=s; + H[4]*=s; +} +/*! +Update of upper 2x2 is multiplication by +\code +[s 0][ cos(theta) sin(theta)] +[0 s][-sin(theta) cos(theta)] +\endcode +*/ +inline void db_MultiplyRotationOntoImageHomography(double H[9],double theta) +{ + double c,s,H0,H1; + + + c=cos(theta); + s=db_SafeSqrt(1.0-db_sqr(c)); + H0= c*H[0]+s*H[3]; + H[3]= -s*H[0]+c*H[3]; + H[0]=H0; + H1=c*H[1]+s*H[4]; + H[4]= -s*H[1]+c*H[4]; + H[1]=H1; +} + +inline void db_UpdateImageHomographyAffine(double H_p_dx[9],const double H[9],const double dx[6]) +{ + db_AddVectors6(H_p_dx,H,dx); + db_Copy3(H_p_dx+6,H+6); +} + +inline void db_UpdateImageHomographyProjective(double H_p_dx[9],const double H[9],const double dx[8],int frozen_coord) +{ + int i,j; + + for(j=0,i=0;i<9;i++) + { + if(i!=frozen_coord) + { + H_p_dx[i]=H[i]+dx[j]; + j++; + } + else H_p_dx[i]=H[i]; + } +} + +inline void db_UpdateRotFocalHomography(double H_p_dx[9],const double H[9],const double dx[4]) +{ + double f,fp,fpi; + double R[9],dR[9]; + + /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/ + f=db_FocalAndRotFromCamRotFocalHomography(R,H); + db_IncrementalRotationMatrix(dR,dx); + db_Multiply3x3_3x3(H_p_dx,dR,R); + fp=f+dx[3]; + fpi=db_SafeReciprocal(fp); + 
H_p_dx[2]*=fp; + H_p_dx[5]*=fp; + H_p_dx[6]*=fpi; + H_p_dx[7]*=fpi; +} + +/*\}*/ +#endif /* DB_UTILITIES_CAMERA */ diff --git a/jni/feature_stab/db_vlvm/db_utilities_constants.h b/jni/feature_stab/db_vlvm/db_utilities_constants.h new file mode 100644 index 000000000..07565efd2 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_constants.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_utilities_constants.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_UTILITIES_CONSTANTS +#define DB_UTILITIES_CONSTANTS + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +/****************Constants********************/ +#define DB_E 2.7182818284590452354 +#define DB_LOG2E 1.4426950408889634074 +#define DB_LOG10E 0.43429448190325182765 +#define DB_LN2 0.69314718055994530942 +#define DB_LN10 2.30258509299404568402 +#define DB_PI 3.1415926535897932384626433832795 +#define DB_PI_2 1.57079632679489661923 +#define DB_PI_4 0.78539816339744830962 +#define DB_1_PI 0.31830988618379067154 +#define DB_2_PI 0.63661977236758134308 +#define DB_SQRTPI 1.7724538509055160272981674833411 +#define DB_SQRT_2PI 2.506628274631000502415765284811 +#define DB_2_SQRTPI 1.12837916709551257390 +#define DB_SQRT2 1.41421356237309504880 +#define DB_SQRT3 
1.7320508075688772935274463415059 +#define DB_SQRT1_2 0.70710678118654752440 +#define DB_EPS 2.220446049250313e-016 /* for 32 bit double */ + +/****************Default Parameters********************/ +/*Preemptive ransac parameters*/ +#define DB_DEFAULT_NR_SAMPLES 500 +#define DB_DEFAULT_CHUNK_SIZE 100 +#define DB_DEFAULT_GROUP_SIZE 10 + +/*Optimisation parameters*/ +#define DB_DEFAULT_MAX_POINTS 1000 +#define DB_DEFAULT_MAX_ITERATIONS 25 +#define DB_DEFAULT_IMP_REQ 0.001 + +/*Feature standard deviation parameters*/ +#define DB_POINT_STANDARDDEV (1.0/(826.0)) /*1 pixel for CIF (fraction of (image width+image height)/2)*/ +#define DB_OUTLIER_THRESHOLD 3.0 /*In number of DB_POINT_STANDARDDEV's*/ +#define DB_WORST_CASE 50.0 /*In number of DB_POINT_STANDARDDEV's*/ + +/*Front-end parameters*/ +#define DB_DEFAULT_TARGET_NR_CORNERS 5000 +#define DB_DEFAULT_NR_FEATURE_BLOCKS 10 +#define DB_DEFAULT_ABS_CORNER_THRESHOLD 50000000.0 +#define DB_DEFAULT_REL_CORNER_THRESHOLD 0.00005 +#define DB_DEFAULT_MAX_DISPARITY 0.1 +#define DB_DEFAULT_NO_DISPARITY -1.0 +#define DB_DEFAULT_MAX_TRACK_LENGTH 300 + +#define DB_DEFAULT_MAX_NR_CAMERAS 1000 + +#define DB_DEFAULT_TRIPLE_STEP 2 +#define DB_DEFAULT_DOUBLE_STEP 2 +#define DB_DEFAULT_SINGLE_STEP 1 +#define DB_DEFAULT_NR_SINGLES 10 +#define DB_DEFAULT_NR_DOUBLES 1 +#define DB_DEFAULT_NR_TRIPLES 1 + +#define DB_DEFAULT_TRIFOCAL_FOUR_STEPS 40 + +#define DB_DEFAULT_EPIPOLAR_ERROR 1 /*in pixels*/ + +////////////////////////// DOXYGEN ///////////////////// + +/*! + * \def DB_DEFAULT_GROUP_SIZE + * \ingroup LMRobust + * \brief Default group size for db_PreemptiveRansac class. + * Group size is the number of observation costs multiplied together + * before a log of the product is added to the total cost. +*/ + +/*! + * \def DB_DEFAULT_TARGET_NR_CORNERS + * \ingroup FeatureDetection + * \brief Default target number of corners +*/ +/*! 
+ * \def DB_DEFAULT_NR_FEATURE_BLOCKS + * \ingroup FeatureDetection + * \brief Default number of regions (horizontal or vertical) that are considered separately + * for feature detection. The greater the number, the more uniform the distribution of + * detected features. +*/ +/*! + * \def DB_DEFAULT_ABS_CORNER_THRESHOLD + * \ingroup FeatureDetection + * \brief Absolute feature strength threshold. +*/ +/*! + * \def DB_DEFAULT_REL_CORNER_THRESHOLD + * \ingroup FeatureDetection + * \brief Relative feature strength threshold. +*/ +/*! + * \def DB_DEFAULT_MAX_DISPARITY + * \ingroup FeatureMatching + * \brief Maximum disparity (as fraction of image size) allowed in feature matching +*/ + /*! + * \def DB_DEFAULT_NO_DISPARITY + * \ingroup FeatureMatching + * \brief Indicates that vertical disparity is the same as horizontal disparity. +*/ +/////////////////////////////////////////////////////////////////////////////////// + /*! + * \def DB_E + * \ingroup LMBasicUtilities + * \brief e +*/ + /*! + * \def DB_LOG2E + * \ingroup LMBasicUtilities + * \brief log2(e) +*/ + /*! + * \def DB_LOG10E + * \ingroup LMBasicUtilities + * \brief log10(e) +*/ + /*! + * \def DB_LOG10E + * \ingroup LMBasicUtilities + * \brief log10(e) +*/ +/*! + * \def DB_LN2 + * \ingroup LMBasicUtilities + * \brief ln(2) +*/ +/*! + * \def DB_LN10 + * \ingroup LMBasicUtilities + * \brief ln(10) +*/ +/*! + * \def DB_PI + * \ingroup LMBasicUtilities + * \brief Pi +*/ +/*! + * \def DB_PI_2 + * \ingroup LMBasicUtilities + * \brief Pi/2 +*/ +/*! + * \def DB_PI_4 + * \ingroup LMBasicUtilities + * \brief Pi/4 +*/ +/*! + * \def DB_1_PI + * \ingroup LMBasicUtilities + * \brief 1/Pi +*/ +/*! + * \def DB_2_PI + * \ingroup LMBasicUtilities + * \brief 2/Pi +*/ +/*! + * \def DB_SQRTPI + * \ingroup LMBasicUtilities + * \brief sqrt(Pi) +*/ +/*! + * \def DB_SQRT_2PI + * \ingroup LMBasicUtilities + * \brief sqrt(2*Pi) +*/ +/*! + * \def DB_SQRT2 + * \ingroup LMBasicUtilities + * \brief sqrt(2) +*/ +/*! 
+ * \def DB_SQRT3 + * \ingroup LMBasicUtilities + * \brief sqrt(3) +*/ +/*! + * \def DB_SQRT1_2 + * \ingroup LMBasicUtilities + * \brief sqrt(1/2) +*/ +#endif /* DB_UTILITIES_CONSTANTS */ + + diff --git a/jni/feature_stab/db_vlvm/db_utilities_geometry.h b/jni/feature_stab/db_vlvm/db_utilities_geometry.h new file mode 100644 index 000000000..f21558467 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_geometry.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_utilities_geometry.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_UTILITIES_GEOMETRY_H +#define DB_UTILITIES_GEOMETRY_H + +#include "db_utilities.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! 
Get the inhomogenous 2D-point centroid of nr_point inhomogenous +points in X*/ +inline void db_PointCentroid2D(double c[2],const double *X,int nr_points) +{ + int i; + double cx,cy,m; + + cx=0;cy=0; + for(i=0;i<nr_points;i++) + { + cx+= *X++; + cy+= *X++; + } + if(nr_points) + { + m=1.0/((double)nr_points); + c[0]=cx*m; + c[1]=cy*m; + } + else c[0]=c[1]=0; +} + +inline void db_PointCentroid2D(double c[2],const double * const *X,int nr_points) +{ + int i; + double cx,cy,m; + const double *temp; + + cx=0;cy=0; + for(i=0;i<nr_points;i++) + { + temp= *X++; + cx+=temp[0]; + cy+=temp[1]; + } + if(nr_points) + { + m=1.0/((double)nr_points); + c[0]=cx*m; + c[1]=cy*m; + } + else c[0]=c[1]=0; +} + +/*! Get the inhomogenous 3D-point centroid of nr_point inhomogenous +points in X*/ +inline void db_PointCentroid3D(double c[3],const double *X,int nr_points) +{ + int i; + double cx,cy,cz,m; + + cx=0;cy=0;cz=0; + for(i=0;i<nr_points;i++) + { + cx+= *X++; + cy+= *X++; + cz+= *X++; + } + if(nr_points) + { + m=1.0/((double)nr_points); + c[0]=cx*m; + c[1]=cy*m; + c[2]=cz*m; + } + else c[0]=c[1]=c[2]=0; +} + +inline void db_PointCentroid3D(double c[3],const double * const *X,int nr_points) +{ + int i; + double cx,cy,cz,m; + const double *temp; + + cx=0;cy=0;cz=0; + for(i=0;i<nr_points;i++) + { + temp= *X++; + cx+=temp[0]; + cy+=temp[1]; + cz+=temp[2]; + } + if(nr_points) + { + m=1.0/((double)nr_points); + c[0]=cx*m; + c[1]=cy*m; + c[2]=cz*m; + } + else c[0]=c[1]=c[2]=0; +} + +#endif /* DB_UTILITIES_GEOMETRY_H */ diff --git a/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp b/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp new file mode 100644 index 000000000..30ce03aa6 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_indexing.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*****************************************************************
*    Lean and mean begins here                                   *
*****************************************************************/

/*! Set the first nr entries of d to 0.0. */
void db_Zero(double *d,long nr)
{
    for(long i=0;i<nr;i++) d[i]=0.0;
}

/*!
Split source[first..last] around pivot: values below the pivot are packed
into dest from index first upward, values above it from index last
downward; values equal to the pivot are dropped. On return *first_equal
and *last_equal bracket the middle gap (the positions the pivot-valued
entries would occupy).
*/
void db_LeanPartitionOnPivot(double pivot,double *dest,const double *source,long first,long last,long *first_equal,long *last_equal)
{
    double *lo=dest+first;        /* next slot for a value < pivot */
    double *hi=dest+last;         /* next slot for a value > pivot */
    const double *p=source+first;
    const double *p_end=source+last;

    while(p<=p_end)
    {
        const double v=*(p++);
        if(v<pivot) *(lo++)=v;
        else if(v>pivot) *(hi--)=v;
    }
    *first_equal=lo-dest;
    *last_equal=hi-dest;
}
#include <stdint.h>  /* uintptr_t: integer wide enough to hold a pointer */

/*!
Align float pointer p forward to the next nr_bytes boundary
(p itself is returned when already aligned).
Fix vs. the original: the address arithmetic goes through uintptr_t;
the original cast through unsigned long, which is narrower than a
pointer on LLP64 targets (64-bit Windows) and could return a truncated,
invalid pointer.
*/
float* db_AlignPointer_f(float *p,unsigned long nr_bytes)
{
    const uintptr_t addr=(uintptr_t)p;
    const uintptr_t rem=addr%(uintptr_t)nr_bytes;

    if(rem) return((float*)(addr-rem+(uintptr_t)nr_bytes));
    return(p);
}

/*!
Align short pointer p forward to the next nr_bytes boundary
(p itself is returned when already aligned).
*/
short* db_AlignPointer_s(short *p,unsigned long nr_bytes)
{
    const uintptr_t addr=(uintptr_t)p;
    const uintptr_t rem=addr%(uintptr_t)nr_bytes;

    if(rem) return((short*)(addr-rem+(uintptr_t)nr_bytes));
    return(p);
}
+ */ + +/* $Id: db_utilities_indexing.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_UTILITIES_INDEXING +#define DB_UTILITIES_INDEXING + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ + +#include "db_utilities.h" + +/*! + * \defgroup LMIndexing (LM) Indexing Utilities (Order Statistics, Matrix Operations) + */ +/*\{*/ + +inline void db_SetupMatrixRefs(double **ar,long rows,long cols,double *a) +{ + long i; + for(i=0;i<rows;i++) ar[i]=&a[i*cols]; +} + +inline void db_SymmetricExtendUpperToLower(double **A,int rows,int cols) +{ + int i,j; + for(i=1;i<rows;i++) for(j=0;j<i;j++) A[i][j]=A[j][i]; +} + +void inline db_MultiplyMatrixVectorAtb(double *c,const double * const *At,const double *b,int arows,int acols) +{ + int i,j; + double acc; + + for(i=0;i<arows;i++) + { + acc=0; + for(j=0;j<acols;j++) acc+=At[j][i]*b[j]; + c[i]=acc; + } +} + +inline void db_MultiplyMatricesAB(double **C,const double * const *A,const double * const *B,int arows,int acols,int bcols) +{ + int i,j,k; + double acc; + + for(i=0;i<arows;i++) for(j=0;j<bcols;j++) + { + acc=0; + for(k=0;k<acols;k++) acc+=A[i][k]*B[k][j]; + C[i][j]=acc; + } +} + +inline void db_UpperMultiplyMatricesAtB(double **Cu,const double * const *At,const double * const *B,int arows,int acols,int bcols) +{ + int i,j,k; + double acc; + + for(i=0;i<arows;i++) for(j=i;j<bcols;j++) + { + acc=0; + for(k=0;k<acols;k++) acc+=At[k][i]*B[k][j]; + Cu[i][j]=acc; + } +} + +DB_API void db_Zero(double *d,long nr); + +inline int db_MaxIndex2(double s[2]) +{ + if(s[0]>=s[1]) return(0); + return(1); +} + +inline int db_MaxIndex3(const double s[3]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]>best){best=s[1];pos=1;} + if(s[2]>best){best=s[2];pos=2;} + return(pos); +} + +inline int db_MaxIndex4(const double s[4]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]>best){best=s[1];pos=1;} + 
if(s[2]>best){best=s[2];pos=2;} + if(s[3]>best){best=s[3];pos=3;} + return(pos); +} + +inline int db_MaxIndex5(const double s[5]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]>best){best=s[1];pos=1;} + if(s[2]>best){best=s[2];pos=2;} + if(s[3]>best){best=s[3];pos=3;} + if(s[4]>best){best=s[4];pos=4;} + return(pos); +} + +inline int db_MaxIndex6(const double s[6]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]>best){best=s[1];pos=1;} + if(s[2]>best){best=s[2];pos=2;} + if(s[3]>best){best=s[3];pos=3;} + if(s[4]>best){best=s[4];pos=4;} + if(s[5]>best){best=s[5];pos=5;} + return(pos); +} + +inline int db_MaxIndex7(const double s[7]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]>best){best=s[1];pos=1;} + if(s[2]>best){best=s[2];pos=2;} + if(s[3]>best){best=s[3];pos=3;} + if(s[4]>best){best=s[4];pos=4;} + if(s[5]>best){best=s[5];pos=5;} + if(s[6]>best){best=s[6];pos=6;} + return(pos); +} + +inline int db_MinIndex7(const double s[7]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]<best){best=s[1];pos=1;} + if(s[2]<best){best=s[2];pos=2;} + if(s[3]<best){best=s[3];pos=3;} + if(s[4]<best){best=s[4];pos=4;} + if(s[5]<best){best=s[5];pos=5;} + if(s[6]<best){best=s[6];pos=6;} + return(pos); +} + +inline int db_MinIndex9(const double s[9]) +{ + double best; + int pos; + + best=s[0];pos=0; + if(s[1]<best){best=s[1];pos=1;} + if(s[2]<best){best=s[2];pos=2;} + if(s[3]<best){best=s[3];pos=3;} + if(s[4]<best){best=s[4];pos=4;} + if(s[5]<best){best=s[5];pos=5;} + if(s[6]<best){best=s[6];pos=6;} + if(s[7]<best){best=s[7];pos=7;} + if(s[8]<best){best=s[8];pos=8;} + return(pos); +} + +inline int db_MaxAbsIndex3(const double *s) +{ + double t,best; + int pos; + + best=fabs(s[0]);pos=0; + t=fabs(s[1]);if(t>best){best=t;pos=1;} + t=fabs(s[2]);if(t>best){pos=2;} + return(pos); +} + +inline int db_MaxAbsIndex9(const double *s) +{ + double t,best; + int pos; + + best=fabs(s[0]);pos=0; + t=fabs(s[1]);if(t>best){best=t;pos=1;} + 
/*!
 Median of 3 doubles.
 */
inline double db_TripleMedian(double a,double b,double c)
{
    /* Flattened pairwise comparisons; ties resolve exactly as the
       strict '>' tests dictate. */
    if(a>b)
    {
        if(c>a) return(a);
        if(c>b) return(c);
        return(b);
    }
    if(c>b) return(b);
    if(c>a) return(c);
    return(a);
}
/* Declared in db_utilities.h; repeated for readability. */
double db_SafeReciprocal(double a);
double db_SafeDivision(double a,double b);

/*****************************************************************
*    Lean and mean begins here                                   *
*****************************************************************/

/*Cholesky-factorize symmetric positive definite 6 x 6 matrix A. Upper
part of A is used from the input. The Cholesky factor is output as
subdiagonal part in A and diagonal in d, which is 6-dimensional.
A non-positive pivot is replaced by 1.0 so the routine never takes the
square root of a negative number.*/
void db_CholeskyDecomp6x6(double A[36],double d[6])
{
    double sum,inv;

    /*[50 mult 35 add 6sqrt=85flops 6func]*/
    /*Column 0*/
    sum=A[0];
    d[0]=((sum>0.0)?sqrt(sum):1.0);
    inv=db_SafeReciprocal(d[0]);
    A[6]=A[1]*inv;
    A[12]=A[2]*inv;
    A[18]=A[3]*inv;
    A[24]=A[4]*inv;
    A[30]=A[5]*inv;
    /*Column 1*/
    sum=A[7]-A[6]*A[6];
    d[1]=((sum>0.0)?sqrt(sum):1.0);
    inv=db_SafeReciprocal(d[1]);
    A[13]=(A[8]-A[6]*A[12])*inv;
    A[19]=(A[9]-A[6]*A[18])*inv;
    A[25]=(A[10]-A[6]*A[24])*inv;
    A[31]=(A[11]-A[6]*A[30])*inv;
    /*Column 2*/
    sum=A[14]-A[12]*A[12]-A[13]*A[13];
    d[2]=((sum>0.0)?sqrt(sum):1.0);
    inv=db_SafeReciprocal(d[2]);
    A[20]=(A[15]-A[12]*A[18]-A[13]*A[19])*inv;
    A[26]=(A[16]-A[12]*A[24]-A[13]*A[25])*inv;
    A[32]=(A[17]-A[12]*A[30]-A[13]*A[31])*inv;
    /*Column 3*/
    sum=A[21]-A[18]*A[18]-A[19]*A[19]-A[20]*A[20];
    d[3]=((sum>0.0)?sqrt(sum):1.0);
    inv=db_SafeReciprocal(d[3]);
    A[27]=(A[22]-A[18]*A[24]-A[19]*A[25]-A[20]*A[26])*inv;
    A[33]=(A[23]-A[18]*A[30]-A[19]*A[31]-A[20]*A[32])*inv;
    /*Column 4*/
    sum=A[28]-A[24]*A[24]-A[25]*A[25]-A[26]*A[26]-A[27]*A[27];
    d[4]=((sum>0.0)?sqrt(sum):1.0);
    inv=db_SafeReciprocal(d[4]);
    A[34]=(A[29]-A[24]*A[30]-A[25]*A[31]-A[26]*A[32]-A[27]*A[33])*inv;
    /*Column 5*/
    sum=A[35]-A[30]*A[30]-A[31]*A[31]-A[32]*A[32]-A[33]*A[33]-A[34]*A[34];
    d[5]=((sum>0.0)?sqrt(sum):1.0);
}

/*Cholesky-factorize symmetric positive definite n x n matrix A. Part
above the diagonal of A is used from the input; the diagonal of A is
assumed to be stored in d. The Cholesky factor is output as the
subdiagonal part of A and the diagonal in d (n-dimensional).*/
void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n)
{
    double sum;
    double inv=0.0;  /* set at each diagonal step before any subdiagonal use */

    for(int i=0;i<n;i++) for(int j=i;j<n;j++)
    {
        sum=(i==j)?d[i]:A[i][j];
        for(int k=i-1;k>=0;k--) sum-=A[i][k]*A[j][k];
        if(i==j)
        {
            d[i]=((sum>0.0)?sqrt(sum):1.0);
            inv=db_SafeReciprocal(d[i]);
        }
        else A[j][i]=sum*inv;
    }
}

/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
of an n x n matrix and the right hand side b. The vector b is unchanged.*/
void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b)
{
    /* Forward solve L*y=b (y stored in x). */
    for(int i=0;i<n;i++)
    {
        double sum=b[i];
        for(int k=i-1;k>=0;k--) sum-=A[i][k]*x[k];
        x[i]=db_SafeDivision(sum,d[i]);
    }
    /* Back solve transpose(L)*x=y. */
    for(int i=n-1;i>=0;i--)
    {
        double sum=x[i];
        for(int k=i+1;k<n;k++) sum-=A[k][i]*x[k];
        x[i]=db_SafeDivision(sum,d[i]);
    }
}
The Cholesky factor is output as subdiagonal part in A +and diagonal in d, which is 3-dimensional*/ +void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3]) +{ + double s,temp; + + /*i=0*/ + s=d[0]; + d[0]=((s>0.0)?sqrt(s):1.0); + temp=db_SafeReciprocal(d[0]); + A[3]=A[1]*temp; + A[6]=A[2]*temp; + /*i=1*/ + s=d[1]-A[3]*A[3]; + d[1]=((s>0.0)?sqrt(s):1.0); + temp=db_SafeReciprocal(d[1]); + A[7]=(A[5]-A[3]*A[6])*temp; + /*i=2*/ + s=d[2]-A[6]*A[6]-A[7]*A[7]; + d[2]=((s>0.0)?sqrt(s):1.0); +} + +/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition +of a 3 x 3 matrix and the right hand side b. The vector b is unchanged*/ +void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3]) +{ + /*[42 mult 30 add=72flops]*/ + x[0]=db_SafeDivision(b[0],d[0]); + x[1]=db_SafeDivision((b[1]-A[3]*x[0]),d[1]); + x[2]=db_SafeDivision((b[2]-A[6]*x[0]-A[7]*x[1]),d[2]); + x[2]=db_SafeDivision(x[2],d[2]); + x[1]=db_SafeDivision((x[1]-A[7]*x[2]),d[1]); + x[0]=db_SafeDivision((x[0]-A[6]*x[2]-A[3]*x[1]),d[0]); +} + +/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition +of a 6 x 6 matrix and the right hand side b. 
The vector b is unchanged*/ +void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6]) +{ + /*[42 mult 30 add=72flops]*/ + x[0]=db_SafeDivision(b[0],d[0]); + x[1]=db_SafeDivision((b[1]-A[6]*x[0]),d[1]); + x[2]=db_SafeDivision((b[2]-A[12]*x[0]-A[13]*x[1]),d[2]); + x[3]=db_SafeDivision((b[3]-A[18]*x[0]-A[19]*x[1]-A[20]*x[2]),d[3]); + x[4]=db_SafeDivision((b[4]-A[24]*x[0]-A[25]*x[1]-A[26]*x[2]-A[27]*x[3]),d[4]); + x[5]=db_SafeDivision((b[5]-A[30]*x[0]-A[31]*x[1]-A[32]*x[2]-A[33]*x[3]-A[34]*x[4]),d[5]); + x[5]=db_SafeDivision(x[5],d[5]); + x[4]=db_SafeDivision((x[4]-A[34]*x[5]),d[4]); + x[3]=db_SafeDivision((x[3]-A[33]*x[5]-A[27]*x[4]),d[3]); + x[2]=db_SafeDivision((x[2]-A[32]*x[5]-A[26]*x[4]-A[20]*x[3]),d[2]); + x[1]=db_SafeDivision((x[1]-A[31]*x[5]-A[25]*x[4]-A[19]*x[3]-A[13]*x[2]),d[1]); + x[0]=db_SafeDivision((x[0]-A[30]*x[5]-A[24]*x[4]-A[18]*x[3]-A[12]*x[2]-A[6]*x[1]),d[0]); +} + + +void db_Orthogonalize6x7(double A[42],int orthonormalize) +{ + int i; + double ss[6]; + + /*Compute square sums of rows*/ + ss[0]=db_SquareSum7(A); + ss[1]=db_SquareSum7(A+7); + ss[2]=db_SquareSum7(A+14); + ss[3]=db_SquareSum7(A+21); + ss[4]=db_SquareSum7(A+28); + ss[5]=db_SquareSum7(A+35); + + ss[1]-=db_OrthogonalizePair7(A+7 ,A,ss[0]); + ss[2]-=db_OrthogonalizePair7(A+14,A,ss[0]); + ss[3]-=db_OrthogonalizePair7(A+21,A,ss[0]); + ss[4]-=db_OrthogonalizePair7(A+28,A,ss[0]); + ss[5]-=db_OrthogonalizePair7(A+35,A,ss[0]); + + /*Pivot on largest ss (could also be done on ss/(original_ss))*/ + i=db_MaxIndex5(ss+1); + db_OrthogonalizationSwap7(A+7,i,ss+1); + + ss[2]-=db_OrthogonalizePair7(A+14,A+7,ss[1]); + ss[3]-=db_OrthogonalizePair7(A+21,A+7,ss[1]); + ss[4]-=db_OrthogonalizePair7(A+28,A+7,ss[1]); + ss[5]-=db_OrthogonalizePair7(A+35,A+7,ss[1]); + + i=db_MaxIndex4(ss+2); + db_OrthogonalizationSwap7(A+14,i,ss+2); + + ss[3]-=db_OrthogonalizePair7(A+21,A+14,ss[2]); + ss[4]-=db_OrthogonalizePair7(A+28,A+14,ss[2]); + 
ss[5]-=db_OrthogonalizePair7(A+35,A+14,ss[2]); + + i=db_MaxIndex3(ss+3); + db_OrthogonalizationSwap7(A+21,i,ss+3); + + ss[4]-=db_OrthogonalizePair7(A+28,A+21,ss[3]); + ss[5]-=db_OrthogonalizePair7(A+35,A+21,ss[3]); + + i=db_MaxIndex2(ss+4); + db_OrthogonalizationSwap7(A+28,i,ss+4); + + ss[5]-=db_OrthogonalizePair7(A+35,A+28,ss[4]); + + if(orthonormalize) + { + db_MultiplyScalar7(A ,db_SafeSqrtReciprocal(ss[0])); + db_MultiplyScalar7(A+7 ,db_SafeSqrtReciprocal(ss[1])); + db_MultiplyScalar7(A+14,db_SafeSqrtReciprocal(ss[2])); + db_MultiplyScalar7(A+21,db_SafeSqrtReciprocal(ss[3])); + db_MultiplyScalar7(A+28,db_SafeSqrtReciprocal(ss[4])); + db_MultiplyScalar7(A+35,db_SafeSqrtReciprocal(ss[5])); + } +} + +void db_Orthogonalize8x9(double A[72],int orthonormalize) +{ + int i; + double ss[8]; + + /*Compute square sums of rows*/ + ss[0]=db_SquareSum9(A); + ss[1]=db_SquareSum9(A+9); + ss[2]=db_SquareSum9(A+18); + ss[3]=db_SquareSum9(A+27); + ss[4]=db_SquareSum9(A+36); + ss[5]=db_SquareSum9(A+45); + ss[6]=db_SquareSum9(A+54); + ss[7]=db_SquareSum9(A+63); + + ss[1]-=db_OrthogonalizePair9(A+9 ,A,ss[0]); + ss[2]-=db_OrthogonalizePair9(A+18,A,ss[0]); + ss[3]-=db_OrthogonalizePair9(A+27,A,ss[0]); + ss[4]-=db_OrthogonalizePair9(A+36,A,ss[0]); + ss[5]-=db_OrthogonalizePair9(A+45,A,ss[0]); + ss[6]-=db_OrthogonalizePair9(A+54,A,ss[0]); + ss[7]-=db_OrthogonalizePair9(A+63,A,ss[0]); + + /*Pivot on largest ss (could also be done on ss/(original_ss))*/ + i=db_MaxIndex7(ss+1); + db_OrthogonalizationSwap9(A+9,i,ss+1); + + ss[2]-=db_OrthogonalizePair9(A+18,A+9,ss[1]); + ss[3]-=db_OrthogonalizePair9(A+27,A+9,ss[1]); + ss[4]-=db_OrthogonalizePair9(A+36,A+9,ss[1]); + ss[5]-=db_OrthogonalizePair9(A+45,A+9,ss[1]); + ss[6]-=db_OrthogonalizePair9(A+54,A+9,ss[1]); + ss[7]-=db_OrthogonalizePair9(A+63,A+9,ss[1]); + + i=db_MaxIndex6(ss+2); + db_OrthogonalizationSwap9(A+18,i,ss+2); + + ss[3]-=db_OrthogonalizePair9(A+27,A+18,ss[2]); + ss[4]-=db_OrthogonalizePair9(A+36,A+18,ss[2]); + 
ss[5]-=db_OrthogonalizePair9(A+45,A+18,ss[2]); + ss[6]-=db_OrthogonalizePair9(A+54,A+18,ss[2]); + ss[7]-=db_OrthogonalizePair9(A+63,A+18,ss[2]); + + i=db_MaxIndex5(ss+3); + db_OrthogonalizationSwap9(A+27,i,ss+3); + + ss[4]-=db_OrthogonalizePair9(A+36,A+27,ss[3]); + ss[5]-=db_OrthogonalizePair9(A+45,A+27,ss[3]); + ss[6]-=db_OrthogonalizePair9(A+54,A+27,ss[3]); + ss[7]-=db_OrthogonalizePair9(A+63,A+27,ss[3]); + + i=db_MaxIndex4(ss+4); + db_OrthogonalizationSwap9(A+36,i,ss+4); + + ss[5]-=db_OrthogonalizePair9(A+45,A+36,ss[4]); + ss[6]-=db_OrthogonalizePair9(A+54,A+36,ss[4]); + ss[7]-=db_OrthogonalizePair9(A+63,A+36,ss[4]); + + i=db_MaxIndex3(ss+5); + db_OrthogonalizationSwap9(A+45,i,ss+5); + + ss[6]-=db_OrthogonalizePair9(A+54,A+45,ss[5]); + ss[7]-=db_OrthogonalizePair9(A+63,A+45,ss[5]); + + i=db_MaxIndex2(ss+6); + db_OrthogonalizationSwap9(A+54,i,ss+6); + + ss[7]-=db_OrthogonalizePair9(A+63,A+54,ss[6]); + + if(orthonormalize) + { + db_MultiplyScalar9(A ,db_SafeSqrtReciprocal(ss[0])); + db_MultiplyScalar9(A+9 ,db_SafeSqrtReciprocal(ss[1])); + db_MultiplyScalar9(A+18,db_SafeSqrtReciprocal(ss[2])); + db_MultiplyScalar9(A+27,db_SafeSqrtReciprocal(ss[3])); + db_MultiplyScalar9(A+36,db_SafeSqrtReciprocal(ss[4])); + db_MultiplyScalar9(A+45,db_SafeSqrtReciprocal(ss[5])); + db_MultiplyScalar9(A+54,db_SafeSqrtReciprocal(ss[6])); + db_MultiplyScalar9(A+63,db_SafeSqrtReciprocal(ss[7])); + } +} + +void db_NullVectorOrthonormal6x7(double x[7],const double A[42]) +{ + int i; + double omss[7]; + const double *B; + + /*Pivot by choosing row of the identity matrix + (the one corresponding to column of A with smallest square sum)*/ + omss[0]=db_SquareSum6Stride7(A); + omss[1]=db_SquareSum6Stride7(A+1); + omss[2]=db_SquareSum6Stride7(A+2); + omss[3]=db_SquareSum6Stride7(A+3); + omss[4]=db_SquareSum6Stride7(A+4); + omss[5]=db_SquareSum6Stride7(A+5); + omss[6]=db_SquareSum6Stride7(A+6); + i=db_MinIndex7(omss); + /*orthogonalize that row against all previous rows + and normalize it*/ + 
B=A+i; + db_MultiplyScalarCopy7(x,A,-B[0]); + db_RowOperation7(x,A+7 ,B[7]); + db_RowOperation7(x,A+14,B[14]); + db_RowOperation7(x,A+21,B[21]); + db_RowOperation7(x,A+28,B[28]); + db_RowOperation7(x,A+35,B[35]); + x[i]+=1.0; + db_MultiplyScalar7(x,db_SafeSqrtReciprocal(1.0-omss[i])); +} + +void db_NullVectorOrthonormal8x9(double x[9],const double A[72]) +{ + int i; + double omss[9]; + const double *B; + + /*Pivot by choosing row of the identity matrix + (the one corresponding to column of A with smallest square sum)*/ + omss[0]=db_SquareSum8Stride9(A); + omss[1]=db_SquareSum8Stride9(A+1); + omss[2]=db_SquareSum8Stride9(A+2); + omss[3]=db_SquareSum8Stride9(A+3); + omss[4]=db_SquareSum8Stride9(A+4); + omss[5]=db_SquareSum8Stride9(A+5); + omss[6]=db_SquareSum8Stride9(A+6); + omss[7]=db_SquareSum8Stride9(A+7); + omss[8]=db_SquareSum8Stride9(A+8); + i=db_MinIndex9(omss); + /*orthogonalize that row against all previous rows + and normalize it*/ + B=A+i; + db_MultiplyScalarCopy9(x,A,-B[0]); + db_RowOperation9(x,A+9 ,B[9]); + db_RowOperation9(x,A+18,B[18]); + db_RowOperation9(x,A+27,B[27]); + db_RowOperation9(x,A+36,B[36]); + db_RowOperation9(x,A+45,B[45]); + db_RowOperation9(x,A+54,B[54]); + db_RowOperation9(x,A+63,B[63]); + x[i]+=1.0; + db_MultiplyScalar9(x,db_SafeSqrtReciprocal(1.0-omss[i])); +} + diff --git a/jni/feature_stab/db_vlvm/db_utilities_linalg.h b/jni/feature_stab/db_vlvm/db_utilities_linalg.h new file mode 100644 index 000000000..1f63d4e57 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_linalg.h @@ -0,0 +1,802 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_utilities_linalg.h,v 1.5 2011/06/17 14:03:31 mbansal Exp $ */ + +#ifndef DB_UTILITIES_LINALG +#define DB_UTILITIES_LINALG + +#include "db_utilities.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMLinAlg (LM) Linear Algebra Utilities (QR factorization, orthogonal basis, etc.) + */ + +/*! + \ingroup LMBasicUtilities + */ +inline void db_MultiplyScalar6(double A[6],double mult) +{ + (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; + (*A++) *= mult; +} +/*! + \ingroup LMBasicUtilities + */ +inline void db_MultiplyScalar7(double A[7],double mult) +{ + (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; + (*A++) *= mult; (*A++) *= mult; +} +/*! + \ingroup LMBasicUtilities + */ +inline void db_MultiplyScalar9(double A[9],double mult) +{ + (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; + (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; +} + +/*! + \ingroup LMBasicUtilities + */ +inline double db_SquareSum6Stride7(const double *x) +{ + return(db_sqr(x[0])+db_sqr(x[7])+db_sqr(x[14])+ + db_sqr(x[21])+db_sqr(x[28])+db_sqr(x[35])); +} + +/*! + \ingroup LMBasicUtilities + */ +inline double db_SquareSum8Stride9(const double *x) +{ + return(db_sqr(x[0])+db_sqr(x[9])+db_sqr(x[18])+ + db_sqr(x[27])+db_sqr(x[36])+db_sqr(x[45])+ + db_sqr(x[54])+db_sqr(x[63])); +} + +/*! 
+ \ingroup LMLinAlg + Cholesky-factorize symmetric positive definite 6 x 6 matrix A. Upper +part of A is used from the input. The Cholesky factor is output as +subdiagonal part in A and diagonal in d, which is 6-dimensional +1.9 microseconds on 450MHz*/ +DB_API void db_CholeskyDecomp6x6(double A[36],double d[6]); + +/*! + \ingroup LMLinAlg + Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition +of a 6 x 6 matrix and the right hand side b. The vector b is unchanged +1.3 microseconds on 450MHz*/ +DB_API void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6]); + +/*! + \ingroup LMLinAlg + Cholesky-factorize symmetric positive definite n x n matrix A.Part +above diagonal of A is used from the input, diagonal of A is assumed to +be stored in d. The Cholesky factor is output as +subdiagonal part in A and diagonal in d, which is n-dimensional*/ +DB_API void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n); + +/*! + \ingroup LMLinAlg + Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition +of an n x n matrix and the right hand side b. The vector b is unchanged*/ +DB_API void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b); + +/*! + \ingroup LMLinAlg + Cholesky-factorize symmetric positive definite 3 x 3 matrix A. Part +above diagonal of A is used from the input, diagonal of A is assumed to +be stored in d. The Cholesky factor is output as subdiagonal part in A +and diagonal in d, which is 3-dimensional*/ +DB_API void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3]); + +/*! + \ingroup LMLinAlg + Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition +of a 3 x 3 matrix and the right hand side b. The vector b is unchanged*/ +DB_API void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3]); + +/*! 
+ \ingroup LMLinAlg + perform A-=B*mult*/ +inline void db_RowOperation3(double A[3],const double B[3],double mult) +{ + *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); +} + +/*! + \ingroup LMLinAlg + */ +inline void db_RowOperation7(double A[7],const double B[7],double mult) +{ + *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); + *A++ -= mult*(*B++); *A++ -= mult*(*B++); +} + +/*! + \ingroup LMLinAlg + */ +inline void db_RowOperation9(double A[9],const double B[9],double mult) +{ + *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); + *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); +} + +/*! + \ingroup LMBasicUtilities + Swap values of A[7] and B[7] + */ +inline void db_Swap7(double A[7],double B[7]) +{ + double temp; + temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; + temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; + temp= *A; *A++ = *B; *B++ =temp; +} + +/*! + \ingroup LMBasicUtilities + Swap values of A[9] and B[9] + */ +inline void db_Swap9(double A[9],double B[9]) +{ + double temp; + temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; + temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; + temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; temp= *A; *A++ = *B; *B++ =temp; +} + + +/*! + \ingroup LMLinAlg + */ +DB_API void db_Orthogonalize6x7(double A[42],int orthonormalize=0); + +/*! + \ingroup LMLinAlg + */ +DB_API void db_Orthogonalize8x9(double A[72],int orthonormalize=0); + +/*! 
+ \ingroup LMLinAlg + */ +inline double db_OrthogonalizePair7(double *x,const double *v,double ssv) +{ + double m,sp,sp_m; + + m=db_SafeReciprocal(ssv); + sp=db_ScalarProduct7(x,v); + sp_m=sp*m; + db_RowOperation7(x,v,sp_m); + return(sp*sp_m); +} + +/*! + \ingroup LMLinAlg + */ +inline double db_OrthogonalizePair9(double *x,const double *v,double ssv) +{ + double m,sp,sp_m; + + m=db_SafeReciprocal(ssv); + sp=db_ScalarProduct9(x,v); + sp_m=sp*m; + db_RowOperation9(x,v,sp_m); + return(sp*sp_m); +} + +/*! + \ingroup LMLinAlg + */ +inline void db_OrthogonalizationSwap7(double *A,int i,double *ss) +{ + double temp; + + db_Swap7(A,A+7*i); + temp=ss[0]; ss[0]=ss[i]; ss[i]=temp; +} +/*! + \ingroup LMLinAlg + */ +inline void db_OrthogonalizationSwap9(double *A,int i,double *ss) +{ + double temp; + + db_Swap9(A,A+9*i); + temp=ss[0]; ss[0]=ss[i]; ss[i]=temp; +} + +/*! + \ingroup LMLinAlg + */ +DB_API void db_NullVectorOrthonormal6x7(double x[7],const double A[42]); +/*! + \ingroup LMLinAlg + */ +DB_API void db_NullVectorOrthonormal8x9(double x[9],const double A[72]); + +/*! + \ingroup LMLinAlg + */ +inline void db_NullVector6x7Destructive(double x[7],double A[42]) +{ + db_Orthogonalize6x7(A,1); + db_NullVectorOrthonormal6x7(x,A); +} + +/*! 
+ \ingroup LMLinAlg + */ +inline void db_NullVector8x9Destructive(double x[9],double A[72]) +{ + db_Orthogonalize8x9(A,1); + db_NullVectorOrthonormal8x9(x,A); +} + +inline int db_ScalarProduct512_s(const short *f,const short *g) +{ +#ifndef DB_USE_MMX + int back; + back=0; + for(int i=1; i<=512; i++) + back+=(*f++)*(*g++); + + return(back); +#endif +} + + +inline int db_ScalarProduct32_s(const short *f,const short *g) +{ +#ifndef DB_USE_MMX + int back; + back=0; + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + return(back); +#endif +} + +/*! 
+ \ingroup LMLinAlg + Scalar product of 128-vectors (short) + Compile-time control: MMX, SSE2 or standard C + */ +inline int db_ScalarProduct128_s(const short *f,const short *g) +{ +#ifndef DB_USE_MMX + int back; + back=0; + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); 
back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + return(back); +#else +#ifdef DB_USE_SSE2 + int back; + + _asm + { + mov eax,f + mov ecx,g + /*First iteration************************************/ + movdqa xmm0,[eax] + pxor xmm7,xmm7 /*Set xmm7 to zero*/ + pmaddwd xmm0,[ecx] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm2,[eax+16] + paddd xmm7,xmm0 + pmaddwd xmm2,[ecx+16] + /*Stall*/ + movdqa xmm1,[eax+32] + paddd xmm7,xmm2 + pmaddwd xmm1,[ecx+32] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm0,[eax+48] + paddd xmm7,xmm1 + pmaddwd xmm0,[ecx+48] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm2,[eax+64] + paddd xmm7,xmm0 + pmaddwd xmm2,[ecx+64] + /*Stall*/ + movdqa xmm1,[eax+80] + paddd xmm7,xmm2 + pmaddwd xmm1,[ecx+80] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm0,[eax+96] + paddd xmm7,xmm1 + pmaddwd xmm0,[ecx+96] + /*Stall*/ + /*Standard iteration************************************/ + movdqa 
xmm2,[eax+112] + paddd xmm7,xmm0 + pmaddwd xmm2,[ecx+112] + /*Stall*/ + movdqa xmm1,[eax+128] + paddd xmm7,xmm2 + pmaddwd xmm1,[ecx+128] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm0,[eax+144] + paddd xmm7,xmm1 + pmaddwd xmm0,[ecx+144] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm2,[eax+160] + paddd xmm7,xmm0 + pmaddwd xmm2,[ecx+160] + /*Stall*/ + movdqa xmm1,[eax+176] + paddd xmm7,xmm2 + pmaddwd xmm1,[ecx+176] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm0,[eax+192] + paddd xmm7,xmm1 + pmaddwd xmm0,[ecx+192] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm2,[eax+208] + paddd xmm7,xmm0 + pmaddwd xmm2,[ecx+208] + /*Stall*/ + movdqa xmm1,[eax+224] + paddd xmm7,xmm2 + pmaddwd xmm1,[ecx+224] + /*Stall*/ + /*Standard iteration************************************/ + movdqa xmm0,[eax+240] + paddd xmm7,xmm1 + pmaddwd xmm0,[ecx+240] + /*Stall*/ + /*Rest iteration************************************/ + paddd xmm7,xmm0 + + /* add up the sum squares */ + movhlps xmm0,xmm7 /* high half to low half */ + paddd xmm7,xmm0 /* add high to low */ + pshuflw xmm0,xmm7, 0xE /* reshuffle */ + paddd xmm7,xmm0 /* add remaining */ + movd back,xmm7 + + emms + } + + return(back); +#else + int back; + + _asm + { + mov eax,f + mov ecx,g + /*First iteration************************************/ + movq mm0,[eax] + pxor mm7,mm7 /*Set mm7 to zero*/ + pmaddwd mm0,[ecx] + /*Stall*/ + movq mm1,[eax+8] + /*Stall*/ + pmaddwd mm1,[ecx+8] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+16] + paddd mm7,mm0 + pmaddwd mm2,[ecx+16] + /*Stall*/ + movq mm0,[eax+24] + paddd mm7,mm1 + pmaddwd mm0,[ecx+24] + /*Stall*/ + movq mm1,[eax+32] + paddd mm7,mm2 + pmaddwd mm1,[ecx+32] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+40] + paddd mm7,mm0 + pmaddwd mm2,[ecx+40] + /*Stall*/ + movq 
mm0,[eax+48] + paddd mm7,mm1 + pmaddwd mm0,[ecx+48] + /*Stall*/ + movq mm1,[eax+56] + paddd mm7,mm2 + pmaddwd mm1,[ecx+56] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+64] + paddd mm7,mm0 + pmaddwd mm2,[ecx+64] + /*Stall*/ + movq mm0,[eax+72] + paddd mm7,mm1 + pmaddwd mm0,[ecx+72] + /*Stall*/ + movq mm1,[eax+80] + paddd mm7,mm2 + pmaddwd mm1,[ecx+80] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+88] + paddd mm7,mm0 + pmaddwd mm2,[ecx+88] + /*Stall*/ + movq mm0,[eax+96] + paddd mm7,mm1 + pmaddwd mm0,[ecx+96] + /*Stall*/ + movq mm1,[eax+104] + paddd mm7,mm2 + pmaddwd mm1,[ecx+104] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+112] + paddd mm7,mm0 + pmaddwd mm2,[ecx+112] + /*Stall*/ + movq mm0,[eax+120] + paddd mm7,mm1 + pmaddwd mm0,[ecx+120] + /*Stall*/ + movq mm1,[eax+128] + paddd mm7,mm2 + pmaddwd mm1,[ecx+128] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+136] + paddd mm7,mm0 + pmaddwd mm2,[ecx+136] + /*Stall*/ + movq mm0,[eax+144] + paddd mm7,mm1 + pmaddwd mm0,[ecx+144] + /*Stall*/ + movq mm1,[eax+152] + paddd mm7,mm2 + pmaddwd mm1,[ecx+152] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+160] + paddd mm7,mm0 + pmaddwd mm2,[ecx+160] + /*Stall*/ + movq mm0,[eax+168] + paddd mm7,mm1 + pmaddwd mm0,[ecx+168] + /*Stall*/ + movq mm1,[eax+176] + paddd mm7,mm2 + pmaddwd mm1,[ecx+176] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+184] + paddd mm7,mm0 + pmaddwd mm2,[ecx+184] + /*Stall*/ + movq mm0,[eax+192] + paddd mm7,mm1 + pmaddwd mm0,[ecx+192] + /*Stall*/ + movq mm1,[eax+200] + paddd mm7,mm2 + pmaddwd mm1,[ecx+200] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+208] + paddd mm7,mm0 + pmaddwd mm2,[ecx+208] + /*Stall*/ + movq mm0,[eax+216] + paddd mm7,mm1 + pmaddwd mm0,[ecx+216] + 
/*Stall*/ + movq mm1,[eax+224] + paddd mm7,mm2 + pmaddwd mm1,[ecx+224] + /*Stall*/ + /*Standard iteration************************************/ + movq mm2,[eax+232] + paddd mm7,mm0 + pmaddwd mm2,[ecx+232] + /*Stall*/ + movq mm0,[eax+240] + paddd mm7,mm1 + pmaddwd mm0,[ecx+240] + /*Stall*/ + movq mm1,[eax+248] + paddd mm7,mm2 + pmaddwd mm1,[ecx+248] + /*Stall*/ + /*Rest iteration************************************/ + paddd mm7,mm0 + /*Stall*/ + /*Stall*/ + /*Stall*/ + paddd mm7,mm1 + /*Stall*/ + movq mm0,mm7 + psrlq mm7,32 + paddd mm0,mm7 + /*Stall*/ + /*Stall*/ + /*Stall*/ + movd back,mm0 + emms + } + + return(back); +#endif +#endif /*DB_USE_MMX*/ +} + +/*! + \ingroup LMLinAlg + Scalar product of 16 byte aligned 128-vectors (float). + Compile-time control: SIMD (SSE) or standard C. + */ +inline float db_ScalarProduct128Aligned16_f(const float *f,const float *g) +{ +#ifndef DB_USE_SIMD + float back; + back=0.0; + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + 
back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); + + return(back); +#else + float back; + + _asm + { + mov eax,f + mov ecx,g + /*First iteration************************************/ + movaps xmm0,[eax] + xorps xmm7,xmm7 /*Set mm7 to zero*/ + mulps xmm0,[ecx] + /*Stall*/ + movaps xmm1,[eax+16] + /*Stall*/ + mulps xmm1,[ecx+16] + /*Stall*/ + /*Standard 
iteration************************************/ + movaps xmm2,[eax+32] + addps xmm7,xmm0 + mulps xmm2,[ecx+32] + /*Stall*/ + movaps xmm0,[eax+48] + addps xmm7,xmm1 + mulps xmm0,[ecx+48] + /*Stall*/ + movaps xmm1,[eax+64] + addps xmm7,xmm2 + mulps xmm1,[ecx+64] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+80] + addps xmm7,xmm0 + mulps xmm2,[ecx+80] + /*Stall*/ + movaps xmm0,[eax+96] + addps xmm7,xmm1 + mulps xmm0,[ecx+96] + /*Stall*/ + movaps xmm1,[eax+112] + addps xmm7,xmm2 + mulps xmm1,[ecx+112] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+128] + addps xmm7,xmm0 + mulps xmm2,[ecx+128] + /*Stall*/ + movaps xmm0,[eax+144] + addps xmm7,xmm1 + mulps xmm0,[ecx+144] + /*Stall*/ + movaps xmm1,[eax+160] + addps xmm7,xmm2 + mulps xmm1,[ecx+160] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+176] + addps xmm7,xmm0 + mulps xmm2,[ecx+176] + /*Stall*/ + movaps xmm0,[eax+192] + addps xmm7,xmm1 + mulps xmm0,[ecx+192] + /*Stall*/ + movaps xmm1,[eax+208] + addps xmm7,xmm2 + mulps xmm1,[ecx+208] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+224] + addps xmm7,xmm0 + mulps xmm2,[ecx+224] + /*Stall*/ + movaps xmm0,[eax+240] + addps xmm7,xmm1 + mulps xmm0,[ecx+240] + /*Stall*/ + movaps xmm1,[eax+256] + addps xmm7,xmm2 + mulps xmm1,[ecx+256] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+272] + addps xmm7,xmm0 + mulps xmm2,[ecx+272] + /*Stall*/ + movaps xmm0,[eax+288] + addps xmm7,xmm1 + mulps xmm0,[ecx+288] + /*Stall*/ + movaps xmm1,[eax+304] + addps xmm7,xmm2 + mulps xmm1,[ecx+304] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+320] + addps xmm7,xmm0 + mulps xmm2,[ecx+320] + /*Stall*/ + movaps xmm0,[eax+336] + addps xmm7,xmm1 + mulps xmm0,[ecx+336] + /*Stall*/ + movaps xmm1,[eax+352] + addps xmm7,xmm2 + mulps 
xmm1,[ecx+352] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+368] + addps xmm7,xmm0 + mulps xmm2,[ecx+368] + /*Stall*/ + movaps xmm0,[eax+384] + addps xmm7,xmm1 + mulps xmm0,[ecx+384] + /*Stall*/ + movaps xmm1,[eax+400] + addps xmm7,xmm2 + mulps xmm1,[ecx+400] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+416] + addps xmm7,xmm0 + mulps xmm2,[ecx+416] + /*Stall*/ + movaps xmm0,[eax+432] + addps xmm7,xmm1 + mulps xmm0,[ecx+432] + /*Stall*/ + movaps xmm1,[eax+448] + addps xmm7,xmm2 + mulps xmm1,[ecx+448] + /*Stall*/ + /*Standard iteration************************************/ + movaps xmm2,[eax+464] + addps xmm7,xmm0 + mulps xmm2,[ecx+464] + /*Stall*/ + movaps xmm0,[eax+480] + addps xmm7,xmm1 + mulps xmm0,[ecx+480] + /*Stall*/ + movaps xmm1,[eax+496] + addps xmm7,xmm2 + mulps xmm1,[ecx+496] + /*Stall*/ + /*Rest iteration************************************/ + addps xmm7,xmm0 + /*Stall*/ + addps xmm7,xmm1 + /*Stall*/ + movaps xmm6,xmm7 + /*Stall*/ + shufps xmm6,xmm6,4Eh + /*Stall*/ + addps xmm7,xmm6 + /*Stall*/ + movaps xmm6,xmm7 + /*Stall*/ + shufps xmm6,xmm6,11h + /*Stall*/ + addps xmm7,xmm6 + /*Stall*/ + movss back,xmm7 + } + + return(back); +#endif /*DB_USE_SIMD*/ +} + +#endif /* DB_UTILITIES_LINALG */ diff --git a/jni/feature_stab/db_vlvm/db_utilities_poly.cpp b/jni/feature_stab/db_vlvm/db_utilities_poly.cpp new file mode 100644 index 000000000..013ac726e --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_poly.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*!
Solve the cubic a*x^3+b*x^2+c*x+d=0 for its real roots.
\param roots    (out) the real roots; at most three are written
\param nr_roots (out) number of real roots found
\param a,b,c,d  cubic coefficients, highest degree first

If a==0 the problem degenerates and is forwarded to db_SolveQuadratic.
Otherwise the cubic is normalized to monic form and solved in closed
form: a trigonometric branch when all three roots are real, a single
real root branch otherwise. Per the header declaration: for a
nondegenerate cubic with two roots, roots[0] is the single root and
roots[1] is the double root.
*/
void db_SolveCubic(double *roots,int *nr_roots,double a,double b,double c,double d)
{
    double bp,bp2,cp,dp,q,r,srq;
    double r2_min_q3,theta,bp_through3,theta_through3;
    double cos_theta_through3,sin_theta_through3,min2_cos_theta_plu,min2_cos_theta_min;
    double si_r_srq,A;

    /*For nondegenerate cubics with three roots
    [24 mult 9 add 2sqrt 1acos 1cos=33flops 4func]
    For nondegenerate cubics with one root
    [16 mult 6 add 1sqrt 1qbrt=24flops 3func]*/

    if(a==0.0) db_SolveQuadratic(roots,nr_roots,b,c,d);
    else
    {
        /*Normalize to monic x^3+bp*x^2+cp*x+dp*/
        bp=b/a;
        bp2=bp*bp;
        cp=c/a;
        dp=d/a;

        q=(bp2-3.0*cp)/9.0;
        r=(2.0*bp2*bp-9.0*bp*cp+27.0*dp)/54.0;
        /*The sign of r^2-q^3 selects the three-root / one-root branch*/
        r2_min_q3=r*r-q*q*q;
        if(r2_min_q3<0.0)
        {
            *nr_roots=3;
            /*q has to be > 0*/
            srq=sqrt(q);
            /*Clamp the acos argument into [-1,1] to guard against rounding*/
            theta=acos(db_maxd(-1.0,db_mind(1.0,r/(q*srq))));
            bp_through3=bp/3.0;
            theta_through3=theta/3.0;
            cos_theta_through3=cos(theta_through3);
            /*sin recovered from cos to avoid a second trig call; clamped at 0*/
            sin_theta_through3=sqrt(db_maxd(0.0,1.0-cos_theta_through3*cos_theta_through3));

            /*cos(theta_through3+2*pi/3)=cos_theta_through3*cos(2*pi/3)-sin_theta_through3*sin(2*pi/3)
            = -0.5*cos_theta_through3-sqrt(3)/2.0*sin_theta_through3
            = -0.5*(cos_theta_through3+sqrt(3)*sin_theta_through3)*/
            min2_cos_theta_plu=cos_theta_through3+DB_SQRT3*sin_theta_through3;
            min2_cos_theta_min=cos_theta_through3-DB_SQRT3*sin_theta_through3;

            roots[0]= -2.0*srq*cos_theta_through3-bp_through3;
            roots[1]=srq*min2_cos_theta_plu-bp_through3;
            roots[2]=srq*min2_cos_theta_min-bp_through3;
        }
        else if(r2_min_q3>0.0)
        {
            /*Exactly one real root*/
            *nr_roots=1;
            A= -db_sign(r)*db_CubRoot(db_absd(r)+sqrt(r2_min_q3));
            bp_through3=bp/3.0;
            if(A!=0.0) roots[0]=A+q/A-bp_through3;
            else roots[0]= -bp_through3;
        }
        else
        {
            /*Boundary case r^2==q^3: repeated roots*/
            *nr_roots=2;
            bp_through3=bp/3.0;
            /*q has to be >= 0*/
            si_r_srq=db_sign(r)*sqrt(q);
            /*Single root*/
            roots[0]= -2.0*si_r_srq-bp_through3;
            /*Double root*/
            roots[1]=si_r_srq-bp_through3;
        }
    }
}
n=sqrt(ns)*db_sign(mn); + + db_SolveQuadratic(roots,nr_roots, + 1.0,c3through2+m,lz_through2+n); + + db_SolveQuadratic(&roots[*nr_roots],&addroots, + 1.0,c3through2-m,lz_through2-n); + + *nr_roots+=addroots; + } + else *nr_roots=0; + } + else *nr_roots=0; + } +} + +void db_SolveQuarticForced(double *roots,int *nr_roots,double a,double b,double c,double d,double e) +{ + /*Normalized coefficients*/ + double c0,c1,c2,c3; + /*Temporary coefficients*/ + double c3through2,c3through4,c3c3through4_min_c2,min4_c0; + double lz,ms,ns,mn,m,n,lz_through2; + /*Cubic polynomial roots, nr of roots and coefficients*/ + double c_roots[3]; + int nr_c_roots; + double k0,k1; + /*nr additional roots from second quadratic*/ + int addroots; + + /*For nondegenerate quartics + [16mult 11add 2sqrt 1cubic 2quadratic=74flops 8funcs]*/ + + if(a==0.0) db_SolveCubic(roots,nr_roots,b,c,d,e); + else if(e==0.0) + { + db_SolveCubic(roots,nr_roots,a,b,c,d); + roots[*nr_roots]=0.0; + *nr_roots+=1; + } + else + { + /*Compute normalized coefficients*/ + c3=b/a; + c2=c/a; + c1=d/a; + c0=e/a; + /*Compute temporary coefficients*/ + c3through2=c3/2.0; + c3through4=c3/4.0; + c3c3through4_min_c2=c3*c3through4-c2; + min4_c0= -4.0*c0; + /*Compute coefficients of cubic*/ + k0=min4_c0*c3c3through4_min_c2-c1*c1; + k1=c1*c3+min4_c0; + /*k2= -c2*/ + /*k3=1.0*/ + + /*Solve it for roots*/ + db_SolveCubic(c_roots,&nr_c_roots,1.0,-c2,k1,k0); + + if(nr_c_roots>0) + { + lz=c_roots[0]; + lz_through2=lz/2.0; + ms=lz+c3c3through4_min_c2; + ns=lz_through2*lz_through2-c0; + mn=lz*c3through4-c1/2.0; + + if(ms<0.0) ms=0.0; + if(ns<0.0) ns=0.0; + + m=sqrt(ms); + n=sqrt(ns)*db_sign(mn); + + db_SolveQuadratic(roots,nr_roots, + 1.0,c3through2+m,lz_through2+n); + + db_SolveQuadratic(&roots[*nr_roots],&addroots, + 1.0,c3through2-m,lz_through2-n); + + *nr_roots+=addroots; + } + else *nr_roots=0; + } +} diff --git a/jni/feature_stab/db_vlvm/db_utilities_poly.h b/jni/feature_stab/db_vlvm/db_utilities_poly.h new file mode 100644 index 
000000000..1f8789077 --- /dev/null +++ b/jni/feature_stab/db_vlvm/db_utilities_poly.h @@ -0,0 +1,383 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id: db_utilities_poly.h,v 1.2 2010/09/03 12:00:11 bsouthall Exp $ */ + +#ifndef DB_UTILITIES_POLY +#define DB_UTILITIES_POLY + +#include "db_utilities.h" + + + +/***************************************************************** +* Lean and mean begins here * +*****************************************************************/ +/*! + * \defgroup LMPolynomial (LM) Polynomial utilities (solvers, arithmetic, evaluation, etc.) + */ +/*\{*/ + +/*! +In debug mode closed form quadratic solving takes on the order of 15 microseconds +while eig of the companion matrix takes about 1.1 milliseconds +Speed-optimized code in release mode solves a quadratic in 0.3 microseconds on 450MHz +*/ +inline void db_SolveQuadratic(double *roots,int *nr_roots,double a,double b,double c) +{ + double rs,srs,q; + + /*For non-degenerate quadratics + [5 mult 2 add 1 sqrt=7flops 1func]*/ + if(a==0.0) + { + if(b==0.0) *nr_roots=0; + else + { + roots[0]= -c/b; + *nr_roots=1; + } + } + else + { + rs=b*b-4.0*a*c; + if(rs>=0.0) + { + *nr_roots=2; + srs=sqrt(rs); + q= -0.5*(b+db_sign(b)*srs); + roots[0]=q/a; + /*If b is zero db_sign(b) returns 1, + so q is only zero when b=0 and c=0*/ + if(q==0.0) *nr_roots=1; + else roots[1]=c/q; + } + else *nr_roots=0; + } +} + +/*! 
/*!
Evaluate the degree-one polynomial p at x, i.e. p[0]+p[1]*x.*/
inline double db_PolyEval1(const double p[2],double x)
{
    return(p[0]+x*p[1]);
}

/*!
d=a*b for two degree-one polynomials; d receives three coefficients.
Inputs are copied into locals before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly1_1(double *d,const double *a,const double *b)
{
    double x0=a[0],x1=a[1];
    double y0=b[0],y1=b[1];

    d[0]=x0*y0;
    d[1]=x0*y1+x1*y0;
    d[2]=x1*y1;
}

/*!
d=a*b for degree-zero a and degree-two b; d receives three coefficients.
Inputs are copied into locals before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly0_2(double *d,const double *a,const double *b)
{
    double s=a[0];
    double y0=b[0],y1=b[1],y2=b[2];

    d[0]=s*y0;
    d[1]=s*y1;
    d[2]=s*y2;
}

/*!
d=a*b for degree-one a and degree-two b; d receives four coefficients.
Inputs are copied into locals before d is written, so d may alias a or b.*/
inline void db_MultiplyPoly1_2(double *d,const double *a,const double *b)
{
    double x0=a[0],x1=a[1];
    double y0=b[0],y1=b[1],y2=b[2];

    d[0]=x0*y0;
    d[1]=x0*y1+x1*y0;
    d[2]=x0*y2+x1*y1;
    d[3]=x1*y2;
}
/*!
Accumulate d+=a*b where a is degree zero and b is degree one.*/
inline void db_AddPolyProduct0_1(double *d,const double *a,const double *b)
{
    double a0;
    double b0,b1;
    a0=a[0];
    b0=b[0];b1=b[1];

    d[0]+=a0*b0;
    d[1]+=a0*b1;
}

/*!
Accumulate d+=a*b where a is degree zero and b is degree two.*/
inline void db_AddPolyProduct0_2(double *d,const double *a,const double *b)
{
    double a0;
    double b0,b1,b2;
    a0=a[0];
    b0=b[0];b1=b[1];b2=b[2];

    d[0]+=a0*b0;
    d[1]+=a0*b1;
    d[2]+=a0*b2;
}

/*!
Subtract d-=a*b where both a and b are degree zero.*/
inline void db_SubtractPolyProduct0_0(double *d,const double *a,const double *b)
{
    double a0;
    double b0;
    a0=a[0];
    b0=b[0];

    d[0]-=a0*b0;
}

/*!
Subtract d-=a*b where a is degree zero and b is degree one.*/
inline void db_SubtractPolyProduct0_1(double *d,const double *a,const double *b)
{
    double a0;
    double b0,b1;
    a0=a[0];
    b0=b[0];b1=b[1];

    d[0]-=a0*b0;
    d[1]-=a0*b1;
}

/*!
Subtract d-=a*b where a is degree zero and b is degree two.*/
inline void db_SubtractPolyProduct0_2(double *d,const double *a,const double *b)
{
    double a0;
    double b0,b1,b2;
    a0=a[0];
    b0=b[0];b1=b[1];b2=b[2];

    d[0]-=a0*b0;
    d[1]-=a0*b1;
    d[2]-=a0*b2;
}

/*!
Subtract d-=a*b where a is degree one and b is degree three;
five coefficients of d are updated.*/
inline void db_SubtractPolyProduct1_3(double *d,const double *a,const double *b)
{
    double a0,a1;
    double b0,b1,b2,b3;
    a0=a[0];a1=a[1];
    b0=b[0];b1=b[1];b2=b[2];b3=b[3];

    d[0]-=a0*b0;
    d[1]-=a0*b1+a1*b0;
    d[2]-=a0*b2+a1*b1;
    d[3]-=a0*b3+a1*b2;
    d[4]-= a1*b3;
}
two02[0]=A[0]*A[6]-A[2]*A[4]; + two02[1]= -A[6]; + + two03[0]=A[0]*A[7]-A[3]*A[4]; + two03[1]= -A[7]; + + two12[0]=A[1]*A[6]-A[2]*A[5]; + two12[1]=A[2]; + + two13[0]=A[1]*A[7]-A[3]*A[5]; + two13[1]=A[3]; + + two23[0]=A[2]*A[7]-A[3]*A[6]; + + P0[0]=A[8]; + P1[0]=A[9]; + P2[0]=A[10];P2[1]= -1.0; + P3[0]=A[11]; + + P4[0]=A[12]; + P5[0]=A[13]; + P6[0]=A[14]; + P7[0]=A[15];P7[1]= -1.0; + + /*Compute 3x3 determinants.Note that the highest + degree polynomial goes first and the smaller ones + are added or subtracted from it*/ + db_MultiplyPoly1_1( neg_three0,P2,two13); + db_SubtractPolyProduct0_0(neg_three0,P1,two23); + db_SubtractPolyProduct0_1(neg_three0,P3,two12); + + db_MultiplyPoly1_1( neg_three1,P2,two03); + db_SubtractPolyProduct0_1(neg_three1,P3,two02); + db_SubtractPolyProduct0_0(neg_three1,P0,two23); + + db_MultiplyPoly0_2( three2,P3,two01); + db_AddPolyProduct0_1( three2,P0,two13); + db_SubtractPolyProduct0_1(three2,P1,two03); + + db_MultiplyPoly1_2( three3,P2,two01); + db_AddPolyProduct0_1( three3,P0,two12); + db_SubtractPolyProduct0_1(three3,P1,two02); + + /*Compute 4x4 determinants*/ + db_MultiplyPoly1_3( p,P7,three3); + db_AddPolyProduct0_2( p,P4,neg_three0); + db_SubtractPolyProduct0_2(p,P5,neg_three1); + db_SubtractPolyProduct0_2(p,P6,three2); +} + +inline void db_RealEigenvalues4x4(double lambda[4],int *nr_roots,const double A[16],int forced=0) +{ + double p[5]; + + db_CharacteristicPolynomial4x4(p,A); + if(forced) db_SolveQuarticForced(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]); + else db_SolveQuartic(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]); +} + +/*! 
/*!
Compute the unit norm eigenvector v of the matrix A corresponding
to the eigenvalue lambda
[96mult 60add 1sqrt=156flops 1sqrt]
\param v      (out) unit-norm eigenvector
\param lambda eigenvalue of A
\param A      the 4x4 matrix

Method: form A-lambda*I implicitly (only the diagonal changes), build its
cofactor matrix via cached 2x2 determinants, then normalize the cofactor
row with the largest squared norm — any nonzero row of the cofactor
matrix of a singular matrix lies in its null space, i.e. is the sought
eigenvector; picking the largest row is numerically the safest choice.
*/
inline void db_EigenVector4x4(double v[4],double lambda,const double A[16])
{
    double a0,a5,a10,a15;
    double d01,d02,d03,d12,d13,d23;
    double e01,e02,e03,e12,e13,e23;
    double C[16],n0,n1,n2,n3,m;

    /*Compute diagonal of A-lambda*I
    [4add=4flops]*/
    a0=A[0]-lambda;
    a5=A[5]-lambda;
    a10=A[10]-lambda;
    a15=A[15]-lambda;

    /*Compute 2x2 determinants of rows 1,2 (d..) and 3,4 (e..)
    [24mult 12add=36flops]*/
    d01=a0*A[5]-A[1]*A[4];
    d02=a0*A[6] -A[2]*A[4];
    d03=a0*A[7] -A[3]*A[4];
    d12=A[1]*A[6]-A[2]*a5;
    d13=A[1]*A[7]-A[3]*a5;
    d23=A[2]*A[7]-A[3]*A[6];

    e01=A[8]*A[13]-A[9] *A[12];
    e02=A[8]*A[14]-a10 *A[12];
    e03=A[8]*a15 -A[11]*A[12];
    e12=A[9]*A[14]-a10 *A[13];
    e13=A[9]*a15 -A[11]*A[13];
    e23=a10 *a15 -A[11]*A[14];

    /*Compute matrix of cofactors from the cached 2x2 determinants;
    signs alternate in the usual cofactor checkerboard pattern
    [48mult 32 add=80flops*/
    C[0]= (a5 *e23-A[6]*e13+A[7]*e12);
    C[1]= -(A[4]*e23-A[6]*e03+A[7]*e02);
    C[2]= (A[4]*e13-a5 *e03+A[7]*e01);
    C[3]= -(A[4]*e12-a5 *e02+A[6]*e01);

    C[4]= -(A[1]*e23-A[2]*e13+A[3]*e12);
    C[5]= (a0 *e23-A[2]*e03+A[3]*e02);
    C[6]= -(a0 *e13-A[1]*e03+A[3]*e01);
    C[7]= (a0 *e12-A[1]*e02+A[2]*e01);

    C[8]= (A[13]*d23-A[14]*d13+a15 *d12);
    C[9]= -(A[12]*d23-A[14]*d03+a15 *d02);
    C[10]= (A[12]*d13-A[13]*d03+a15 *d01);
    C[11]= -(A[12]*d12-A[13]*d02+A[14]*d01);

    C[12]= -(A[9]*d23-a10 *d13+A[11]*d12);
    C[13]= (A[8]*d23-a10 *d03+A[11]*d02);
    C[14]= -(A[8]*d13-A[9]*d03+A[11]*d01);
    C[15]= (A[8]*d12-A[9]*d02+a10 *d01);

    /*Compute square sums of rows
    [16mult 12add=28flops*/
    n0=db_sqr(C[0]) +db_sqr(C[1]) +db_sqr(C[2]) +db_sqr(C[3]);
    n1=db_sqr(C[4]) +db_sqr(C[5]) +db_sqr(C[6]) +db_sqr(C[7]);
    n2=db_sqr(C[8]) +db_sqr(C[9]) +db_sqr(C[10])+db_sqr(C[11]);
    n3=db_sqr(C[12])+db_sqr(C[13])+db_sqr(C[14])+db_sqr(C[15]);

    /*Take the largest norm row and normalize
    (db_SafeReciprocal presumably guards against division by zero — confirm
    against db_utilities.h)
    [4mult 1 sqrt=4flops 1sqrt]*/
    if(n0>=n1 && n0>=n2 && n0>=n3)
    {
        m=db_SafeReciprocal(sqrt(n0));
        db_MultiplyScalarCopy4(v,C,m);
    }
    else if(n1>=n2 && n1>=n3)
    {
        m=db_SafeReciprocal(sqrt(n1));
        db_MultiplyScalarCopy4(v,&(C[4]),m);
    }
    else if(n2>=n3)
    {
        m=db_SafeReciprocal(sqrt(n2));
        db_MultiplyScalarCopy4(v,&(C[8]),m);
    }
    else
    {
        m=db_SafeReciprocal(sqrt(n3));
        db_MultiplyScalarCopy4(v,&(C[12]),m);
    }
}
/*!
Random number generator: the constants (16807, 2^31-1, 127773, 2836)
match the Park-Miller minimal standard generator evaluated with
Schrage's overflow-free method. Initialize with a non-zero integer
seed; the seed is advanced in place.
\param r seed (in/out)
\return uniform double in (0,1)
*/
inline double db_QuickRandomDouble(int &r)
{
    int quot=r/127773;
    r=16807*(r-quot*127773)-2836*quot;
    /*Fold a negative intermediate back into [1,2^31-2]*/
    if(r<0) r+=2147483647;
    return((1.0/((double)2147483647))*r);
}

/*!
Random integer between and including 0 and max.
Initialize with a non-zero integer seed; the seed is advanced in place.
\param r   seed (in/out)
\param max upper limit (inclusive)
\return random int in [0,max]
*/
inline int db_RandomInt(int &r,int max)
{
    double scaled=db_QuickRandomDouble(r)*(max+1);
    int result=(int)scaled;
    /*Clamp defensively to the requested range*/
    if(result<=0) return(0);
    if(result>=max) return(max);
    return(result);
}

/*!
Generate a random sample of distinct indices into [0..pool_size-1].
Each draw is taken from the shrinking pool and shifted past the indices
already chosen, keeping those in sorted order as it goes.
\param s           (out) pre-allocated array of size sample_size
\param sample_size number of indices to draw
\param pool_size   upper limit on item index
\param r_seed      random number generator seed (in/out)
*/
inline void db_RandomSample(int *s,int sample_size,int pool_size,int &r_seed)
{
    for(int i=0;i<sample_size;i++)
    {
        int candidate=db_RandomInt(r_seed,pool_size-1-i);

        for(int j=0;j<i;j++)
        {
            /*Skip over already-selected indices at or below candidate*/
            if(s[j]<=candidate) candidate++;
            else
            {
                /*Keep the prefix sorted by swapping*/
                int held=candidate;
                candidate=s[j];
                s[j]=held;
            }
        }
        s[i]=candidate;
    }
}
/*!
Takes a unit quaternion q=(w,x,y,z) and gives its corresponding
3x3 rotation matrix, stored row-major in R.
\param R rotation matrix (out)
\param q unit quaternion
*/
inline void db_QuaternionToRotation(double R[9],const double q[4])
{
    /*All ten pairwise products of the quaternion components*/
    double ww=q[0]*q[0],wx=q[0]*q[1],wy=q[0]*q[2],wz=q[0]*q[3];
    double xx=q[1]*q[1],xy=q[1]*q[2],xz=q[1]*q[3];
    double yy=q[2]*q[2],yz=q[2]*q[3];
    double zz=q[3]*q[3];

    R[0]=ww+xx-yy-zz;
    R[1]=2.0*(xy-wz);
    R[2]=2.0*(xz+wy);

    R[3]=2.0*(xy+wz);
    R[4]=ww-xx+yy-zz;
    R[5]=2.0*(yz-wx);

    R[6]=2.0*(xz-wy);
    R[7]=2.0*(yz+wx);
    R[8]=ww-xx-yy+zz;
}