KeyPointDetector.cpp

Go to the documentation of this file.
00001 /*
00002 * Copyright (C) 2007-2008 Anael Orlinski
00003 *
00004 * This file is part of Panomatic.
00005 *
00006 * Panomatic is free software; you can redistribute it and/or modify
00007 * it under the terms of the GNU General Public License as published by
00008 * the Free Software Foundation; either version 2 of the License, or
00009 * (at your option) any later version.
00010 *
00011 * Panomatic is distributed in the hope that it will be useful,
00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 * GNU General Public License for more details.
00015 *
00016 * You should have received a copy of the GNU General Public License
00017 * along with Panomatic; if not, write to the Free Software
00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019 */
00020 
00021 #include <iostream>
00022 
00023 #include "KeyPoint.h"
00024 #include "KeyPointDetector.h"
00025 #include "BoxFilter.h"
00026 #include "MathStuff.h"
00027 
00028 using namespace lfeat;
00029 
00030 const double lfeat::KeyPointDetector::kBaseSigma = 1.2;
00031 
00032 KeyPointDetector::KeyPointDetector()
00033 {
00034     // initialize default values
00035     _maxScales = 5;             // number of scales : 9x9, 15x15, 21x21, 27x27, ...
00036     _maxOctaves = 4;    // number of octaves
00037 
00038     _scoreThreshold = 1000;
00039 
00040     _initialBoxFilterSize = 3;
00041     _scaleOverlap = 3;
00042 
00043 }
00044 
00045 void KeyPointDetector::detectKeypoints(Image& iImage, KeyPointInsertor& iInsertor)
00046 {
00047     // allocate lots of memory for the scales
00048     double** * aSH = new double** [_maxScales];
00049     for (unsigned int s = 0; s < _maxScales; ++s)
00050     {
00051         aSH[s] = Image::AllocateImage(iImage.getWidth(), iImage.getHeight());
00052     }
00053 
00054     // init the border size
00055     unsigned int* aBorderSize = new unsigned int[_maxScales];
00056 
00057     unsigned int aMaxima = 0;
00058 
00059     // base size + 3 times first increment for step back
00060     // for the first octave 9x9, 15x15, 21x21, 27x27, 33x33
00061     // for the second 21x21, 33x33, 45x45 ...
00062 
00063     // go through all the octaves
00064     for (unsigned int o = 0; o < _maxOctaves; ++o)
00065     {
00066         // calculate the pixel step on the image, and the image size
00067         unsigned int aPixelStep = 1 << o;       // 2^aOctaveIt
00068         int aOctaveWidth = iImage.getWidth() / aPixelStep;      // integer division
00069         int aOctaveHeight = iImage.getHeight() / aPixelStep;    // integer division
00070 
00071         // fill each scale matrices
00072         for (unsigned int s = 0; s < _maxScales; ++s)
00073         {
00074             // create a box filter of the correct size.
00075             BoxFilter aBoxFilter(getFilterSize(o, s), iImage);
00076 
00077             // calculate the border for this scale
00078             aBorderSize[s] = getBorderSize(o, s);
00079 
00080             // fill the hessians
00081             int aEy = aOctaveHeight - aBorderSize[s];
00082             int aEx = aOctaveWidth - aBorderSize[s];
00083 
00084             int aYPS = aBorderSize[s] * aPixelStep;
00085             for (int y = aBorderSize[s]; y < aEy; ++y)
00086             {
00087                 aBoxFilter.setY(aYPS);
00088                 int aXPS = aBorderSize[s] * aPixelStep;
00089                 for (int x = aBorderSize[s]; x < aEx; ++x)
00090                 {
00091                     aSH[s][y][x] = aBoxFilter.getDetWithX(aXPS);
00092                     aXPS += aPixelStep;
00093                 }
00094                 aYPS += aPixelStep;
00095             }
00096         }
00097 
00098         // detect the feature points with a 3x3x3 neighborhood non-maxima suppression
00099         for (unsigned int aSIt = 1; aSIt < (_maxScales - 1); aSIt +=2)
00100         {
00101             const int aBS = aBorderSize[aSIt+1];
00102             for (int aYIt = aBS + 1; aYIt < aOctaveHeight - aBS - 1; aYIt+=2)
00103             {
00104                 for (int aXIt = aBS + 1; aXIt < aOctaveWidth - aBS - 1; aXIt+=2)
00105                 {
00106                     // find the maximum in the 2x2x2 cube
00107                     double aTab[8];
00108 
00109                     // get the values in a
00110                     aTab[0] = aSH[aSIt]  [aYIt]  [aXIt];
00111                     aTab[1] = aSH[aSIt]  [aYIt]  [aXIt+1];
00112                     aTab[2] = aSH[aSIt]  [aYIt+1][aXIt];
00113                     aTab[3] = aSH[aSIt]  [aYIt+1][aXIt+1];
00114                     aTab[4] = aSH[aSIt+1][aYIt]  [aXIt];
00115                     aTab[5] = aSH[aSIt+1][aYIt]  [aXIt+1];
00116                     aTab[6] = aSH[aSIt+1][aYIt+1][aXIt];
00117                     aTab[7] = aSH[aSIt+1][aYIt+1][aXIt+1];
00118 
00119                     // find the max index without using a loop.
00120                     unsigned int a04 = (aTab[0]>aTab[4]?0:4);
00121                     unsigned int a15 = (aTab[1]>aTab[5]?1:5);
00122                     unsigned int a26 = (aTab[2]>aTab[6]?2:6);
00123                     unsigned int a37 = (aTab[3]>aTab[7]?3:7);
00124                     unsigned int a0426 = (aTab[a04]>aTab[a26]?a04:a26);
00125                     unsigned int a1537 = (aTab[a15]>aTab[a37]?a15:a37);
00126                     unsigned int aMaxIdx = (aTab[a0426]>aTab[a1537]?a0426:a1537);
00127 
00128                     // calculate approximate threshold
00129                     double aApproxThres = _scoreThreshold * 0.8;
00130 
00131                     double aScore = aTab[aMaxIdx];
00132 
00133                     // check found point against threshold
00134                     if (aScore < aApproxThres)
00135                     {
00136                         continue;
00137                     }
00138 
00139                     // verify that other missing points in the 3x3x3 cube are also below treshold
00140                     int aXShift = 2*(aMaxIdx & 1)-1;
00141                     int aXAdj = aXIt + (aMaxIdx & 1);
00142                     aMaxIdx >>= 1;
00143 
00144                     int aYShift = 2*(aMaxIdx & 1)-1;
00145                     int aYAdj = aYIt + (aMaxIdx & 1);
00146                     aMaxIdx >>= 1;
00147 
00148                     int aSShift = 2*(aMaxIdx & 1)-1;
00149                     int aSAdj = aSIt + (aMaxIdx & 1);
00150 
00151                     // skip too high scale ajusting
00152                     if (aSAdj == (int)_maxScales - 1)
00153                     {
00154                         continue;
00155                     }
00156 
00157                     if (        (aSH[aSAdj + aSShift][aYAdj - aYShift][aXAdj - 1] > aScore) ||
00158                             (aSH[aSAdj + aSShift][aYAdj - aYShift][aXAdj    ] > aScore) ||
00159                             (aSH[aSAdj + aSShift][aYAdj - aYShift][aXAdj + 1] > aScore) ||
00160                             (aSH[aSAdj + aSShift][aYAdj                 ][aXAdj - 1] > aScore) ||
00161                             (aSH[aSAdj + aSShift][aYAdj                 ][aXAdj    ] > aScore) ||
00162                             (aSH[aSAdj + aSShift][aYAdj                 ][aXAdj + 1] > aScore) ||
00163                             (aSH[aSAdj + aSShift][aYAdj + aYShift][aXAdj - 1] > aScore) ||
00164                             (aSH[aSAdj + aSShift][aYAdj + aYShift][aXAdj    ] > aScore) ||
00165                             (aSH[aSAdj + aSShift][aYAdj + aYShift][aXAdj + 1] > aScore) ||
00166 
00167                             (aSH[aSAdj][                aYAdj + aYShift ][aXAdj - 1] > aScore) ||
00168                             (aSH[aSAdj][                        aYAdj + aYShift ][aXAdj] > aScore) ||
00169                             (aSH[aSAdj][                aYAdj + aYShift ][aXAdj + 1] > aScore) ||
00170                             (aSH[aSAdj][        aYAdj                   ][aXAdj + aXShift] > aScore) ||
00171                             (aSH[aSAdj][        aYAdj - aYShift ][aXAdj + aXShift] > aScore) ||
00172 
00173                             (aSH[aSAdj - aSShift][              aYAdj + aYShift ][aXAdj - 1] > aScore) ||
00174                             (aSH[aSAdj - aSShift][                      aYAdj + aYShift ][aXAdj] > aScore) ||
00175                             (aSH[aSAdj - aSShift][              aYAdj + aYShift ][aXAdj + 1] > aScore) ||
00176                             (aSH[aSAdj - aSShift][      aYAdj                   ][aXAdj + aXShift] > aScore) ||
00177                             (aSH[aSAdj - aSShift][      aYAdj - aYShift ][aXAdj + aXShift] > aScore)
00178                        )
00179                     {
00180                         continue;
00181                     }
00182 
00183                     // fine tune the location
00184                     double aX = aXAdj;
00185                     double aY = aYAdj;
00186                     double aS = aSAdj;
00187 
00188                     if(aBorderSize[aSAdj+1]>aBorderSize[aSAdj])
00189                     {
00190                         if(aX<aBorderSize[aSAdj+1] || aX>aOctaveWidth-aBorderSize[aSAdj+1]-1)
00191                         {
00192                             continue;
00193                         };
00194                         if(aY<aBorderSize[aSAdj+1] || aY>aOctaveHeight-aBorderSize[aSAdj+1]-1)
00195                         {
00196                             continue;
00197                         };
00198                     };
00199                     // try to fine tune, restore the values if it failed
00200                     // if the returned value is true,  keep the point, else drop it.
00201                     if (!fineTuneExtrema(aSH, aXAdj, aYAdj, aSAdj, aX, aY, aS, aScore, aOctaveWidth, aOctaveHeight, aBorderSize[aSAdj+1]))
00202                     {
00203                         continue;
00204                     }
00205 
00206                     // recheck the updated score
00207                     if (aScore < _scoreThreshold)
00208                     {
00209                         continue;
00210                     }
00211 
00212                     //if (aScore > 1e10)
00213                     //{
00214                     //  //continue;
00215                     //  std::cout << "big big score" << std::endl;
00216                     //}
00217 
00218                     // adjust the values
00219                     aX *= aPixelStep;
00220                     aY *= aPixelStep;
00221                     aS = ((2* aS * aPixelStep) + _initialBoxFilterSize + (aPixelStep-1) * _maxScales  ) / 3.0; // this one was hard to guess...
00222 
00223                     // store the point
00224                     int aTrace;
00225                     if (!calcTrace(iImage, aX, aY, aS, aTrace))
00226                     {
00227                         continue;
00228                     }
00229 
00230                     aMaxima++;
00231 
00232                     // do something with the keypoint depending on the insertor
00233                     iInsertor(KeyPoint(aX, aY, aS * kBaseSigma, aScore, aTrace));
00234 
00235                 }
00236             }
00237         }
00238     }
00239 
00240     // deallocate memory of the scale images
00241     for (unsigned int s = 0; s < _maxScales; ++s)
00242     {
00243         Image::DeallocateImage(aSH[s], iImage.getHeight());
00244     }
00245 }
00246 
00247 bool KeyPointDetector::fineTuneExtrema(double** * iSH, unsigned int iX, unsigned int iY, unsigned int iS,
00248                                        double& oX, double& oY, double& oS, double& oScore,
00249                                        unsigned int iOctaveWidth, unsigned int iOctaveHeight, unsigned int iBorder)
00250 {
00251     // maximum fine tune iterations
00252     const unsigned int  kMaxFineTuneIters = 6;
00253 
00254     // shift from the initial position for X and Y (only -1 or + 1 during the iterations).
00255     int aX = iX;
00256     int aY = iY;
00257     int aS = iS;
00258 
00259     int aShiftX = 0;
00260     int aShiftY = 0;
00261 
00262     // current deviations
00263     double aDx = 0, aDy = 0, aDs = 0;
00264 
00265     //result vector
00266     double aV[3];       //(x,y,s)
00267 
00268     for (unsigned int aIter = 0; aIter < kMaxFineTuneIters; ++aIter)
00269     {
00270         // update the extrema position
00271         aX += aShiftX;
00272         aY += aShiftY;
00273 
00274         // create the problem matrix
00275         double aM[3][3]; //symetric, no ordering problem.
00276 
00277         // fill the result vector with gradient from pixels differences (negate to prepare system solve)
00278         aDx = ( iSH[aS  ][aY  ][aX+1] - iSH[aS  ][aY  ][aX-1] ) * 0.5;
00279         aDy = ( iSH[aS  ][aY+1][aX  ] - iSH[aS  ][aY-1][aX  ] ) * 0.5;
00280         aDs = ( iSH[aS+1][aY  ][aX  ] - iSH[aS-1][aY  ][aX  ] ) * 0.5;
00281 
00282         aV[0] = - aDx;
00283         aV[1] = - aDy;
00284         aV[2] = - aDs;
00285 
00286         // fill the matrix with values of the hessian from pixel differences
00287         aM[0][0] = iSH[aS  ][aY  ][aX-1] - 2.0 * iSH[aS][aY][aX] + iSH[aS  ][aY  ][aX+1];
00288         aM[1][1] = iSH[aS  ][aY-1][aX  ] - 2.0 * iSH[aS][aY][aX] + iSH[aS  ][aY+1][aX  ];
00289         aM[2][2] = iSH[aS-1][aY  ][aX  ] - 2.0 * iSH[aS][aY][aX] + iSH[aS+1][aY  ][aX  ];
00290 
00291         aM[0][1] = aM[1][0] =  ( iSH[aS  ][aY+1][aX+1] + iSH[aS  ][aY-1][aX-1] - iSH[aS  ][aY+1][aX-1] - iSH[aS  ][aY-1][aX+1] ) * 0.25;
00292         aM[0][2] = aM[2][0] =  ( iSH[aS+1][aY  ][aX+1] + iSH[aS-1][aY  ][aX-1] - iSH[aS+1][aY  ][aX-1] - iSH[aS-1][aY  ][aX+1] ) * 0.25;
00293         aM[1][2] = aM[2][1] =  ( iSH[aS+1][aY+1][aX  ] + iSH[aS-1][aY-1][aX  ] - iSH[aS+1][aY-1][aX  ] - iSH[aS-1][aY+1][aX  ] ) * 0.25;
00294 
00295         // solve the linear system. results are in aV. exit with error if a problem happened
00296         if (!Math::SolveLinearSystem33(aV, aM))
00297         {
00298             return false;
00299         }
00300 
00301 
00302         // ajust the shifts with the results and stop if no significant change
00303 
00304         if (aIter < kMaxFineTuneIters - 1)
00305         {
00306             aShiftX = 0;
00307             aShiftY = 0;
00308 
00309             if (aV[0] > 0.6 && aX < (int)(iOctaveWidth - iBorder - 2))
00310             {
00311                 aShiftX++;
00312             }
00313             else if (aV[0] < -0.6 && aX > (int)iBorder + 1)
00314             {
00315                 aShiftX--;
00316             }
00317 
00318             if (aV[1] > 0.6 && aY < (int)(iOctaveHeight - iBorder - 2))
00319             {
00320                 aShiftY++;
00321             }
00322             else if (aV[1] < -0.6 && aY > (int)iBorder + 1)
00323             {
00324                 aShiftY--;
00325             }
00326 
00327             if( aShiftX == 0 && aShiftY == 0)
00328             {
00329                 break;
00330             }
00331         }
00332     }
00333 
00334     // update the score
00335     oScore = iSH[aS][aY][aX] + 0.5 * (aDx * aV[0] + aDy * aV[1] + aDs * aV[2]);
00336 
00337     // reject too big deviation in last step (unfinished job).
00338     if (Math::Abs(aV[0]) > 1.5 || Math::Abs(aV[1]) > 1.5  || Math::Abs(aV[2]) > 1.5)
00339     {
00340         return false;
00341     }
00342 
00343     // put the last deviation (not integer :) to the output
00344     oX = aX + aV[0];
00345     oY = aY + aV[1];
00346     oS = iS + aV[2];
00347 
00348     return true;
00349 }
00350 
00351 unsigned int            KeyPointDetector::getFilterSize(unsigned int iOctave, unsigned int iScale)
00352 {
00353     unsigned int aScaleShift = 2 << iOctave;
00354     return      _initialBoxFilterSize + (aScaleShift - 2)*(_maxScales - _scaleOverlap) + aScaleShift * iScale;
00355 }
00356 
00357 unsigned int            KeyPointDetector::getBorderSize(unsigned int iOctave, unsigned int iScale)
00358 {
00359     unsigned int aScaleShift = 2 << iOctave;
00360     if (iScale <= 2)
00361     {
00362         unsigned int aMult = (iOctave == 0 ? 1 : 2);
00363         return (getFilterSize(iOctave, 1) + aMult * aScaleShift) * 3 / aScaleShift + 1;
00364     }
00365     return getFilterSize(iOctave, iScale) * 3 / aScaleShift + 1;
00366 }
00367 
00368 bool KeyPointDetector::calcTrace(Image& iImage,
00369                                  double iX,
00370                                  double iY,
00371                                  double iScale,
00372                                  int& oTrace)
00373 {
00374     unsigned int aRX = Math::Round(iX);
00375     unsigned int aRY = Math::Round(iY);
00376 
00377     BoxFilter aBox(3*iScale, iImage);
00378 
00379     if(!aBox.checkBounds(aRX, aRY))
00380     {
00381         return false;
00382     }
00383 
00384     aBox.setY(aRY);
00385     double aTrace = aBox.getDxxWithX(aRX) + aBox.getDyyWithX(aRX);
00386     oTrace = (aTrace <= 0.0 ? -1 : 1);
00387 
00388     return true;
00389 }
00390 

Generated on 25 Oct 2014 for Hugintrunk by  doxygen 1.4.7