Main Page | Modules | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages
deghosting/khan.h
Go to the documentation of this file.00001 00021 #ifndef KHAN_H_ 00022 #define KHAN_H_ 00023 00024 #include "deghosting.h" 00025 // for AlgTinyVector, NormalizeFunctor and LogarithmFunctor 00026 #include "support.h" 00027 #include "algtinyvector.h" 00028 00029 // needed for RGB2Lab 00030 #include <vigra/imageinfo.hxx> 00031 #include <vigra/transformimage.hxx> 00032 #include <vigra/colorconversions.hxx> 00033 00034 // for resampleImage 00035 #include <vigra/resizeimage.hxx> 00036 00037 // for RGBvalue used in hat function 00038 #include <vigra/rgbvalue.hxx> 00039 00040 // needed for Kh() 00041 #define PI 3.14159265358979323846 00042 00043 // number of pixels to look at in all directions 00044 // ie. 1 for neighbourhood of size 3x3, 2 for 5x5 etc. 00045 #define NEIGHB_DIST 1 00046 00047 #if defined WIN32 00048 #define snprintf _snprintf 00049 #endif 00050 // define for use atan based kernel function 00051 // leave undefined for gaussian normal distribution function 00052 //#define ATAN_KH 00053 00054 #ifdef DEGHOSTING_CACHE_IMAGES 00055 #include <cstring> 00056 #include <vigra/cachedfileimage.hxx> 00057 #endif 00058 00059 using namespace vigra; 00060 00061 namespace deghosting 00062 { 00063 template <class PixelType> 00064 class ImageTypes { 00065 public: 00066 typedef vigra::FImage ImageType; 00067 typedef boost::shared_ptr<ImageType> ImagePtr; 00068 #ifdef DEGHOSTING_CACHE_IMAGES 00069 typedef CachedFileImage<float> ProcessImageType; 00070 #else 00071 typedef BasicImage<float> ProcessImageType; 00072 #endif 00073 typedef boost::shared_ptr<ProcessImageType> ProcessImageTypePtr; 00074 }; 00075 00076 template <class PixelType> 00077 class ImageTypes<RGBValue<PixelType> > { 00078 public: 00079 typedef vigra::FRGBImage ImageType; 00080 typedef boost::shared_ptr<ImageType> ImagePtr; 00081 #ifdef DEGHOSTING_CACHE_IMAGES 00082 typedef CachedFileImage<AlgTinyVector<float, 3> > ProcessImageType; 00083 #else 00084 typedef BasicImage<AlgTinyVector<float, 3> > ProcessImageType; 00085 #endif 00086 typedef boost::shared_ptr<ProcessImageType> ProcessImageTypePtr; 00087 }; 00088 00089 template <class PixelType> 00090 class Khan : public Deghosting, private ImageTypes<PixelType> 00091 { 00092 public: 00093 Khan(std::vector<std::string>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity); 00094 Khan(std::vector<ImageImportInfo>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity); 00095 std::vector<FImagePtr> createWeightMasks(); 00096 ~Khan() {} 00097 protected: 00098 typedef typename ImageTypes<PixelType>::ImageType ImageType; 00099 typedef typename ImageTypes<PixelType>::ProcessImageType ProcessImageType; 00100 typedef typename ImageTypes<PixelType>::ProcessImageType::traverser ProcessImageTraverser; 00101 typedef typename ImageTypes<PixelType>::ProcessImageType::PixelType ProcessImagePixelType; 00102 typedef typename ImageTypes<PixelType>::ProcessImageTypePtr ProcessImageTypePtr; 00103 typedef typename NumericTraits<PixelType>::isScalar srcIsScalar; 00104 00105 // Kh() things 00106 // (2*pi)^(1/2) 00107 double PIPOW; 00108 // 1/Kh denominator 00109 double denom; 00110 // sigma in gauusian density function 00111 double sigma; 00112 00113 // other necessary stuff 00114 std::vector<ProcessImageTypePtr> processImages; 00115 std::vector<FImagePtr> weights; 00116 00120 void setSigma(double sigma); 00121 00126 //void linearizeRGB(std::string, FRGBImage* pInputImg); 00127 00131 inline float Kh(ProcessImagePixelType x); 00132 00137 void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType); 00138 void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraTrueType); 00139 00142 void importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType); 00143 void importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType); 00144 00149 void preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output); 00150 }; 00151 00152 template <class PixelType> 00153 Khan<PixelType>::Khan(std::vector<std::string>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags, 00154 int newIterations, double newSigma, int newVerbosity) { 00155 try { 00156 Deghosting::loadImages(newInputFiles); 00157 Deghosting::setFlags(newFlags); 00158 Deghosting::setDebugFlags(newDebugFlags); 00159 Deghosting::setIterationNum(newIterations); 00160 Deghosting::setVerbosity(newVerbosity); 00161 00162 // I don't know why, but sigma for HDR input have to approximately 10 times smaller 00163 // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed 00164 const char * fileType= ImageImportInfo(newInputFiles[0].c_str()).getFileType(); 00165 if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) { 00166 setSigma(newSigma/10); 00167 } else { 00168 setSigma(newSigma); 00169 } 00170 00171 for (unsigned int i=0; i<5; i++) 00172 Deghosting::response.push_back(0); 00173 PIPOW = sigma*std::sqrt(2*PI); 00174 denom = 1/PIPOW; 00175 } catch (...) { 00176 throw; 00177 } 00178 } 00179 00180 template <class PixelType> 00181 Khan<PixelType>::Khan(std::vector<ImageImportInfo>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags, 00182 int newIterations, double newSigma, int newVerbosity) { 00183 try { 00184 Deghosting::loadImages(newInputFiles); 00185 Deghosting::setFlags(newFlags); 00186 Deghosting::setDebugFlags(newDebugFlags); 00187 Deghosting::setIterationNum(newIterations); 00188 Deghosting::setVerbosity(newVerbosity); 00189 00190 // I don't know why, but sigma for HDR input have to approximately 10 times smaller 00191 // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed 00192 const char * fileType= newInputFiles[0].getFileType(); 00193 if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) { 00194 setSigma(newSigma/10); 00195 } else { 00196 setSigma(newSigma); 00197 } 00198 00199 for (unsigned int i=0; i<5; i++) 00200 Deghosting::response.push_back(0); 00201 PIPOW = sigma*std::sqrt(2*PI); 00202 denom = 1/PIPOW; 00203 } catch (...) { 00204 throw; 00205 } 00206 } 00207 00208 template <class PixelType> 00209 void Khan<PixelType>::setSigma(double newSigma) { 00210 sigma = newSigma; 00211 } 00212 00213 template <class PixelType> 00214 float Khan<PixelType>::Kh(ProcessImagePixelType x) { 00215 #ifdef ATAN_KH 00216 // good choice for sigma for this function is around 600 00217 return std::atan(-(x*x)+sigma)/PI + 0.5; 00218 #else 00219 // good choice for sigma for this function is around 30 00220 return (std::exp(-(x*x)/(2*sigma*sigma)) * denom); 00221 #endif 00222 } 00223 00224 /*void Khan::linearizeRGB(std::string inputFile,FRGBImage *pInputImg) { 00225 HuginBase::SrcPanoImage panoImg(inputFile); 00226 panoImg.setResponseType(HuginBase::SrcPanoImage::RESPONSE_EMOR); 00227 panoImg.setEMoRParams(response); 00228 // response transform functor 00229 HuginBase::Photometric::InvResponseTransform<RGBValue<float>, 00230 RGBValue<float> > invResponse(panoImg); 00231 invResponse.enforceMonotonicity(); 00232 00233 // iterator to the upper left corner 00234 FRGBImage::traverser imgIterSourceY = pInputImg->upperLeft(); 00235 // iterator to he lower right corner 00236 FRGBImage::traverser imgIterEnd = pInputImg->lowerRight(); 00237 // iterator to the output 00238 FRGBImage::traverser imgIterOut = pInputImg->upperLeft(); 00239 // loop through the image 00240 for (int y=1; imgIterSourceY.y != imgIterEnd.y; ++imgIterSourceY.y, ++imgIterOut.y, ++y) { 00241 // iterator to the input 00242 FRGBImage::traverser sx = imgIterSourceY; 00243 // iterator to the output 00244 FRGBImage::traverser dx = imgIterOut; 00245 for (int x=1; sx.x != imgIterEnd.x; ++sx.x, ++dx.x, ++x) { 00246 // transform image using response 00247 *dx = vigra_ext::zeroNegative(invResponse(*sx, hugin_utils::FDiff2D(x, y))); 00248 } 00249 } 00250 }*/ 00251 00252 // RGB 00253 template <class PixelType> 00254 void Khan<PixelType>::convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType) { 00255 RGB2LabFunctor<float> RGB2Lab; 00256 transformImage(srcImageRange(*in), destImage(*out), RGB2Lab); 00257 } 00258 00259 // grayscale 00260 template <class PixelType> 00261 void Khan<PixelType>::convertImage(ImageType* in, ProcessImageTypePtr& out, VigraTrueType) { 00262 copyImage(srcImageRange(*in), destImage(*out)); 00263 } 00264 00265 // load image and convert it to grayscale 00266 template <class PixelType> 00267 void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType) { 00268 // NOTE: I guess this is not optimal, but it works 00269 RGBToGrayAccessor<FRGBImage::PixelType> color2gray; 00270 FRGBImage tmpImg(info.size()); 00271 if (info.numBands() == 4) { 00272 BImage imgAlpha(info.size()); 00273 importImageAlpha(info, destImage(tmpImg), destImage(imgAlpha)); 00274 } else { 00275 importImage(info, destImage(tmpImg)); 00276 } 00277 transformImage(srcImageRange(tmpImg, color2gray), destImage(*img), log(Arg1()+Param(1.0f))); 00278 } 00279 00280 // only load image 00281 template <class PixelType> 00282 void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType) { 00283 if (info.numBands() == 4) { 00284 BImage imgAlpha(info.size()); 00285 importImageAlpha(info, destImage(*img), destImage(imgAlpha)); 00286 } else { 00287 importImage(info, destImage(*img)); 00288 } 00289 } 00290 00291 template <class PixelType> 00292 void Khan<PixelType>::preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output) { 00293 ImageImportInfo imgInfo(inputFiles[i]); 00294 ImageType * pInputImg = new ImageType(imgInfo.size()); 00295 weight = FImagePtr(new FImage(imgInfo.size())); 00296 output = ProcessImageTypePtr(new ProcessImageType(imgInfo.size())); 00297 00298 // import image 00299 // NOTE: Maybe alpha can be of some use but I don't 00300 // know about any now 00301 if (imgInfo.isColor()) { 00302 importRGBImage(imgInfo, pInputImg, srcIsScalar()); 00303 } else { 00304 importImage(imgInfo, destImage(*pInputImg)); 00305 } 00306 00307 // linearize RGB and convert it to L*a*b image 00308 //linearizeRGB(inputFiles[i], pInputImg); 00309 00310 // take logarithm or gamma correction if the input images are HDR 00311 // I'm not sure if it's the right way how to 00312 // find out if they are HDR 00313 const char * fileType= imgInfo.getFileType(); 00314 if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) { 00315 // use gamma 2.2 00316 if (flags & ADV_GAMMA) { 00317 // GammaFunctor is only in vigra 1.6 GRRR 00318 // I have to use BrightnessContrastFunctor 00319 // TODO: change to the GammaFunctor in the future 00320 vigra::FindMinMax<float> minmax; 00321 vigra::inspectImage(srcImageRange(*pInputImg), minmax); 00322 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),BrightnessContrastFunctor<PixelType>(0.45,1.0,minmax.min, minmax.max)); 00323 } else { 00324 // take logarithm 00325 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),LogarithmFunctor<PixelType>(1.0)); 00326 } 00327 } 00328 00329 // generate initial weights 00330 transformImage(srcImageRange(*pInputImg),destImage(*weight),HatFunctor<PixelType>()); 00331 00332 convertImage(pInputImg, output, srcIsScalar()); 00333 00334 delete pInputImg; 00335 pInputImg = 0; 00336 } 00337 00338 template <class PixelType> 00339 std::vector<FImagePtr> Khan<PixelType>::createWeightMasks() { 00340 for (unsigned int i = 0; i < inputFiles.size(); i++) { 00341 FImagePtr weight; 00342 ProcessImageTypePtr processImage; 00343 preprocessImage(i, weight, processImage); 00344 processImages.push_back(processImage); 00345 weights.push_back(weight); 00346 00347 // save init weights 00348 if (debugFlags & SAVE_INITWEIGHTS) { 00349 char tmpfn[100]; 00350 snprintf(tmpfn, 99, "init_weights_%d.tiff", i); 00351 ImageExportInfo exWeights(tmpfn); 00352 exportImage(srcImageRange(*weight), exWeights.setPixelType("UINT8")); 00353 } 00354 } 00355 00356 float maxWeight = 0; 00357 // image size 00358 const int origWidth = weights[0]->width(); 00359 const int origHeight = weights[0]->height(); 00360 00361 // if we doing scaling, we have to backup L*a*b images of original size 00362 std::vector<ProcessImageTypePtr> backupLab; 00363 if (flags & ADV_MULTIRES) { 00364 for (unsigned int i = 0; i < processImages.size(); i++) { 00365 // backup original size L*a*b 00366 backupLab.push_back(processImages[i]); 00367 } 00368 } 00369 00370 if (verbosity > 0) 00371 cout << "Running khan algorithm" << endl; 00372 // and we can run khan algorithm 00373 // khan iteration 00374 for (int it = 0; it < iterations; it++) { 00375 if (verbosity > 0) 00376 cout << "iteration " << it+1 << endl; 00377 // copy weights from previous iteration 00378 if (verbosity > 1) 00379 cout << "copying weights from previous iteration" << endl; 00380 00381 std::vector<FImagePtr> prevWeights; 00382 for (unsigned int i = 0; i < weights.size(); i++) { 00383 // scale weights to the requied size 00384 if (flags & ADV_MULTIRES) { 00385 // it would be better to use resampleImage, but it seems to be present only in VIGRA 1.6 00386 // so let's use some of the resizeImageINTERPOLATION() functions 00387 00388 // compute width 00389 int resized_width = origWidth / ( iterations/(it+1) ); 00390 //compute height 00391 int resized_height = origHeight / ( iterations/(it+1) ); 00392 // destination images 00393 FImage resizedWeight; 00394 ProcessImageType resizedLab; 00395 // it's not worthy to scale to less than 100px per side 00396 if (resized_width > 100 && resized_height > 100) { 00397 // create destination image of desired size 00398 resizedWeight = FImage(Size2D(resized_width,resized_height)); 00399 resizedLab = ProcessImageType(Size2D(resized_width,resized_height)); 00400 } else if (origWidth >= 100 && origHeight >= 100) { 00401 // resize it to the smallest value (ie 100px for the shorter side) 00402 if (origWidth >= origHeight) { 00403 resizedWeight = FImage(Size2D(100*origWidth/origHeight, 100)); 00404 resizedLab = ProcessImageType(Size2D(100*origWidth/origHeight, 100)); 00405 } else { 00406 resizedWeight = FImage(Size2D(100, 100*origHeight/origWidth)); 00407 resizedLab = ProcessImageType(Size2D(100, 100*origHeight/origWidth)); 00408 } 00409 } else { 00410 // don't scale at all 00411 // just copy weights as if no scaling seting was applied 00412 goto DONTSCALE; 00413 } 00414 00415 // No interpolation – only for testing 00416 resizeImageNoInterpolation(srcImageRange(*weights[i]), destImageRange(resizedWeight)); 00417 resizeImageNoInterpolation(srcImageRange(*backupLab[i]), destImageRange(resizedLab)); 00418 00419 FImagePtr tmp(new FImage(resizedWeight)); 00420 prevWeights.push_back(tmp); 00421 processImages[i] = ProcessImageTypePtr(new ProcessImageType(resizedLab)); 00422 weights[i] = FImagePtr(new FImage(resizedWeight)); 00423 } else { 00424 DONTSCALE: 00425 FImagePtr tmp(new FImage(*weights[i])); 00426 prevWeights.push_back(tmp); 00427 } 00428 } 00429 00430 // loop through all images 00431 for (unsigned int i = 0; i < processImages.size(); i++) { 00432 if (verbosity > 1) 00433 cout << "processing image " << i+1 << endl; 00434 00435 // vector storing pixel data 00436 ProcessImagePixelType X; 00437 // sums for eq. 6 00438 double wpqssum = 0; 00439 double wpqsKhsum = 0; 00440 // image size 00441 const int width = processImages[i]->width(); 00442 const int height = processImages[i]->height(); 00443 00444 // iterator to the upper left corner 00445 ProcessImageTraverser sy = processImages[i]->upperLeft(); 00446 // iterator to the lower right corner 00447 ProcessImageTraverser send = processImages[i]->lowerRight(); 00448 // iterator to the weight image left corner 00449 FImage::traverser wy = weights[i]->upperLeft(); 00450 // loop through the row 00451 for (int y=0; sy.y != send.y; ++sy.y, ++wy.y, ++y) { 00452 // iterator to the source (L*a*b image) 00453 ProcessImageTraverser sx = sy; 00454 // iterator to the weight 00455 FImage::traverser wx = wy; 00456 // loop over the pixels 00457 for (int x=0; sx.x != send.x; ++sx.x, ++wx.x, ++x) { 00458 if (verbosity > 2) 00459 cout << "processing pixel (" << x+1 << "," << y+1 << ")" << endl; 00460 // set pixel vector 00461 X = *sx; 00462 00463 // loop through all layers 00464 for (unsigned int j = 0; j < processImages.size(); j++) { 00465 if (verbosity > 2) 00466 cout << "processing layer " << j << endl; 00467 00468 // iterator to the neighbour 00469 ProcessImageTraverser neighby = processImages[j]->upperLeft(); 00470 // iterator to the weight 00471 FImage::traverser weighty = prevWeights[j]->upperLeft(); 00472 // pixel offset 00473 int ndy = -NEIGHB_DIST; 00474 // move them to the upper bound 00475 // find the best upper bound 00476 if (y-NEIGHB_DIST < 0) { 00477 ndy = -y; 00478 } 00479 else { 00480 neighby.y += y-NEIGHB_DIST; 00481 weighty.y += y-NEIGHB_DIST; 00482 } 00483 00484 // iterate through neighbourhoods y axis 00485 int maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1); 00486 for (; ndy <= maxDisty; ++neighby.y, ++weighty.y, ++ndy) { 00487 ProcessImageTraverser neighbx = neighby; 00488 FImage::traverser weightx = weighty; 00489 // pixel offset 00490 int ndx = -NEIGHB_DIST; 00491 // move them to the upper bound 00492 // find the best upper bound 00493 if (x-NEIGHB_DIST < 0) { 00494 ndx = -x; 00495 } 00496 else { 00497 neighbx.x += x-NEIGHB_DIST; 00498 weightx.x += x-NEIGHB_DIST; 00499 } 00500 // iterate through neighbourhoods x axis 00501 int maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1); 00502 for (; ndx <= maxDistx; ++neighbx.x, ++weightx.x, ++ndx) { 00503 if (verbosity > 3) 00504 cout << "(" << ndx << "," << ndy << ")"; 00505 // now we can construct pixel vector 00506 // should omit the middle pixel, ie use only neighbours 00507 if (ndx != 0 || ndy != 0) { 00508 wpqsKhsum += (*weightx * Kh(X-(*neighbx))); 00509 wpqssum += *weightx; 00510 } 00511 00512 maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1); 00513 } 00514 if (verbosity > 3) 00515 cout << endl; 00516 00517 maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1); 00518 } 00519 } 00520 00521 if (verbosity > 2) 00522 cout << "computing new weight" << endl; 00523 // compute probability and set weight 00524 //cout << "P=" << (float) wpqsKhsum/wpqssum << endl; 00525 if (flags & ADV_ONLYP) 00526 *wx = (float) wpqsKhsum/wpqssum; 00527 else 00528 *wx *= (float) wpqsKhsum/wpqssum; 00529 if (maxWeight < *wx) 00530 maxWeight = *wx; 00531 wpqsKhsum = wpqssum = 0; 00532 00533 } 00534 } 00535 } 00536 } 00537 00538 if (verbosity > 1) 00539 cout << "normalizing weights" << endl; 00540 double factor = 255.0f/maxWeight; 00541 for (unsigned int i=0; i<weights.size(); ++i) { 00542 transformImage(srcImageRange(*(weights[i])), destImage(*(weights[i])), NormalizeFunctor<float>(factor)); 00543 } 00544 return weights; 00545 } 00546 } 00547 00548 #endif /* KHAN_H_ */
1.3.9.1