khan.h

Go to the documentation of this file.
00001 
00021 #ifndef KHAN_H_
00022 #define KHAN_H_
00023 
00024 #include "deghosting.h"
00025 // for AlgTinyVector, NormalizeFunctor and LogarithmFunctor
00026 #include "support.h"
00027 #include "algtinyvector.h"
00028 
00029 // needed for RGB2Lab
00030 #include <vigra/imageinfo.hxx>
00031 #include <vigra/transformimage.hxx>
00032 #include <vigra/colorconversions.hxx>
00033 
00034 // for resampleImage
00035 #include <vigra/resizeimage.hxx>
00036 
00037 // for RGBvalue used in hat function
00038 #include <vigra/rgbvalue.hxx>
00039 
00040 // needed for Kh()
00041 #define PI 3.14159265358979323846
00042 
00043 // number of pixels to look at in all directions
00044 // ie. 1 for neighbourhood of size 3x3, 2 for 5x5 etc.
00045 #define NEIGHB_DIST 1
00046 
00047 #if defined WIN32
00048     #define snprintf _snprintf
00049 #endif
00050 // define for use atan based kernel function
00051 // leave undefined for gaussian normal distribution function
00052 //#define ATAN_KH
00053 
00054 #ifdef DEGHOSTING_CACHE_IMAGES
00055     #include <cstring>
00056     #include <vigra/cachedfileimage.hxx>
00057 #endif
00058 
00059 using namespace vigra;
00060 
00061 namespace deghosting
00062 {
00063     template <class PixelType>
00064     class ImageTypes {
00065         public:
00066             typedef vigra::FImage ImageType;
00067             typedef boost::shared_ptr<ImageType> ImagePtr;
00068             #ifdef DEGHOSTING_CACHE_IMAGES
00069                 typedef CachedFileImage<float> ProcessImageType;
00070             #else
00071                 typedef BasicImage<float> ProcessImageType;
00072             #endif
00073             typedef boost::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00074     };
00075 
00076     template <class PixelType>
00077     class ImageTypes<RGBValue<PixelType> > {
00078         public:
00079             typedef vigra::FRGBImage ImageType;
00080             typedef boost::shared_ptr<ImageType> ImagePtr;
00081             #ifdef DEGHOSTING_CACHE_IMAGES
00082                 typedef CachedFileImage<AlgTinyVector<float, 3> > ProcessImageType;
00083             #else
00084                 typedef BasicImage<AlgTinyVector<float, 3> > ProcessImageType;
00085             #endif
00086             typedef boost::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00087     };
00088 
00089     template <class PixelType>
00090     class Khan : public Deghosting, private ImageTypes<PixelType>
00091     {
00092         public:
00093             Khan(std::vector<std::string>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00094             Khan(std::vector<ImageImportInfo>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00095             std::vector<FImagePtr> createWeightMasks();
00096             ~Khan() {}
00097         protected:
00098             typedef typename ImageTypes<PixelType>::ImageType ImageType;
00099             typedef typename ImageTypes<PixelType>::ProcessImageType ProcessImageType;
00100             typedef typename ImageTypes<PixelType>::ProcessImageType::traverser ProcessImageTraverser;
00101             typedef typename ImageTypes<PixelType>::ProcessImageType::PixelType ProcessImagePixelType;
00102             typedef typename ImageTypes<PixelType>::ProcessImageTypePtr ProcessImageTypePtr;
00103             typedef typename NumericTraits<PixelType>::isScalar srcIsScalar;
00104             
00105             // Kh() things
00106             // (2*pi)^(1/2)
00107             double PIPOW;
00108             // 1/Kh denominator
00109             double denom;
00110             // sigma in gauusian density function
00111             double sigma;
00112             
00113             // other necessary stuff
00114             std::vector<ProcessImageTypePtr> processImages;
00115             std::vector<FImagePtr> weights;
00116             
00120             void setSigma(double sigma);
00121             
00126             //void linearizeRGB(std::string, FRGBImage* pInputImg);
00127             
00131             inline float Kh(ProcessImagePixelType x);
00132             
00137             void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType);
00138             void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraTrueType);
00139             
00142             void importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType);
00143             void importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType);
00144             
00149             void preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output);
00150     };
00151     
00152     template <class PixelType>
00153     Khan<PixelType>::Khan(std::vector<std::string>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00154                 int newIterations, double newSigma, int newVerbosity) {
00155         try {
00156             Deghosting::loadImages(newInputFiles);
00157             Deghosting::setFlags(newFlags);
00158             Deghosting::setDebugFlags(newDebugFlags);
00159             Deghosting::setIterationNum(newIterations);
00160             Deghosting::setVerbosity(newVerbosity);
00161             
00162             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00163             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00164             const char * fileType= ImageImportInfo(newInputFiles[0].c_str()).getFileType();
00165             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00166                 setSigma(newSigma/10);
00167             } else {
00168                 setSigma(newSigma);
00169             }
00170             
00171             for (unsigned int i=0; i<5; i++)
00172                 Deghosting::response.push_back(0);
00173             PIPOW = sigma*std::sqrt(2*PI);
00174             denom = 1/PIPOW;
00175         } catch (...) {
00176             throw;
00177         }
00178     }
00179     
00180     template <class PixelType>
00181     Khan<PixelType>::Khan(std::vector<ImageImportInfo>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00182                 int newIterations, double newSigma, int newVerbosity) {
00183         try {
00184             Deghosting::loadImages(newInputFiles);
00185             Deghosting::setFlags(newFlags);
00186             Deghosting::setDebugFlags(newDebugFlags);
00187             Deghosting::setIterationNum(newIterations);
00188             Deghosting::setVerbosity(newVerbosity);
00189             
00190             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00191             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00192             const char * fileType= newInputFiles[0].getFileType();
00193             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00194                 setSigma(newSigma/10);
00195             } else {
00196                 setSigma(newSigma);
00197             }
00198             
00199             for (unsigned int i=0; i<5; i++)
00200                 Deghosting::response.push_back(0);
00201             PIPOW = sigma*std::sqrt(2*PI);
00202             denom = 1/PIPOW;
00203         } catch (...) {
00204             throw;
00205         }
00206     }
00207     
00208     template <class PixelType>
00209     void Khan<PixelType>::setSigma(double newSigma) {
00210         sigma = newSigma;
00211     }
00212     
00213     template <class PixelType>
00214     float Khan<PixelType>::Kh(ProcessImagePixelType x) {
00215         #ifdef ATAN_KH
00216             // good choice for sigma for this function is around 600
00217             return std::atan(-(x*x)+sigma)/PI + 0.5;
00218         #else
00219             // good choice for sigma for this function is around 30
00220             return (std::exp(-(x*x)/(2*sigma*sigma)) * denom);
00221         #endif
00222     }
00223     
00224     /*void Khan::linearizeRGB(std::string inputFile,FRGBImage *pInputImg) {
00225         HuginBase::SrcPanoImage panoImg(inputFile);
00226         panoImg.setResponseType(HuginBase::SrcPanoImage::RESPONSE_EMOR);
00227         panoImg.setEMoRParams(response);
00228         // response transform functor
00229         HuginBase::Photometric::InvResponseTransform<RGBValue<float>,
00230                                                      RGBValue<float> > invResponse(panoImg);
00231         invResponse.enforceMonotonicity();
00232 
00233         // iterator to the upper left corner
00234         FRGBImage::traverser imgIterSourceY = pInputImg->upperLeft();
00235         // iterator to he lower right corner
00236         FRGBImage::traverser imgIterEnd = pInputImg->lowerRight();
00237         // iterator to the output
00238         FRGBImage::traverser imgIterOut = pInputImg->upperLeft();
00239         // loop through the image
00240         for (int y=1; imgIterSourceY.y != imgIterEnd.y; ++imgIterSourceY.y, ++imgIterOut.y, ++y) {
00241             // iterator to the input
00242             FRGBImage::traverser sx = imgIterSourceY;
00243             // iterator to the output
00244             FRGBImage::traverser dx = imgIterOut;
00245             for (int x=1; sx.x != imgIterEnd.x; ++sx.x, ++dx.x, ++x) {
00246                 // transform image using response
00247                 *dx = vigra_ext::zeroNegative(invResponse(*sx, hugin_utils::FDiff2D(x, y)));
00248             }
00249         }
00250     }*/
00251     
00252     // RGB
00253     template <class PixelType>
00254     void Khan<PixelType>::convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType) {
00255         RGB2LabFunctor<float> RGB2Lab;
00256         transformImage(srcImageRange(*in), destImage(*out), RGB2Lab);
00257     }
00258     
00259     // grayscale
00260     template <class PixelType>
00261     void Khan<PixelType>::convertImage(ImageType* in, ProcessImageTypePtr& out, VigraTrueType) {
00262         copyImage(srcImageRange(*in), destImage(*out));
00263     }
00264     
00265     // load image and convert it to grayscale
00266     template <class PixelType>
00267     void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType) {
00268         // NOTE: I guess this is not optimal, but it works
00269         RGBToGrayAccessor<FRGBImage::PixelType> color2gray;
00270         FRGBImage tmpImg(info.size());
00271         if (info.numBands() == 4) {
00272             BImage imgAlpha(info.size());
00273             importImageAlpha(info, destImage(tmpImg), destImage(imgAlpha));
00274         } else {
00275             importImage(info, destImage(tmpImg));
00276         }
00277         transformImage(srcImageRange(tmpImg, color2gray), destImage(*img), log(Arg1()+Param(1.0f)));
00278     }
00279     
00280     // only load image
00281     template <class PixelType>
00282     void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType) {
00283         if (info.numBands() == 4) {
00284             BImage imgAlpha(info.size());
00285             importImageAlpha(info, destImage(*img), destImage(imgAlpha));
00286         } else {
00287             importImage(info, destImage(*img));
00288         }
00289     }
00290     
00291     template <class PixelType>
00292     void Khan<PixelType>::preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output) {
00293         ImageImportInfo imgInfo(inputFiles[i]);
00294         ImageType * pInputImg =  new ImageType(imgInfo.size());
00295         weight = FImagePtr(new FImage(imgInfo.size()));
00296         output = ProcessImageTypePtr(new ProcessImageType(imgInfo.size()));
00297         
00298         // import image
00299         // NOTE: Maybe alpha can be of some use but I don't
00300         // know about any now
00301         if (imgInfo.isColor()) {
00302             importRGBImage(imgInfo, pInputImg, srcIsScalar());
00303         } else {
00304             importImage(imgInfo, destImage(*pInputImg));
00305         }
00306         
00307         // linearize RGB and convert it to L*a*b image
00308         //linearizeRGB(inputFiles[i], pInputImg);
00309         
00310         // take logarithm or gamma correction if the input images are HDR
00311         // I'm not sure if it's the right way how to
00312         // find out if they are HDR
00313         const char * fileType= imgInfo.getFileType();
00314         if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00315             // use gamma 2.2
00316             if (flags & ADV_GAMMA) {
00317                 // GammaFunctor is only in vigra 1.6 GRRR
00318                 // I have to use BrightnessContrastFunctor
00319                 // TODO: change to the GammaFunctor in the future
00320                 vigra::FindMinMax<float> minmax;
00321                 vigra::inspectImage(srcImageRange(*pInputImg), minmax);
00322                 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),BrightnessContrastFunctor<PixelType>(0.45,1.0,minmax.min, minmax.max));
00323             } else {
00324                 // take logarithm
00325                 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),LogarithmFunctor<PixelType>(1.0));
00326             }
00327         }
00328         
00329         // generate initial weights
00330         transformImage(srcImageRange(*pInputImg),destImage(*weight),HatFunctor<PixelType>());
00331         
00332         convertImage(pInputImg, output, srcIsScalar());
00333         
00334         delete pInputImg;
00335         pInputImg = 0;
00336     }
00337     
00338     template <class PixelType>
00339     std::vector<FImagePtr> Khan<PixelType>::createWeightMasks() {
00340         for (unsigned int i = 0; i < inputFiles.size(); i++) {
00341             FImagePtr weight;
00342             ProcessImageTypePtr processImage;
00343             preprocessImage(i, weight, processImage);
00344             processImages.push_back(processImage);
00345             weights.push_back(weight);
00346             
00347             // save init weights
00348             if (debugFlags & SAVE_INITWEIGHTS) {
00349                 char tmpfn[100];
00350                 snprintf(tmpfn, 99, "init_weights_%d.tiff", i);
00351                 ImageExportInfo exWeights(tmpfn);
00352                 exportImage(srcImageRange(*weight), exWeights.setPixelType("UINT8"));
00353             }
00354         }
00355         
00356         float maxWeight = 0;
00357         // image size
00358         const int origWidth = weights[0]->width();
00359         const int origHeight = weights[0]->height();
00360         
00361         // if we doing scaling, we have to backup L*a*b images of original size
00362         std::vector<ProcessImageTypePtr> backupLab;
00363         if (flags & ADV_MULTIRES) {
00364             for (unsigned int i = 0; i < processImages.size(); i++) {
00365                 // backup original size L*a*b
00366                 backupLab.push_back(processImages[i]);
00367             }
00368         }
00369         
00370         if (verbosity > 0)
00371             cout << "Running khan algorithm" << endl;
00372         // and we can run khan algorithm
00373         // khan iteration
00374         for (int it = 0; it < iterations; it++) {
00375             if (verbosity > 0)
00376                 cout << "iteration " << it+1 << endl;
00377             // copy weights from previous iteration
00378             if (verbosity > 1)
00379                 cout << "copying weights from previous iteration" << endl;
00380             
00381             std::vector<FImagePtr> prevWeights;
00382             for (unsigned int i = 0; i < weights.size(); i++) {
00383                 // scale weights to the requied size
00384                 if (flags & ADV_MULTIRES) {
00385                     // it would be better to use resampleImage, but it seems to be present only in VIGRA 1.6
00386                     // so let's use some of the resizeImageINTERPOLATION() functions
00387                     
00388                     // compute width
00389                     int resized_width = origWidth / ( iterations/(it+1) );
00390                     //compute height
00391                     int resized_height = origHeight / ( iterations/(it+1) );
00392                     // destination images
00393                     FImage resizedWeight;
00394                     ProcessImageType resizedLab;
00395                     // it's not worthy to scale to less than 100px per side
00396                     if (resized_width > 100 && resized_height > 100) {
00397                         // create destination image of desired size
00398                         resizedWeight = FImage(Size2D(resized_width,resized_height));
00399                         resizedLab = ProcessImageType(Size2D(resized_width,resized_height));
00400                     } else if (origWidth >= 100 && origHeight >= 100) {
00401                         // resize it to the smallest value (ie 100px for the shorter side)
00402                         if (origWidth >= origHeight) {
00403                             resizedWeight = FImage(Size2D(100*origWidth/origHeight, 100));
00404                             resizedLab = ProcessImageType(Size2D(100*origWidth/origHeight, 100));
00405                         } else {
00406                             resizedWeight = FImage(Size2D(100, 100*origHeight/origWidth));
00407                             resizedLab = ProcessImageType(Size2D(100, 100*origHeight/origWidth));
00408                         }
00409                     } else {
00410                         // don't scale at all
00411                         // just copy weights as if no scaling seting was applied
00412                         goto DONTSCALE;
00413                     }
00414                     
00415                     // No interpolation – only for testing
00416                     resizeImageNoInterpolation(srcImageRange(*weights[i]), destImageRange(resizedWeight));
00417                     resizeImageNoInterpolation(srcImageRange(*backupLab[i]), destImageRange(resizedLab));
00418                     
00419                     FImagePtr tmp(new FImage(resizedWeight));
00420                     prevWeights.push_back(tmp);
00421                     processImages[i] = ProcessImageTypePtr(new ProcessImageType(resizedLab));
00422                     weights[i] = FImagePtr(new FImage(resizedWeight));
00423                 } else {
00424                     DONTSCALE:
00425                     FImagePtr tmp(new FImage(*weights[i]));
00426                     prevWeights.push_back(tmp);
00427                 }
00428             }
00429             
00430             // loop through all images
00431             for (unsigned int i = 0; i < processImages.size(); i++) {
00432                 if (verbosity > 1)
00433                     cout << "processing image " << i+1 << endl;
00434                 
00435                 // vector storing pixel data
00436                 ProcessImagePixelType X;
00437                 // sums for eq. 6
00438                 double wpqssum = 0;
00439                 double wpqsKhsum = 0;
00440                 // image size
00441                 const int width = processImages[i]->width();
00442                 const int height = processImages[i]->height();
00443 
00444                 // iterator to the upper left corner
00445                 ProcessImageTraverser sy = processImages[i]->upperLeft();
00446                 // iterator to the lower right corner
00447                 ProcessImageTraverser send = processImages[i]->lowerRight();
00448                 // iterator to the weight image left corner
00449                 FImage::traverser wy = weights[i]->upperLeft();
00450                 // loop through the row
00451                 for (int y=0; sy.y != send.y; ++sy.y, ++wy.y, ++y) {
00452                     // iterator to the source (L*a*b image)
00453                     ProcessImageTraverser sx = sy;
00454                     // iterator to the weight
00455                     FImage::traverser wx = wy;
00456                     // loop over the pixels
00457                     for (int x=0; sx.x != send.x; ++sx.x, ++wx.x, ++x) {
00458                         if (verbosity > 2)
00459                             cout << "processing pixel (" << x+1 << "," << y+1 << ")" << endl;
00460                         // set pixel vector
00461                         X = *sx;
00462                         
00463                         // loop through all layers
00464                         for (unsigned int j = 0; j < processImages.size(); j++) {
00465                             if (verbosity > 2)
00466                                 cout << "processing layer " << j << endl;
00467                             
00468                             // iterator to the neighbour
00469                             ProcessImageTraverser neighby = processImages[j]->upperLeft();
00470                             // iterator to the weight
00471                             FImage::traverser weighty = prevWeights[j]->upperLeft();
00472                             // pixel offset
00473                             int ndy = -NEIGHB_DIST;
00474                             // move them to the upper bound
00475                             // find the best upper bound
00476                             if (y-NEIGHB_DIST < 0) {
00477                                 ndy = -y;
00478                             }
00479                             else {
00480                                 neighby.y += y-NEIGHB_DIST;
00481                                 weighty.y += y-NEIGHB_DIST;
00482                             }
00483                             
00484                             // iterate through neighbourhoods y axis
00485                             int maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00486                             for (; ndy <= maxDisty; ++neighby.y, ++weighty.y, ++ndy) {
00487                                 ProcessImageTraverser neighbx = neighby;
00488                                 FImage::traverser weightx = weighty;
00489                                 // pixel offset
00490                                 int ndx = -NEIGHB_DIST;
00491                                 // move them to the upper bound
00492                                 // find the best upper bound
00493                                 if (x-NEIGHB_DIST < 0) {
00494                                     ndx = -x;
00495                                 }
00496                                 else {
00497                                     neighbx.x += x-NEIGHB_DIST;
00498                                     weightx.x += x-NEIGHB_DIST;
00499                                 }
00500                                 // iterate through neighbourhoods x axis
00501                                 int maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00502                                 for (; ndx <= maxDistx; ++neighbx.x, ++weightx.x, ++ndx) {
00503                                     if (verbosity > 3)
00504                                         cout << "(" << ndx << "," << ndy << ")";
00505                                     // now we can construct pixel vector
00506                                     // should omit the middle pixel, ie use only neighbours
00507                                     if (ndx != 0 || ndy != 0) {
00508                                         wpqsKhsum += (*weightx * Kh(X-(*neighbx)));
00509                                         wpqssum += *weightx;
00510                                     }
00511                                     
00512                                     maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00513                                 }
00514                                 if (verbosity > 3)
00515                                     cout << endl;
00516                                 
00517                                 maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00518                             }
00519                         }
00520                         
00521                         if (verbosity > 2)
00522                             cout << "computing new weight" << endl;
00523                         // compute probability and set weight
00524                         //cout << "P=" << (float) wpqsKhsum/wpqssum << endl;
00525                         if (flags & ADV_ONLYP)
00526                             *wx = (float) wpqsKhsum/wpqssum;
00527                         else
00528                             *wx *= (float) wpqsKhsum/wpqssum;
00529                         if (maxWeight < *wx)
00530                             maxWeight = *wx;
00531                         wpqsKhsum = wpqssum = 0;
00532                         
00533                     }
00534                 }
00535             }
00536         }
00537         
00538         if (verbosity > 1)
00539                 cout << "normalizing weights" << endl;
00540         double factor = 255.0f/maxWeight;
00541         for (unsigned int i=0; i<weights.size(); ++i) {
00542             transformImage(srcImageRange(*(weights[i])), destImage(*(weights[i])), NormalizeFunctor<float>(factor));
00543         }
00544         return weights;
00545     }
00546 }
00547 
00548 #endif /* KHAN_H_ */

Generated on Mon Sep 22 01:25:31 2014 for Hugintrunk by  doxygen 1.3.9.1