khan.h

Go to the documentation of this file.
00001 
00021 #ifndef KHAN_H_
00022 #define KHAN_H_
00023 
00024 #include "deghosting.h"
00025 // for AlgTinyVector, NormalizeFunctor and LogarithmFunctor
00026 #include "support.h"
00027 #include "algtinyvector.h"
00028 
00029 // needed for RGB2Lab
00030 #include <vigra/imageinfo.hxx>
00031 #include <vigra/transformimage.hxx>
00032 #include <vigra/colorconversions.hxx>
00033 
00034 // for resampleImage
00035 #include <vigra/resizeimage.hxx>
00036 
00037 // for RGBvalue used in hat function
00038 #include <vigra/rgbvalue.hxx>
00039 // for importImage und importImageAlpha
00040 #include <vigra_ext/impexalpha.hxx>
00041 
00042 // needed for Kh()
00043 #define PI 3.14159265358979323846
00044 
00045 // number of pixels to look at in all directions
00046 // ie. 1 for neighbourhood of size 3x3, 2 for 5x5 etc.
00047 #define NEIGHB_DIST 1
00048 
00049 #if defined WIN32
00050     #define snprintf _snprintf
00051 #endif
00052 // define for use atan based kernel function
00053 // leave undefined for gaussian normal distribution function
00054 //#define ATAN_KH
00055 
00056 using namespace vigra;
00057 
00058 namespace deghosting
00059 {
00060     template <class PixelType>
00061     class ImageTypes {
00062         public:
00063             typedef vigra::FImage ImageType;
00064             typedef sharedPtrNamespace::shared_ptr<ImageType> ImagePtr;
00065             typedef BasicImage<float> ProcessImageType;
00066             typedef sharedPtrNamespace::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00067     };
00068 
00069     template <class PixelType>
00070     class ImageTypes<RGBValue<PixelType> > {
00071         public:
00072             typedef vigra::FRGBImage ImageType;
00073             typedef sharedPtrNamespace::shared_ptr<ImageType> ImagePtr;
00074             typedef BasicImage<AlgTinyVector<float, 3> > ProcessImageType;
00075             typedef sharedPtrNamespace::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00076     };
00077 
00078     template <class PixelType>
00079     class Khan : public Deghosting, private ImageTypes<PixelType>
00080     {
00081         public:
00082             Khan(std::vector<std::string>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00083             Khan(std::vector<ImageImportInfo>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00084             std::vector<FImagePtr> createWeightMasks();
00085             ~Khan() {}
00086         protected:
00087             typedef typename ImageTypes<PixelType>::ImageType ImageType;
00088             typedef typename ImageTypes<PixelType>::ProcessImageType ProcessImageType;
00089             typedef typename ImageTypes<PixelType>::ProcessImageType::traverser ProcessImageTraverser;
00090             typedef typename ImageTypes<PixelType>::ProcessImageType::PixelType ProcessImagePixelType;
00091             typedef typename ImageTypes<PixelType>::ProcessImageTypePtr ProcessImageTypePtr;
00092             typedef typename NumericTraits<PixelType>::isScalar srcIsScalar;
00093             
00094             // Kh() things
00095             // (2*pi)^(1/2)
00096             double PIPOW;
00097             // 1/Kh denominator
00098             double denom;
00099             // sigma in gauusian density function
00100             double sigma;
00101             
00102             // other necessary stuff
00103             std::vector<ProcessImageTypePtr> processImages;
00104             std::vector<FImagePtr> weights;
00105             
00109             void setSigma(double sigma);
00110             
00115             //void linearizeRGB(std::string, FRGBImage* pInputImg);
00116             
00120             inline float Kh(ProcessImagePixelType x);
00121             
00126             void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType);
00127             void convertImage(ImageType * in, ProcessImageTypePtr & out, VigraTrueType);
00128             
00131             void importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType);
00132             void importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType);
00133             
00138             void preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output);
00139     };
00140     
00141     template <class PixelType>
00142     Khan<PixelType>::Khan(std::vector<std::string>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00143                 int newIterations, double newSigma, int newVerbosity) {
00144         try {
00145             Deghosting::loadImages(newInputFiles);
00146             Deghosting::setFlags(newFlags);
00147             Deghosting::setDebugFlags(newDebugFlags);
00148             Deghosting::setIterationNum(newIterations);
00149             Deghosting::setVerbosity(newVerbosity);
00150             
00151             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00152             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00153             const char * fileType= ImageImportInfo(newInputFiles[0].c_str()).getFileType();
00154             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00155                 setSigma(newSigma/10);
00156             } else {
00157                 setSigma(newSigma);
00158             }
00159             
00160             for (unsigned int i=0; i<5; i++)
00161                 Deghosting::response.push_back(0);
00162             PIPOW = sigma*std::sqrt(2*PI);
00163             denom = 1/PIPOW;
00164         } catch (...) {
00165             throw;
00166         }
00167     }
00168     
00169     template <class PixelType>
00170     Khan<PixelType>::Khan(std::vector<ImageImportInfo>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00171                 int newIterations, double newSigma, int newVerbosity) {
00172         try {
00173             Deghosting::loadImages(newInputFiles);
00174             Deghosting::setFlags(newFlags);
00175             Deghosting::setDebugFlags(newDebugFlags);
00176             Deghosting::setIterationNum(newIterations);
00177             Deghosting::setVerbosity(newVerbosity);
00178             
00179             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00180             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00181             const char * fileType= newInputFiles[0].getFileType();
00182             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00183                 setSigma(newSigma/10);
00184             } else {
00185                 setSigma(newSigma);
00186             }
00187             
00188             for (unsigned int i=0; i<5; i++)
00189                 Deghosting::response.push_back(0);
00190             PIPOW = sigma*std::sqrt(2*PI);
00191             denom = 1/PIPOW;
00192         } catch (...) {
00193             throw;
00194         }
00195     }
00196     
00197     template <class PixelType>
00198     void Khan<PixelType>::setSigma(double newSigma) {
00199         sigma = newSigma;
00200     }
00201     
00202     template <class PixelType>
00203     float Khan<PixelType>::Kh(ProcessImagePixelType x) {
00204         #ifdef ATAN_KH
00205             // good choice for sigma for this function is around 600
00206             return std::atan(-(x*x)+sigma)/PI + 0.5;
00207         #else
00208             // good choice for sigma for this function is around 30
00209             return (std::exp(-(x*x)/(2*sigma*sigma)) * denom);
00210         #endif
00211     }
00212     
00213     /*void Khan::linearizeRGB(std::string inputFile,FRGBImage *pInputImg) {
00214         HuginBase::SrcPanoImage panoImg(inputFile);
00215         panoImg.setResponseType(HuginBase::SrcPanoImage::RESPONSE_EMOR);
00216         panoImg.setEMoRParams(response);
00217         // response transform functor
00218         HuginBase::Photometric::InvResponseTransform<RGBValue<float>,
00219                                                      RGBValue<float> > invResponse(panoImg);
00220         invResponse.enforceMonotonicity();
00221 
00222         // iterator to the upper left corner
00223         FRGBImage::traverser imgIterSourceY = pInputImg->upperLeft();
00224         // iterator to he lower right corner
00225         FRGBImage::traverser imgIterEnd = pInputImg->lowerRight();
00226         // iterator to the output
00227         FRGBImage::traverser imgIterOut = pInputImg->upperLeft();
00228         // loop through the image
00229         for (int y=1; imgIterSourceY.y != imgIterEnd.y; ++imgIterSourceY.y, ++imgIterOut.y, ++y) {
00230             // iterator to the input
00231             FRGBImage::traverser sx = imgIterSourceY;
00232             // iterator to the output
00233             FRGBImage::traverser dx = imgIterOut;
00234             for (int x=1; sx.x != imgIterEnd.x; ++sx.x, ++dx.x, ++x) {
00235                 // transform image using response
00236                 *dx = vigra_ext::zeroNegative(invResponse(*sx, hugin_utils::FDiff2D(x, y)));
00237             }
00238         }
00239     }*/
00240     
00241     // RGB
00242     template <class PixelType>
00243     void Khan<PixelType>::convertImage(ImageType * in, ProcessImageTypePtr & out, VigraFalseType) {
00244         RGB2LabFunctor<float> RGB2Lab;
00245         transformImage(srcImageRange(*in), destImage(*out), RGB2Lab);
00246     }
00247     
00248     // grayscale
00249     template <class PixelType>
00250     void Khan<PixelType>::convertImage(ImageType* in, ProcessImageTypePtr& out, VigraTrueType) {
00251         copyImage(srcImageRange(*in), destImage(*out));
00252     }
00253     
00254     // load image and convert it to grayscale
00255     template <class PixelType>
00256     void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraTrueType) {
00257         // NOTE: I guess this is not optimal, but it works
00258         RGBToGrayAccessor<FRGBImage::PixelType> color2gray;
00259         FRGBImage tmpImg(info.size());
00260         if (info.numBands() == 4) {
00261             BImage imgAlpha(info.size());
00262             importImageAlpha(info, destImage(tmpImg), destImage(imgAlpha));
00263         } else {
00264             importImage(info, destImage(tmpImg));
00265         }
00266         transformImage(srcImageRange(tmpImg, color2gray), destImage(*img), log(Arg1()+Param(1.0f)));
00267     }
00268     
00269     // only load image
00270     template <class PixelType>
00271     void Khan<PixelType>::importRGBImage(ImageImportInfo & info, ImageType * img, VigraFalseType) {
00272         if (info.numBands() == 4) {
00273             BImage imgAlpha(info.size());
00274             importImageAlpha(info, destImage(*img), destImage(imgAlpha));
00275         } else {
00276             importImage(info, destImage(*img));
00277         }
00278     }
00279     
00280     template <class PixelType>
00281     void Khan<PixelType>::preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output) {
00282         ImageImportInfo imgInfo(inputFiles[i]);
00283         ImageType * pInputImg =  new ImageType(imgInfo.size());
00284         weight = FImagePtr(new FImage(imgInfo.size()));
00285         output = ProcessImageTypePtr(new ProcessImageType(imgInfo.size()));
00286         
00287         // import image
00288         // NOTE: Maybe alpha can be of some use but I don't
00289         // know about any now
00290         if (imgInfo.isColor()) {
00291             importRGBImage(imgInfo, pInputImg, srcIsScalar());
00292         } else {
00293             importImage(imgInfo, destImage(*pInputImg));
00294         }
00295         
00296         // linearize RGB and convert it to L*a*b image
00297         //linearizeRGB(inputFiles[i], pInputImg);
00298         
00299         // take logarithm or gamma correction if the input images are HDR
00300         // I'm not sure if it's the right way how to
00301         // find out if they are HDR
00302         const char * fileType= imgInfo.getFileType();
00303         if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00304             // use gamma 2.2
00305             if (flags & ADV_GAMMA) {
00306                 // GammaFunctor is only in vigra 1.6 GRRR
00307                 // I have to use BrightnessContrastFunctor
00308                 // TODO: change to the GammaFunctor in the future
00309                 vigra::FindMinMax<float> minmax;
00310                 vigra::inspectImage(srcImageRange(*pInputImg), minmax);
00311                 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),BrightnessContrastFunctor<PixelType>(0.45f,1.0,minmax.min, minmax.max));
00312             } else {
00313                 // take logarithm
00314                 transformImage(srcImageRange(*pInputImg),destImage(*pInputImg),LogarithmFunctor<PixelType>(1.0));
00315             }
00316         }
00317         
00318         // generate initial weights
00319         transformImage(srcImageRange(*pInputImg),destImage(*weight),HatFunctor<PixelType>());
00320         
00321         convertImage(pInputImg, output, srcIsScalar());
00322         
00323         delete pInputImg;
00324         pInputImg = 0;
00325     }
00326     
00327     template <class PixelType>
00328     std::vector<FImagePtr> Khan<PixelType>::createWeightMasks() {
00329         for (unsigned int i = 0; i < inputFiles.size(); i++) {
00330             FImagePtr weight;
00331             ProcessImageTypePtr processImage;
00332             preprocessImage(i, weight, processImage);
00333             processImages.push_back(processImage);
00334             weights.push_back(weight);
00335             
00336             // save init weights
00337             if (debugFlags & SAVE_INITWEIGHTS) {
00338                 char tmpfn[100];
00339                 snprintf(tmpfn, 99, "init_weights_%u.tiff", i);
00340                 ImageExportInfo exWeights(tmpfn);
00341                 exportImage(srcImageRange(*weight), exWeights.setPixelType("UINT8"));
00342             }
00343         }
00344         
00345         float maxWeight = 0;
00346         // image size
00347         const int origWidth = weights[0]->width();
00348         const int origHeight = weights[0]->height();
00349         
00350         // if we doing scaling, we have to backup L*a*b images of original size
00351         std::vector<ProcessImageTypePtr> backupLab;
00352         if (flags & ADV_MULTIRES) {
00353             for (unsigned int i = 0; i < processImages.size(); i++) {
00354                 // backup original size L*a*b
00355                 backupLab.push_back(processImages[i]);
00356             }
00357         }
00358         
00359         if (verbosity > 0)
00360             std::cout << "Running khan algorithm" << std::endl;
00361         // and we can run khan algorithm
00362         // khan iteration
00363         for (int it = 0; it < iterations; it++) {
00364             if (verbosity > 0)
00365                 std::cout << "iteration " << it+1 << std::endl;
00366             // copy weights from previous iteration
00367             if (verbosity > 1)
00368                 std::cout << "copying weights from previous iteration" << std::endl;
00369             
00370             std::vector<FImagePtr> prevWeights;
00371             for (unsigned int i = 0; i < weights.size(); i++) {
00372                 // scale weights to the requied size
00373                 if (flags & ADV_MULTIRES) {
00374                     // it would be better to use resampleImage, but it seems to be present only in VIGRA 1.6
00375                     // so let's use some of the resizeImageINTERPOLATION() functions
00376                     
00377                     // compute width
00378                     int resized_width = origWidth / ( iterations/(it+1) );
00379                     //compute height
00380                     int resized_height = origHeight / ( iterations/(it+1) );
00381                     // destination images
00382                     FImage resizedWeight;
00383                     ProcessImageType resizedLab;
00384                     // it's not worthy to scale to less than 100px per side
00385                     if (resized_width > 100 && resized_height > 100) {
00386                         // create destination image of desired size
00387                         resizedWeight = FImage(Size2D(resized_width,resized_height));
00388                         resizedLab = ProcessImageType(Size2D(resized_width,resized_height));
00389                     } else if (origWidth >= 100 && origHeight >= 100) {
00390                         // resize it to the smallest value (ie 100px for the shorter side)
00391                         if (origWidth >= origHeight) {
00392                             resizedWeight = FImage(Size2D(100*origWidth/origHeight, 100));
00393                             resizedLab = ProcessImageType(Size2D(100*origWidth/origHeight, 100));
00394                         } else {
00395                             resizedWeight = FImage(Size2D(100, 100*origHeight/origWidth));
00396                             resizedLab = ProcessImageType(Size2D(100, 100*origHeight/origWidth));
00397                         }
00398                     } else {
00399                         // don't scale at all
00400                         // just copy weights as if no scaling seting was applied
00401                         goto DONTSCALE;
00402                     }
00403                     
00404                     // No interpolation – only for testing
00405                     resizeImageNoInterpolation(srcImageRange(*weights[i]), destImageRange(resizedWeight));
00406                     resizeImageNoInterpolation(srcImageRange(*backupLab[i]), destImageRange(resizedLab));
00407                     
00408                     FImagePtr tmp(new FImage(resizedWeight));
00409                     prevWeights.push_back(tmp);
00410                     processImages[i] = ProcessImageTypePtr(new ProcessImageType(resizedLab));
00411                     weights[i] = FImagePtr(new FImage(resizedWeight));
00412                 } else {
00413                     DONTSCALE:
00414                     FImagePtr tmp(new FImage(*weights[i]));
00415                     prevWeights.push_back(tmp);
00416                 }
00417             }
00418             
00419             // loop through all images
00420             for (unsigned int i = 0; i < processImages.size(); i++) {
00421                 if (verbosity > 1)
00422                     std::cout << "processing image " << i+1 << std::endl;
00423                 
00424                 // vector storing pixel data
00425                 ProcessImagePixelType X;
00426                 // sums for eq. 6
00427                 double wpqssum = 0;
00428                 double wpqsKhsum = 0;
00429                 // image size
00430                 const int width = processImages[i]->width();
00431                 const int height = processImages[i]->height();
00432 
00433                 // iterator to the upper left corner
00434                 ProcessImageTraverser sy = processImages[i]->upperLeft();
00435                 // iterator to the lower right corner
00436                 ProcessImageTraverser send = processImages[i]->lowerRight();
00437                 // iterator to the weight image left corner
00438                 FImage::traverser wy = weights[i]->upperLeft();
00439                 // loop through the row
00440                 for (int y=0; sy.y != send.y; ++sy.y, ++wy.y, ++y) {
00441                     // iterator to the source (L*a*b image)
00442                     ProcessImageTraverser sx = sy;
00443                     // iterator to the weight
00444                     FImage::traverser wx = wy;
00445                     // loop over the pixels
00446                     for (int x=0; sx.x != send.x; ++sx.x, ++wx.x, ++x) {
00447                         if (verbosity > 2)
00448                             std::cout << "processing pixel (" << x+1 << "," << y+1 << ")" << std::endl;
00449                         // set pixel vector
00450                         X = *sx;
00451                         
00452                         // loop through all layers
00453                         for (unsigned int j = 0; j < processImages.size(); j++) {
00454                             if (verbosity > 2)
00455                                 std::cout << "processing layer " << j << std::endl;
00456                             
00457                             // iterator to the neighbour
00458                             ProcessImageTraverser neighby = processImages[j]->upperLeft();
00459                             // iterator to the weight
00460                             FImage::traverser weighty = prevWeights[j]->upperLeft();
00461                             // pixel offset
00462                             int ndy = -NEIGHB_DIST;
00463                             // move them to the upper bound
00464                             // find the best upper bound
00465                             if (y-NEIGHB_DIST < 0) {
00466                                 ndy = -y;
00467                             }
00468                             else {
00469                                 neighby.y += y-NEIGHB_DIST;
00470                                 weighty.y += y-NEIGHB_DIST;
00471                             }
00472                             
00473                             // iterate through neighbourhoods y axis
00474                             int maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00475                             for (; ndy <= maxDisty; ++neighby.y, ++weighty.y, ++ndy) {
00476                                 ProcessImageTraverser neighbx = neighby;
00477                                 FImage::traverser weightx = weighty;
00478                                 // pixel offset
00479                                 int ndx = -NEIGHB_DIST;
00480                                 // move them to the upper bound
00481                                 // find the best upper bound
00482                                 if (x-NEIGHB_DIST < 0) {
00483                                     ndx = -x;
00484                                 }
00485                                 else {
00486                                     neighbx.x += x-NEIGHB_DIST;
00487                                     weightx.x += x-NEIGHB_DIST;
00488                                 }
00489                                 // iterate through neighbourhoods x axis
00490                                 int maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00491                                 for (; ndx <= maxDistx; ++neighbx.x, ++weightx.x, ++ndx) {
00492                                     if (verbosity > 3)
00493                                         std::cout << "(" << ndx << "," << ndy << ")";
00494                                     // now we can construct pixel vector
00495                                     // should omit the middle pixel, ie use only neighbours
00496                                     if (ndx != 0 || ndy != 0) {
00497                                         wpqsKhsum += (*weightx * Kh(X-(*neighbx)));
00498                                         wpqssum += *weightx;
00499                                     }
00500                                     
00501                                     maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00502                                 }
00503                                 if (verbosity > 3)
00504                                     std::cout << std::endl;
00505                                 
00506                                 maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00507                             }
00508                         }
00509                         
00510                         if (verbosity > 2)
00511                             std::cout << "computing new weight" << std::endl;
00512                         // compute probability and set weight
00513                         //std::cout << "P=" << (float) wpqsKhsum/wpqssum << std::endl;
00514                         if (flags & ADV_ONLYP)
00515                             *wx = (float) wpqsKhsum/wpqssum;
00516                         else
00517                             *wx *= (float) wpqsKhsum/wpqssum;
00518                         if (maxWeight < *wx)
00519                             maxWeight = *wx;
00520                         wpqsKhsum = wpqssum = 0;
00521                         
00522                     }
00523                 }
00524             }
00525         }
00526         
00527         if (verbosity > 1)
00528                 std::cout << "normalizing weights" << std::endl;
00529         double factor = 255.0f/maxWeight;
00530         for (unsigned int i=0; i<weights.size(); ++i) {
00531             transformImage(srcImageRange(*(weights[i])), destImage(*(weights[i])), NormalizeFunctor<float>(factor));
00532         }
00533         return weights;
00534     }
00535 }
00536 
00537 #endif /* KHAN_H_ */

Generated on 28 Jul 2015 for Hugintrunk by  doxygen 1.4.7