khan.h

Go to the documentation of this file.
00001 
00021 #ifndef KHAN_H_
00022 #define KHAN_H_
00023 
00024 #include "deghosting.h"
00025 // for AlgTinyVector, NormalizeFunctor and LogarithmFunctor
00026 #include "support.h"
00027 #include "algtinyvector.h"
00028 
00029 // needed for RGB2Lab
00030 #include <vigra/imageinfo.hxx>
00031 #include <vigra/transformimage.hxx>
00032 #include <vigra/colorconversions.hxx>
00033 
00034 // for resampleImage
00035 #include <vigra/resizeimage.hxx>
00036 
00037 // for RGBvalue used in hat function
00038 #include <vigra/rgbvalue.hxx>
00039 // for importImage und importImageAlpha
00040 #include <vigra_ext/impexalpha.hxx>
00041 
00042 // needed for Kh()
00043 #define PI 3.14159265358979323846
00044 
00045 // number of pixels to look at in all directions
00046 // ie. 1 for neighbourhood of size 3x3, 2 for 5x5 etc.
00047 #define NEIGHB_DIST 1
00048 
00049 #if defined _WIN32
00050     #define snprintf _snprintf
00051 #endif
00052 // define for use atan based kernel function
00053 // leave undefined for gaussian normal distribution function
00054 //#define ATAN_KH
00055 
00056 namespace deghosting
00057 {
00058     template <class PixelType>
00059     class ImageTypes {
00060         public:
00061             typedef vigra::FImage ImageType;
00062             typedef std::shared_ptr<ImageType> ImagePtr;
00063             typedef vigra::BasicImage<float> ProcessImageType;
00064             typedef std::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00065     };
00066 
00067     template <class PixelType>
00068     class ImageTypes<vigra::RGBValue<PixelType> > {
00069         public:
00070             typedef vigra::FRGBImage ImageType;
00071             typedef std::shared_ptr<ImageType> ImagePtr;
00072             typedef vigra::BasicImage<vigra::AlgTinyVector<float, 3> > ProcessImageType;
00073             typedef std::shared_ptr<ProcessImageType> ProcessImageTypePtr;
00074     };
00075 
00076     template <class PixelType>
00077     class Khan : public Deghosting, private ImageTypes<PixelType>
00078     {
00079         public:
00080             Khan(std::vector<std::string>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00081             Khan(std::vector<vigra::ImageImportInfo>& inputFiles, const uint16_t flags, const uint16_t debugFlags, int iterations, double sigma, int verbosity);
00082             std::vector<FImagePtr> createWeightMasks();
00083             ~Khan() {}
00084         protected:
00085             typedef typename ImageTypes<PixelType>::ImageType ImageType;
00086             typedef typename ImageTypes<PixelType>::ProcessImageType ProcessImageType;
00087             typedef typename ImageTypes<PixelType>::ProcessImageType::traverser ProcessImageTraverser;
00088             typedef typename ImageTypes<PixelType>::ProcessImageType::PixelType ProcessImagePixelType;
00089             typedef typename ImageTypes<PixelType>::ProcessImageTypePtr ProcessImageTypePtr;
00090             typedef typename vigra::NumericTraits<PixelType>::isScalar srcIsScalar;
00091             
00092             // Kh() things
00093             // (2*pi)^(1/2)
00094             double PIPOW;
00095             // 1/Kh denominator
00096             double denom;
00097             // sigma in gauusian density function
00098             double sigma;
00099             
00100             // other necessary stuff
00101             std::vector<ProcessImageTypePtr> processImages;
00102             std::vector<FImagePtr> weights;
00103             
00107             void setSigma(double sigma);
00108             
00113             //void linearizeRGB(std::string, FRGBImage* pInputImg);
00114             
00118             inline float Kh(ProcessImagePixelType x);
00119             
00124             void convertImage(ImageType * in, ProcessImageTypePtr & out, vigra::VigraFalseType);
00125             void convertImage(ImageType * in, ProcessImageTypePtr & out, vigra::VigraTrueType);
00126             
00129             void importRGBImage(vigra::ImageImportInfo & info, ImageType * img, vigra::VigraFalseType);
00130             void importRGBImage(vigra::ImageImportInfo & info, ImageType * img, vigra::VigraTrueType);
00131             
00136             void preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output);
00137     };
00138     
00139     template <class PixelType>
00140     Khan<PixelType>::Khan(std::vector<std::string>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00141                 int newIterations, double newSigma, int newVerbosity) {
00142         try {
00143             Deghosting::loadImages(newInputFiles);
00144             Deghosting::setFlags(newFlags);
00145             Deghosting::setDebugFlags(newDebugFlags);
00146             Deghosting::setIterationNum(newIterations);
00147             Deghosting::setVerbosity(newVerbosity);
00148             
00149             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00150             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00151             const char * fileType= vigra::ImageImportInfo(newInputFiles[0].c_str()).getFileType();
00152             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00153                 setSigma(newSigma/10);
00154             } else {
00155                 setSigma(newSigma);
00156             }
00157             
00158             for (unsigned int i=0; i<5; i++)
00159                 Deghosting::response.push_back(0);
00160             PIPOW = sigma*std::sqrt(2*PI);
00161             denom = 1/PIPOW;
00162         } catch (...) {
00163             throw;
00164         }
00165     }
00166     
00167     template <class PixelType>
00168     Khan<PixelType>::Khan(std::vector<vigra::ImageImportInfo>& newInputFiles, const uint16_t newFlags, const uint16_t newDebugFlags,
00169                 int newIterations, double newSigma, int newVerbosity) {
00170         try {
00171             Deghosting::loadImages(newInputFiles);
00172             Deghosting::setFlags(newFlags);
00173             Deghosting::setDebugFlags(newDebugFlags);
00174             Deghosting::setIterationNum(newIterations);
00175             Deghosting::setVerbosity(newVerbosity);
00176             
00177             // I don't know why, but sigma for HDR input have to approximately 10 times smaller
00178             // FIXME: Maybe it would be better to use different sigma for different images in case both HDR and LDR are mixed
00179             const char * fileType= newInputFiles[0].getFileType();
00180             if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00181                 setSigma(newSigma/10);
00182             } else {
00183                 setSigma(newSigma);
00184             }
00185             
00186             for (unsigned int i=0; i<5; i++)
00187                 Deghosting::response.push_back(0);
00188             PIPOW = sigma*std::sqrt(2*PI);
00189             denom = 1/PIPOW;
00190         } catch (...) {
00191             throw;
00192         }
00193     }
00194     
00195     template <class PixelType>
00196     void Khan<PixelType>::setSigma(double newSigma) {
00197         sigma = newSigma;
00198     }
00199     
00200     template <class PixelType>
00201     float Khan<PixelType>::Kh(ProcessImagePixelType x) {
00202         #ifdef ATAN_KH
00203             // good choice for sigma for this function is around 600
00204             return std::atan(-(x*x)+sigma)/PI + 0.5;
00205         #else
00206             // good choice for sigma for this function is around 30
00207             return (std::exp(-(x*x)/(2*sigma*sigma)) * denom);
00208         #endif
00209     }
00210     
00211     /*void Khan::linearizeRGB(std::string inputFile,FRGBImage *pInputImg) {
00212         HuginBase::SrcPanoImage panoImg(inputFile);
00213         panoImg.setResponseType(HuginBase::SrcPanoImage::RESPONSE_EMOR);
00214         panoImg.setEMoRParams(response);
00215         // response transform functor
00216         HuginBase::Photometric::InvResponseTransform<RGBValue<float>,
00217                                                      RGBValue<float> > invResponse(panoImg);
00218         invResponse.enforceMonotonicity();
00219 
00220         // iterator to the upper left corner
00221         FRGBImage::traverser imgIterSourceY = pInputImg->upperLeft();
00222         // iterator to he lower right corner
00223         FRGBImage::traverser imgIterEnd = pInputImg->lowerRight();
00224         // iterator to the output
00225         FRGBImage::traverser imgIterOut = pInputImg->upperLeft();
00226         // loop through the image
00227         for (int y=1; imgIterSourceY.y != imgIterEnd.y; ++imgIterSourceY.y, ++imgIterOut.y, ++y) {
00228             // iterator to the input
00229             FRGBImage::traverser sx = imgIterSourceY;
00230             // iterator to the output
00231             FRGBImage::traverser dx = imgIterOut;
00232             for (int x=1; sx.x != imgIterEnd.x; ++sx.x, ++dx.x, ++x) {
00233                 // transform image using response
00234                 *dx = vigra_ext::zeroNegative(invResponse(*sx, hugin_utils::FDiff2D(x, y)));
00235             }
00236         }
00237     }*/
00238     
00239     // RGB
00240     template <class PixelType>
00241     void Khan<PixelType>::convertImage(ImageType * in, ProcessImageTypePtr & out, vigra::VigraFalseType) {
00242         vigra::RGB2LabFunctor<float> RGB2Lab;
00243         vigra::transformImage(vigra::srcImageRange(*in), vigra::destImage(*out), RGB2Lab);
00244     }
00245     
00246     // grayscale
00247     template <class PixelType>
00248     void Khan<PixelType>::convertImage(ImageType* in, ProcessImageTypePtr& out, vigra::VigraTrueType) {
00249         vigra::copyImage(srcImageRange(*in), destImage(*out));
00250     }
00251     
00252     // load image and convert it to grayscale
00253     template <class PixelType>
00254     void Khan<PixelType>::importRGBImage(vigra::ImageImportInfo & info, ImageType * img, vigra::VigraTrueType) {
00255         // NOTE: I guess this is not optimal, but it works
00256         vigra::RGBToGrayAccessor<vigra::FRGBImage::PixelType> color2gray;
00257         vigra::FRGBImage tmpImg(info.size());
00258         if (info.numBands() == 4) {
00259             vigra::BImage imgAlpha(info.size());
00260             vigra::importImageAlpha(info, destImage(tmpImg), destImage(imgAlpha));
00261         } else {
00262             vigra::importImage(info, destImage(tmpImg));
00263         }
00264         vigra::transformImage(vigra::srcImageRange(tmpImg, color2gray), vigra::destImage(*img), log(vigra::functor::Arg1() + vigra::functor::Param(1.0f)));
00265     }
00266     
00267     // only load image
00268     template <class PixelType>
00269     void Khan<PixelType>::importRGBImage(vigra::ImageImportInfo & info, ImageType * img, vigra::VigraFalseType) {
00270         if (info.numBands() == 4) {
00271             vigra::BImage imgAlpha(info.size());
00272             vigra::importImageAlpha(info, destImage(*img), destImage(imgAlpha));
00273         } else {
00274             vigra::importImage(info, destImage(*img));
00275         }
00276     }
00277     
00278     template <class PixelType>
00279     void Khan<PixelType>::preprocessImage(unsigned int i, FImagePtr &weight, ProcessImageTypePtr &output) {
00280         vigra::ImageImportInfo imgInfo(inputFiles[i]);
00281         ImageType * pInputImg =  new ImageType(imgInfo.size());
00282         weight = FImagePtr(new vigra::FImage(imgInfo.size()));
00283         output = ProcessImageTypePtr(new ProcessImageType(imgInfo.size()));
00284         
00285         // import image
00286         // NOTE: Maybe alpha can be of some use but I don't
00287         // know about any now
00288         if (imgInfo.isColor()) {
00289             importRGBImage(imgInfo, pInputImg, srcIsScalar());
00290         } else {
00291             importImage(imgInfo, destImage(*pInputImg));
00292         }
00293         
00294         // linearize RGB and convert it to L*a*b image
00295         //linearizeRGB(inputFiles[i], pInputImg);
00296         
00297         // take logarithm or gamma correction if the input images are HDR
00298         // I'm not sure if it's the right way how to
00299         // find out if they are HDR
00300         const char * fileType= imgInfo.getFileType();
00301         if ( (!strcmp(fileType,"TIFF") && strcmp(fileType,"UINT8")) || !strcmp(fileType,"EXR") || !strcmp(fileType,"FLOAT")) {
00302             // use gamma 2.2
00303             if (flags & ADV_GAMMA) {
00304                 // GammaFunctor is only in vigra 1.6 GRRR
00305                 // I have to use BrightnessContrastFunctor
00306                 // TODO: change to the GammaFunctor in the future
00307                 vigra::FindMinMax<float> minmax;
00308                 vigra::inspectImage(vigra::srcImageRange(*pInputImg), minmax);
00309                 vigra::transformImage(vigra::srcImageRange(*pInputImg), vigra::destImage(*pInputImg), vigra::BrightnessContrastFunctor<PixelType>(0.45f, 1.0, minmax.min, minmax.max));
00310             } else {
00311                 // take logarithm
00312                 vigra::transformImage(vigra::srcImageRange(*pInputImg), vigra::destImage(*pInputImg), LogarithmFunctor<PixelType>(1.0));
00313             }
00314         }
00315         
00316         // generate initial weights
00317         vigra::transformImage(vigra::srcImageRange(*pInputImg), vigra::destImage(*weight), HatFunctor<PixelType>());
00318         
00319         convertImage(pInputImg, output, srcIsScalar());
00320         
00321         delete pInputImg;
00322         pInputImg = 0;
00323     }
00324     
00325     template <class PixelType>
00326     std::vector<FImagePtr> Khan<PixelType>::createWeightMasks() {
00327         for (unsigned int i = 0; i < inputFiles.size(); i++) {
00328             FImagePtr weight;
00329             ProcessImageTypePtr processImage;
00330             preprocessImage(i, weight, processImage);
00331             processImages.push_back(processImage);
00332             weights.push_back(weight);
00333             
00334             // save init weights
00335             if (debugFlags & SAVE_INITWEIGHTS) {
00336                 char tmpfn[100];
00337                 snprintf(tmpfn, 99, "init_weights_%u.tiff", i);
00338                 vigra::ImageExportInfo exWeights(tmpfn);
00339                 vigra::exportImage(vigra::srcImageRange(*weight), exWeights.setPixelType("UINT8"));
00340             }
00341         }
00342         
00343         float maxWeight = 0;
00344         // image size
00345         const int origWidth = weights[0]->width();
00346         const int origHeight = weights[0]->height();
00347         
00348         // if we doing scaling, we have to backup L*a*b images of original size
00349         std::vector<ProcessImageTypePtr> backupLab;
00350         if (flags & ADV_MULTIRES) {
00351             for (unsigned int i = 0; i < processImages.size(); i++) {
00352                 // backup original size L*a*b
00353                 backupLab.push_back(processImages[i]);
00354             }
00355         }
00356         
00357         if (verbosity > 0)
00358             std::cout << "Running khan algorithm" << std::endl;
00359         // and we can run khan algorithm
00360         // khan iteration
00361         for (int it = 0; it < iterations; it++) {
00362             if (verbosity > 0)
00363                 std::cout << "iteration " << it+1 << std::endl;
00364             // copy weights from previous iteration
00365             if (verbosity > 1)
00366                 std::cout << "copying weights from previous iteration" << std::endl;
00367             
00368             std::vector<FImagePtr> prevWeights;
00369             for (unsigned int i = 0; i < weights.size(); i++) {
00370                 // scale weights to the requied size
00371                 if (flags & ADV_MULTIRES) {
00372                     // it would be better to use resampleImage, but it seems to be present only in VIGRA 1.6
00373                     // so let's use some of the resizeImageINTERPOLATION() functions
00374                     
00375                     // compute width
00376                     int resized_width = origWidth / ( iterations/(it+1) );
00377                     //compute height
00378                     int resized_height = origHeight / ( iterations/(it+1) );
00379                     // destination images
00380                     vigra::FImage resizedWeight;
00381                     ProcessImageType resizedLab;
00382                     // it's not worthy to scale to less than 100px per side
00383                     if (resized_width > 100 && resized_height > 100) {
00384                         // create destination image of desired size
00385                         resizedWeight = vigra::FImage(vigra::Size2D(resized_width,resized_height));
00386                         resizedLab = ProcessImageType(vigra::Size2D(resized_width, resized_height));
00387                     } else if (origWidth >= 100 && origHeight >= 100) {
00388                         // resize it to the smallest value (ie 100px for the shorter side)
00389                         if (origWidth >= origHeight) {
00390                             resizedWeight = vigra::FImage(vigra::Size2D(100 * origWidth / origHeight, 100));
00391                             resizedLab = ProcessImageType(vigra::Size2D(100 * origWidth / origHeight, 100));
00392                         } else {
00393                             resizedWeight = vigra::FImage(vigra::Size2D(100, 100 * origHeight / origWidth));
00394                             resizedLab = ProcessImageType(vigra::Size2D(100, 100 * origHeight / origWidth));
00395                         }
00396                     } else {
00397                         // don't scale at all
00398                         // just copy weights as if no scaling seting was applied
00399                         goto DONTSCALE;
00400                     }
00401                     
00402                     // No interpolation – only for testing
00403                     resizeImageNoInterpolation(srcImageRange(*weights[i]), destImageRange(resizedWeight));
00404                     resizeImageNoInterpolation(srcImageRange(*backupLab[i]), destImageRange(resizedLab));
00405                     
00406                     FImagePtr tmp(new vigra::FImage(resizedWeight));
00407                     prevWeights.push_back(tmp);
00408                     processImages[i] = ProcessImageTypePtr(new ProcessImageType(resizedLab));
00409                     weights[i] = FImagePtr(new vigra::FImage(resizedWeight));
00410                 } else {
00411                     DONTSCALE:
00412                     FImagePtr tmp(new vigra::FImage(*weights[i]));
00413                     prevWeights.push_back(tmp);
00414                 }
00415             }
00416             
00417             // loop through all images
00418             for (unsigned int i = 0; i < processImages.size(); i++) {
00419                 if (verbosity > 1)
00420                     std::cout << "processing image " << i+1 << std::endl;
00421                 
00422                 // vector storing pixel data
00423                 ProcessImagePixelType X;
00424                 // sums for eq. 6
00425                 double wpqssum = 0;
00426                 double wpqsKhsum = 0;
00427                 // image size
00428                 const int width = processImages[i]->width();
00429                 const int height = processImages[i]->height();
00430 
00431                 // iterator to the upper left corner
00432                 ProcessImageTraverser sy = processImages[i]->upperLeft();
00433                 // iterator to the lower right corner
00434                 ProcessImageTraverser send = processImages[i]->lowerRight();
00435                 // iterator to the weight image left corner
00436                 vigra::FImage::traverser wy = weights[i]->upperLeft();
00437                 // loop through the row
00438                 for (int y=0; sy.y != send.y; ++sy.y, ++wy.y, ++y) {
00439                     // iterator to the source (L*a*b image)
00440                     ProcessImageTraverser sx = sy;
00441                     // iterator to the weight
00442                     vigra::FImage::traverser wx = wy;
00443                     // loop over the pixels
00444                     for (int x=0; sx.x != send.x; ++sx.x, ++wx.x, ++x) {
00445                         if (verbosity > 2)
00446                             std::cout << "processing pixel (" << x+1 << "," << y+1 << ")" << std::endl;
00447                         // set pixel vector
00448                         X = *sx;
00449                         
00450                         // loop through all layers
00451                         for (unsigned int j = 0; j < processImages.size(); j++) {
00452                             if (verbosity > 2)
00453                                 std::cout << "processing layer " << j << std::endl;
00454                             
00455                             // iterator to the neighbour
00456                             ProcessImageTraverser neighby = processImages[j]->upperLeft();
00457                             // iterator to the weight
00458                             vigra::FImage::traverser weighty = prevWeights[j]->upperLeft();
00459                             // pixel offset
00460                             int ndy = -NEIGHB_DIST;
00461                             // move them to the upper bound
00462                             // find the best upper bound
00463                             if (y-NEIGHB_DIST < 0) {
00464                                 ndy = -y;
00465                             }
00466                             else {
00467                                 neighby.y += y-NEIGHB_DIST;
00468                                 weighty.y += y-NEIGHB_DIST;
00469                             }
00470                             
00471                             // iterate through neighbourhoods y axis
00472                             int maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00473                             for (; ndy <= maxDisty; ++neighby.y, ++weighty.y, ++ndy) {
00474                                 ProcessImageTraverser neighbx = neighby;
00475                                 vigra::FImage::traverser weightx = weighty;
00476                                 // pixel offset
00477                                 int ndx = -NEIGHB_DIST;
00478                                 // move them to the upper bound
00479                                 // find the best upper bound
00480                                 if (x-NEIGHB_DIST < 0) {
00481                                     ndx = -x;
00482                                 }
00483                                 else {
00484                                     neighbx.x += x-NEIGHB_DIST;
00485                                     weightx.x += x-NEIGHB_DIST;
00486                                 }
00487                                 // iterate through neighbourhoods x axis
00488                                 int maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00489                                 for (; ndx <= maxDistx; ++neighbx.x, ++weightx.x, ++ndx) {
00490                                     if (verbosity > 3)
00491                                         std::cout << "(" << ndx << "," << ndy << ")";
00492                                     // now we can construct pixel vector
00493                                     // should omit the middle pixel, ie use only neighbours
00494                                     if (ndx != 0 || ndy != 0) {
00495                                         wpqsKhsum += (*weightx * Kh(X-(*neighbx)));
00496                                         wpqssum += *weightx;
00497                                     }
00498                                     
00499                                     maxDistx = (width - x) > NEIGHB_DIST ? NEIGHB_DIST : (width - x-1);
00500                                 }
00501                                 if (verbosity > 3)
00502                                     std::cout << std::endl;
00503                                 
00504                                 maxDisty = (height - y) > NEIGHB_DIST ? NEIGHB_DIST : (height - y-1);
00505                             }
00506                         }
00507                         
00508                         if (verbosity > 2)
00509                             std::cout << "computing new weight" << std::endl;
00510                         // compute probability and set weight
00511                         //std::cout << "P=" << (float) wpqsKhsum/wpqssum << std::endl;
00512                         if (flags & ADV_ONLYP)
00513                             *wx = (float) wpqsKhsum/wpqssum;
00514                         else
00515                             *wx *= (float) wpqsKhsum/wpqssum;
00516                         if (maxWeight < *wx)
00517                             maxWeight = *wx;
00518                         wpqsKhsum = wpqssum = 0;
00519                         
00520                     }
00521                 }
00522             }
00523         }
00524         
00525         if (verbosity > 1)
00526                 std::cout << "normalizing weights" << std::endl;
00527         double factor = 255.0f/maxWeight;
00528         for (unsigned int i=0; i<weights.size(); ++i) {
00529             transformImage(srcImageRange(*(weights[i])), destImage(*(weights[i])), NormalizeFunctor<float>(factor));
00530         }
00531         return weights;
00532     }
00533 }
00534 
00535 #endif /* KHAN_H_ */

Generated on 27 Jul 2016 for Hugintrunk by  doxygen 1.4.7