微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

OpenCV 4.5.1 与 3.4.2 的性能差异

如何解决OpenCV 4.5.1 与 3.4.2 的性能差异

使用不同版本的 opencv 有一些非常奇怪的结果。我看到 cv::calcHist 对于 3 通道 (RGB) 蒙版图像的性能差异很大(不使用 ipp,因为它只支持单通道)。 使用 3.4.2 平均花费 15 毫秒,而使用 4.5.1 花费 0.7 毫秒。您可能会怀疑 opencv 中 histogram.cpp 代码性能有所改进,但并未进行太多更改。

所以我基本上将 cv::calcHist 4.5.1 的代码复制到了下面的测试应用程序中(Mat 的实现代码除外,并删除了不必要的英特尔 IPP 代码)。

做了一些性能分析,结果显示在 cv::calcHist 中,4.5.1 的内存分配基本上更快。

当然,Mat 分配没有太大区别。

为了测试这一点,我分别用 4.5.1 和 3.4.2 构建下面的程序并比较性能:

// HistogramBenchmark.cpp : This file contains the 'main' function. Program execution begins and ends there.
//

#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/core.hpp>
#include <iomanip>
#include <chrono>
#include <opencv2/highgui.hpp>



// Number of crop/mask pairs built in main() and histogrammed per iteration.
#define BATCH_SIZE 30
// Width and height, in pixels, of each synthetic CV_8UC3 crop built in main().
#define WIDTH 164
#define HEIGHT 196

// Local copy of cv::calcHist that the benchmark in main() times.
void mycalcHist(const cv::Mat* images,int nimages,const int* channels,cv::InputArray _mask,cv::OutputArray _hist,int dims,const int* histSize,const float** ranges,bool uniform,bool accumulate);

// 8-bit accumulation kernel invoked by mycalcHist on an integer view of the histogram.
void
mycalcHist_8u(std::vector<uchar*>& _ptrs,const std::vector<int>& _deltas,cv::Size imsize,cv::Mat& hist,const float** _ranges,const double* _uniranges,bool uniform);

// Duration in milliseconds with a double representation; used to print the mean latency.
typedef std::chrono::duration<double,std::chrono::milliseconds::period> Ms;

// Computes and compares the histograms of two reference images with the stock
// cv::calcHist; its call in main() is currently commented out.
void testVersions();

/// Benchmark driver.
///
/// Builds BATCH_SIZE synthetic 3-channel 8-bit crops with empty masks, runs an
/// untimed warm-up, then times 200 iterations of histogramming the whole batch
/// with mycalcHist and prints the mean latency of one batch in milliseconds.
///
/// @return 0 on completion.
int main()
{
    //testVersions();
    int channels[] = { 0,1,2 };
    float histRanges[] = { 0,256 };
    int histSize[] = { 10,10,10 };
    const float* ranges[] = { histRanges,histRanges,histRanges };

    std::vector<cv::Mat> crops;
    std::vector<cv::Mat> masks;
    cv::Mat histogram;

    // Empty masks: mycalcHist treats an empty mask as "count every pixel".
    for (int i = 0; i < BATCH_SIZE; i++)
        masks.push_back(cv::Mat());

    // NOTE(review): the source Mat is never filled before scaling, so pixel
    // contents are presumably arbitrary; only the image shape matters here.
    for (int i = 0; i < BATCH_SIZE; i++)
        crops.push_back(cv::Mat(HEIGHT,WIDTH,CV_8UC3) * 255);

    // Warm up (not timed): touch every crop/mask pair of the batch.
    for (int i = 0; i < 20; i++)
    {
        for (int j = 0; j < BATCH_SIZE; j++)
            mycalcHist(&crops[j],1,channels,masks[j],histogram,3,histSize,ranges,true,false);
    }

    std::chrono::steady_clock::duration latencySum{ 0 };
    unsigned latencySamplesNum = 0;
    std::ostringstream latencyStream;

    // Timed runs: one sample is the time to histogram the whole batch.
    for (int i = 0; i < 200; i++)
    {
        const std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now();

        for (int j = 0; j < BATCH_SIZE; j++)
            mycalcHist(&crops[j],1,channels,masks[j],histogram,3,histSize,ranges,true,false);

        latencySum += std::chrono::steady_clock::now() - t0;
        latencySamplesNum += 1;
    }

    latencyStream.str("");
    latencyStream << std::fixed << std::setprecision(1)
        << (std::chrono::duration_cast<Ms>(latencySum) / latencySamplesNum).count() << " ms \n" << latencySamplesNum;
    std::cout << "Mean pipeline latency: " << latencyStream.str() << '\n';

    return 0;
}

/// Loads two reference images, converts them from BGR to RGB, computes a
/// 10x10x10 uniform 3-channel histogram of each with the stock cv::calcHist,
/// and compares the two histograms with the Bhattacharyya distance.
///
/// The distance is computed but not reported.
void testVersions()
{
    // Same histogram configuration as the benchmark in main().
    int channels[] = { 0,1,2 };
    float histRanges[] = { 0,256 };
    int histSize[] = { 10,10,10 };
    const float* ranges[] = { histRanges,histRanges,histRanges };

    auto img1 = cv::imread(R"(C:\Users\christopher.eviParke\Documents\Configuration\IPU1\images\Reference\1\132571809612622068_Hist3_Fac2.04.bmp)");
    cv::Mat img1_hist;
    cv::cvtColor(img1,img1,cv::COLOR_BGR2RGB);
    cv::calcHist(&img1,1,channels,cv::Mat(),img1_hist,3,histSize,ranges,true,false);

    auto img2 = cv::imread(R"(C:\Users\christopher.eviParke\Documents\Configuration\IPU1\images\Reference\2\132571827343906357_Hist3_Fac1.80.bmp)");
    cv::Mat img2_hist;
    cv::cvtColor(img2,img2,cv::COLOR_BGR2RGB);
    cv::calcHist(&img2,1,channels,cv::Mat(),img2_hist,3,histSize,ranges,true,false);

    // compareHist takes both histograms followed by the comparison method.
    auto dist = cv::compareHist(img1_hist,img2_hist,cv::HISTCMP_BHATTACHARYYA);
    (void)dist; // not reported

    return;
}

/// Resolves, for every histogram dimension, which image/channel feeds it and
/// how to step through that data.
///
/// @param images    Array of source images; all must share size and depth.
/// @param nimages   Number of entries in @p images.
/// @param channels  Per-dimension flat channel index across all images, or
///                  null, in which case dimension i reads single-channel image i.
/// @param mask      Optional single-channel mask of the same size as the images.
/// @param dims      Number of histogram dimensions.
/// @param histSize  Number of bins per dimension.
/// @param ranges    Per-dimension bin boundaries, or null for the implicit
///                  0..256 uniform range (8-bit input only).
/// @param uniform   True when each ranges[i] holds only {low, high}.
/// @param ptrs      Out: dims+1 data pointers; the last is the mask (null if none).
/// @param deltas    Out: for each pointer, {element stride, end-of-row padding};
///                  for the mask, {1, full row step}.
/// @param imsize    Out: iteration size; collapsed to a single row when all
///                  inputs are continuous.
/// @param uniranges Out: per-dimension {scale, offset} mapping a value to a bin
///                  index; filled for uniform or implicit ranges.
void myhistPrepareImages(const cv::Mat* images,int nimages,const int* channels,const cv::Mat& mask,int dims,const int* histSize,const float** ranges,bool uniform,std::vector<uchar*>& ptrs,std::vector<int>& deltas,cv::Size& imsize,std::vector<double>& uniranges)
{
    int i,j,c;
    CV_Assert(channels != 0 || nimages == dims);

    imsize = images[0].size();
    int depth = images[0].depth(),esz1 = (int)images[0].elemSize1();
    bool isContinuous = true;

    ptrs.resize(dims + 1);
    deltas.resize((dims + 1) * 2);

    for (i = 0; i < dims; i++)
    {
        if (!channels)
        {
            j = i;
            c = 0;
            CV_Assert(images[j].channels() == 1);
        }
        else
        {
            // Walk the images until the flat channel index falls inside one.
            c = channels[i];
            CV_Assert(c >= 0);
            for (j = 0; j < nimages; c -= images[j].channels(),j++)
                if (c < images[j].channels())
                    break;
            CV_Assert(j < nimages);
        }

        CV_Assert(images[j].size() == imsize && images[j].depth() == depth);
        if (!images[j].isContinuous())
            isContinuous = false;
        ptrs[i] = images[j].data + c * esz1;
        deltas[i * 2] = images[j].channels();
        // Elements to skip at the end of a row after stepping across its pixels.
        deltas[i * 2 + 1] = (int)(images[j].step / esz1 - imsize.width * deltas[i * 2]);
    }

    if (!mask.empty())
    {
        CV_Assert(mask.size() == imsize && mask.channels() == 1);
        isContinuous = isContinuous && mask.isContinuous();
        ptrs[dims] = mask.data;
        deltas[dims * 2] = 1;
        // The mask is indexed by x within a row, so its row delta is the full step.
        deltas[dims * 2 + 1] = (int)(mask.step / mask.elemSize1());
    }

    if (isContinuous)
    {
        // Treat the whole image as one long row.
        imsize.width *= imsize.height;
        imsize.height = 1;
    }

    if (!ranges) // implicit uniform ranges for 8U
    {
        CV_Assert(depth == CV_8U);

        uniranges.resize(dims * 2);
        for (i = 0; i < dims; i++)
        {
            uniranges[i * 2] = histSize[i] / 256.;
            uniranges[i * 2 + 1] = 0;
        }
    }
    else if (uniform)
    {
        uniranges.resize(dims * 2);
        for (i = 0; i < dims; i++)
        {
            CV_Assert(ranges[i] && ranges[i][0] < ranges[i][1]);
            double low = ranges[i][0],high = ranges[i][1];
            double t = histSize[i] / (high - low);
            uniranges[i * 2] = t;
            uniranges[i * 2 + 1] = -t * low;
#if 0  // This should be true by math,but it is not accurate numerically
            CV_Assert(cvFloor(low * uniranges[i * 2] + uniranges[i * 2 + 1]) == 0);
            CV_Assert((high * uniranges[i * 2] + uniranges[i * 2 + 1]) < histSize[i]);
#endif
        }
    }
    else
    {
        // Explicit boundaries must be strictly increasing.
        for (i = 0; i < dims; i++)
        {
            size_t n = histSize[i];
            for (size_t k = 0; k < n; k++)
                CV_Assert(ranges[i][k] < ranges[i][k + 1]);
        }
    }
}


/// Local copy of cv::calcHist restricted to 8-bit input.
///
/// Counts are accumulated into a CV_32S view that shares the histogram's
/// buffer and are converted to CV_32F at the end.
///
/// @param images     Array of source images (CV_8U depth, same size).
/// @param nimages    Number of entries in @p images.
/// @param channels   Flat channel index per histogram dimension (may be null,
///                   see myhistPrepareImages).
/// @param _mask      Optional CV_8UC1 mask; empty means every pixel is counted.
/// @param _hist      Output histogram, CV_32F with @p dims dimensions.
/// @param dims       Number of histogram dimensions.
/// @param histSize   Number of bins per dimension.
/// @param ranges     Per-dimension bin boundaries (may be null for 0..256).
/// @param uniform    True for uniform bins described by {low, high}.
/// @param accumulate Keep existing counts when the output buffer is reused.
void mycalcHist(const cv::Mat* images,int nimages,const int* channels,cv::InputArray _mask,cv::OutputArray _hist,int dims,const int* histSize,const float** ranges,bool uniform,bool accumulate)
{
    CV_Assert(images && nimages > 0);
    CV_Assert(dims > 0 && histSize);
    // Only the 8-bit kernel is present in this copy.
    CV_Assert(images[0].depth() == CV_8U);

    const uchar* const histdata = _hist.getMat().ptr();

    // create() returns immediately when the existing buffer already has the
    // requested shape and type, so a matching histogram is reused as-is.
    _hist.create(dims,histSize,CV_32F);
    cv::Mat hist = _hist.getMat();

    // A freshly allocated buffer has nothing to accumulate onto.
    if (histdata != hist.data)
        accumulate = false;

    // Integer view over the same memory: float and int32 have the same size.
    cv::Mat ihist = hist;
    ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK) | CV_32S;

    if (!accumulate)
        hist = cv::Scalar(0.);
    else
        hist.convertTo(ihist,CV_32S);

    std::vector<uchar*> ptrs;
    std::vector<int> deltas;
    std::vector<double> uniranges;
    cv::Size imsize;
    cv::Mat mask = _mask.getMat();
    CV_Assert(mask.empty() || mask.type() == CV_8UC1);
    myhistPrepareImages(images,nimages,channels,mask,dims,hist.size,ranges,uniform,ptrs,deltas,imsize,uniranges);
    const double* _uniranges = uniform ? &uniranges[0] : 0;

    mycalcHist_8u(ptrs,deltas,imsize,ihist,ranges,_uniranges,uniform);

    ihist.convertTo(hist,CV_32F);
}

#define CV_CLAMP_INT(v,vmin,vmax) (v < vmin ? vmin : (vmax < v ? vmax : v))

void
mycalcHistLookupTables_8u(const cv::Mat& hist,const cv::SparseMat& shist,const double* uniranges,bool issparse,std::vector<size_t>& _tab)
{
    static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t) * 8 - 2);

    const int low = 0,high = 256;
    int i,j;
    _tab.resize((high - low) * dims);
    size_t* tab = &_tab[0];

    if (uniform)
    {
        for (i = 0; i < dims; i++)
        {
            double a = uniranges[i * 2];
            double b = uniranges[i * 2 + 1];
            int sz = !issparse ? hist.size[i] : shist.size(i);
            size_t step = !issparse ? hist.step[i] : 1;

            double v_lo = ranges ? ranges[i][0] : 0;
            double v_hi = ranges ? ranges[i][1] : 256;

            for (j = low; j < high; j++)
            {
                int idx = cvFloor(j * a + b);
                size_t written_idx = OUT_OF_RANGE;
                if (j >= v_lo && j < v_hi)
                {
                    idx = CV_CLAMP_INT(idx,sz - 1);
                    written_idx = idx * step;
                }
                tab[i * (high - low) + j - low] = written_idx;
            }
        }
    }
    else if (ranges)
    {
        for (i = 0; i < dims; i++)
        {
            int limit = std::min(cvCeil(ranges[i][0]),high);
            int idx = -1,sz = !issparse ? hist.size[i] : shist.size(i);
            size_t written_idx = OUT_OF_RANGE;
            size_t step = !issparse ? hist.step[i] : 1;

            for (j = low;;)
            {
                for (; j < limit; j++)
                    tab[i * (high - low) + j - low] = written_idx;

                if ((unsigned)(++idx) < (unsigned)sz)
                {
                    limit = std::min(cvCeil(ranges[i][idx + 1]),high);
                    written_idx = idx * step;
                }
                else
                {
                    for (; j < high; j++)
                        tab[i * (high - low) + j - low] = OUT_OF_RANGE;
                    break;
                }
            }
        }
    }
}

void
mycalcHist_8u(std::vector<uchar*>& _ptrs,bool uniform)
{
    static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t) * 8 - 2);

    uchar** ptrs = &_ptrs[0];
    const int* deltas = &_deltas[0];
    uchar* H = hist.ptr();
    int x;
    const uchar* mask = _ptrs[dims];
    int mstep = _deltas[dims * 2 + 1];
    std::vector<size_t> _tab;

    mycalcHistLookupTables_8u(hist,cv::SparseMat(),_ranges,false,_tab);
    const size_t* tab = &_tab[0];


    
    int d0 = deltas[0],step0 = deltas[1],d1 = deltas[2],step1 = deltas[3],d2 = deltas[4],step2 = deltas[5];

    const uchar* p0 = (const uchar*)ptrs[0];
    const uchar* p1 = (const uchar*)ptrs[1];
    const uchar* p2 = (const uchar*)ptrs[2];

    for (; imsize.height--; p0 += step0,p1 += step1,p2 += step2,mask += mstep)
    {
        if (!mask)
            for (x = 0; x < imsize.width; x++,p0 += d0,p1 += d1,p2 += d2)
            {
                size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512];
                if (idx < OUT_OF_RANGE)
                    ++* (int*)(H + idx);
            }
        else
            for (x = 0; x < imsize.width; x++,p2 += d2)
            {
                size_t idx;
                if (mask[x] && (idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]) < OUT_OF_RANGE)
                    ++* (int*)(H + idx);
            }
    }
    
    
    
}

解决方法

经过大量排查后,发现罪魁祸首竟然是这一行代码:

ihist.convertTo(hist,CV_32F);

在 calcHist 代码中使用

在 4.0 及以上版本中,函数的定义不同。我没有看得太深,但基本上我认为它以某种方式避免了分配。 所以我基本上将 v4 代码 + 辅助函数复制到 v3,我的软件又是实时的.... :)

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。