微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

像C#的HashSet和Dictionary这样的快速C++容器?

我在C#中经常使用HashSet和Dictionary,发现它们非常快……

我已经尝试过使用std::map和std::hash_map,并且在比较中发现它们非常慢.这听起来像预期的行为吗?在使用std::hash_map时,我是不是做错了什么?

或者,有没有更好的C++哈希容器?

我正在散列int32s,通常大约有100,000个.

更新:我在C#和C++中各创建了一个repro.它进行了两次试验,在C#中分别耗时19ms和13ms,在C++中耗时大约11,000ms.我的C++代码肯定有问题:)

(两者都作为发布版本运行,都是控制台应用程序)

C#输出

Found 511 values in the intersection,in 19 ms
Found 508 values in the intersection,in 13 ms

C++输出

Found 308 values in the intersection,in 11764.7ms
Found 316 values in the intersection,in 11742.8ms

C++输出(使用stdext::hash_map而不是std::map)

Found 300 values in the intersection,in 383.552ms
Found 306 values in the intersection,in 2277.02ms

C++输出(使用stdext::hash_map,发布x64版本)

Found 292 values in the intersection,in 1037.67ms
Found 302 values in the intersection,in 3663.71ms

笔记:

> Set2在C++中没有像我想要的那样填充,我期望它与Set1有50%的交集(就像在C#中那样),但由于某些原因,我不得不将随机数乘以10,才能让它们至少部分不相交

C#:

/// <summary>
/// Runs the intersection benchmark twice and prints, for each trial, the
/// intersection size and the elapsed time in milliseconds, then waits for a
/// key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
    {
        // DateTime.Now.Millisecond is only the 0-999 millisecond component of
        // the current time, so the difference of two readings is not an elapsed
        // duration (it wraps every second and can be negative). Stopwatch
        // measures elapsed time directly.
        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
        int intersectionSize = runIntersectiontest();
        stopwatch.Stop();

        Console.WriteLine(String.Format("Found {0} values in the intersection,in {1} ms",intersectionSize,stopwatch.ElapsedMilliseconds));

        // Second trial: start a fresh stopwatch so the first trial's time is
        // not included.
        stopwatch = System.Diagnostics.Stopwatch.StartNew();
        intersectionSize = runIntersectiontest();
        stopwatch.Stop();

        Console.WriteLine(String.Format("Found {0} values in the intersection,in {1} ms",intersectionSize,stopwatch.ElapsedMilliseconds));

        Console.ReadKey();
    }

    /// <summary>
    /// Builds a list of 100,000 consecutive integers and a list of 1,000
    /// random integers, loads the first list into a dictionary, and counts how
    /// many entries of the second list are present in that dictionary.
    /// </summary>
    /// <returns>
    /// The number of entries of the second list found in the dictionary
    /// (repeated random values are counted each time they occur).
    /// </returns>
    static int runIntersectiontest()
    {
        Random rng = new Random(DateTime.Now.Millisecond);

        // First list: 1000000000 .. 1000099999.
        List<int> first = new List<int>();
        for ( int offset = 0; offset < 100000; offset++ )
            first.Add(1000000000 + offset);

        // Second list: 1,000 random values in 1000000001 .. 1000200000.
        List<int> second = new List<int>();
        for ( int n = 0; n < 1000; n++ )
            second.Add(1000000000 + (rng.Next() % 200000 + 1));

        // Every value of the first list becomes a key mapped to 1.
        Dictionary<int,int> lookup = new Dictionary<int,int>();
        foreach ( int key in first )
            lookup[key] = 1;

        // Probe with the second list; keys that are hit get re-marked with 2.
        int hits = 0;
        foreach ( int candidate in second )
        {
            if ( !lookup.ContainsKey(candidate) )
                continue;

            hits++;
            lookup[candidate] = 2;
        }

        return hits;
    }

C++:

/**
 * Builds two integer sets and counts how many entries of the second set are
 * present in the first.
 *
 * set1 holds the 100,000 consecutive values base+0 .. base+99999. set2 holds
 * 1,000 pseudo-random values drawn from base+1 .. base+200000, so roughly half
 * of set2 is expected to fall inside set1.
 *
 * Uses rand(); the sequence is controlled by whatever seed the caller passed
 * to srand().
 *
 * @return Number of set2 entries found in set1 (a value drawn more than once
 *         is counted each time).
 */
int runIntersectiontest()
{
    const int base = 1000000000;
    const int set1Size = 100000;
    const int set2Size = 1000;
    const unsigned set2Range = 200000;

    // Create 100,000 values for set1
    std::vector<int> set1;
    set1.reserve(set1Size);
    for ( int i = 0; i < set1Size; ++i )
    {
        set1.push_back(base + i);
    }

    // Create 1,000 values for set2
    std::vector<int> set2;
    set2.reserve(set2Size);
    for ( int i = 0; i < set2Size; ++i )
    {
        // RAND_MAX is only guaranteed to be 32767, which is far below 200000,
        // so a single rand() call cannot cover the wanted range. Combine two
        // draws (in unsigned arithmetic, so the shift cannot overflow a signed
        // int) before reducing to 1..200000.
        const unsigned wide = (static_cast<unsigned>(rand()) << 15) ^ static_cast<unsigned>(rand());
        const int offset = static_cast<int>(wide % set2Range) + 1;

        set2.push_back(base + offset);
    }

    // Now intersect the two sets by populating the map
    std::map<int,int> theMap;
    for ( std::vector<int>::const_iterator it = set1.begin(); it != set1.end(); ++it )
    {
        theMap[*it] = 1;
    }

    int intersectionSize = 0;

    for ( std::vector<int>::const_iterator it = set2.begin(); it != set2.end(); ++it )
    {
        std::map<int,int>::iterator foundValue = theMap.find(*it);

        if ( foundValue != theMap.end() )
        {
            // Write through the iterator we already have instead of paying for
            // a second lookup with operator[].
            foundValue->second = 2;

            ++intersectionSize;
        }
    }

    return intersectionSize;
}

int _tmain(int argc,_TCHAR* argv[])
{
    srand ( time(NULL) );

    Timer timer;
    int intersectionSize = runIntersectiontest();
    timer.Stop();

    cout << "Found " << intersectionSize << " values in the intersection,in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    intersectionSize = runIntersectiontest();
    timer.Stop();

    cout << "Found " << intersectionSize << " values in the intersection,in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

解决方法

hash_map和hash_set是非标准的,unordered_map和unordered_set最有可能很快成为标准版本.在没有可复现示例(repro)的情况下,我认为这不会有太大的影响.在引擎盖下,它们是相同的数据结构,因此它们应具有相似的性能.

我在MS Visual Studio 2008 v9.0.30729.1下编译了提供的样本,项目类型为Visual C++ -> Win32 -> 控制台应用程序(不过我自己写了一个Timer类,因为我不确定你使用的是什么).在调试版本下,我得到1000毫秒的时间,但发布版本的编译结果是50毫秒.

#include <vector>
#include <iostream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <windows.h>

typedef struct {
    LARGE_INTEGER start;
    LARGE_INTEGER stop;
} stopWatch;

class CStopWatch {

private:
    stopWatch timer;
    LARGE_INTEGER frequency;
    double LIToSecs( LARGE_INTEGER & L);
public:
    CStopWatch();
    void startTimer( );
    void stopTimer( );
    double getelapsedtime();
};

double CStopWatch::LIToSecs( LARGE_INTEGER & L) {
    return ((double)L.QuadPart /(double)frequency.QuadPart) ;
}

CStopWatch::CStopWatch(){
    timer.start.QuadPart=0;
    timer.stop.QuadPart=0;
    QueryPerformanceFrequency( &frequency ) ;
}

void CStopWatch::startTimer( ) {
    QueryPerformanceCounter(&timer.start) ;
}

void CStopWatch::stopTimer( ) {
    QueryPerformanceCounter(&timer.stop) ;
}

double CStopWatch::getelapsedtime() {
    LARGE_INTEGER time;
    time.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
    return LIToSecs( time) ;
}

using namespace std;
/**
 * Builds two integer sets and counts how many entries of the second set are
 * present in the first.
 *
 * set1 holds the 100,000 consecutive values 1000000000 .. 1000099999. set2
 * holds 1,000 pseudo-random values drawn from 1000000001 .. 1000200000, so
 * roughly half of set2 is expected to fall inside set1.
 *
 * Uses rand(); the sequence is controlled by whatever seed the caller passed
 * to srand().
 *
 * @return Number of set2 entries found in set1 (a value drawn more than once
 *         is counted each time).
 */
int runIntersectiontest()
{
    std::map<int,int> theMap;

    std::vector<int> set1;
    std::vector<int> set2;

    // Create 100,000 values for set1
    set1.reserve(100000);
    for ( int i = 0; i < 100000; ++i )
    {
        set1.push_back(1000000000 + i);
    }

    // Create 1,000 values for set2
    set2.reserve(1000);
    for ( int i = 0; i < 1000; ++i )
    {
        // RAND_MAX is only guaranteed to be 32767, well below 200000, so two
        // draws are combined (in unsigned arithmetic, to keep the shift free
        // of signed overflow) before reducing to 1..200000.
        const unsigned wide = (static_cast<unsigned>(rand()) << 15) ^ static_cast<unsigned>(rand());
        const int random = static_cast<int>(wide % 200000u) + 1;

        set2.push_back(1000000000 + random);
    }

    // Now intersect the two sets by populating the map
    for ( std::vector<int>::const_iterator it = set1.begin(); it != set1.end(); ++it )
    {
        theMap[*it] = 1;
    }

    int intersectionSize = 0;

    for ( std::vector<int>::const_iterator it = set2.begin(); it != set2.end(); ++it )
    {
        std::map<int,int>::iterator foundValue = theMap.find(*it);

        if ( foundValue != theMap.end() )
        {
            // Mark the hit through the iterator; no second lookup needed.
            foundValue->second = 2;

            ++intersectionSize;
        }
    }

    return intersectionSize;

}

int main(int argc,char* argv[])
{
    srand ( time(NULL) );
    int tests = 2;
    while(tests--){
      CStopWatch timer;
      timer.startTimer();
      int intersectionSize = runIntersectiontest();
      timer.stopTimer();

      cout << "Found " << intersectionSize << " values in the intersection,in " << timer.getelapsedtime() << "s\r\n";
    }

    getchar();

    return 0;
}

(我本想尝试使用unordered_map,但我的版本没有它).我怀疑你的C++设置有问题.

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐