微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

有没有更有效的方法来比较两个列表的项目并找到 leftOuterItems、rightOuterItems 和 matchingItems?

如何解决有没有更有效的方法来比较两个列表的项目并找到 leftOuterItems、rightOuterItems 和 matchingItems?

我们经常需要一种方法来比较两个列表的项,找出哪些项只存在于ListA(leftOuterItems)中,哪些只存在于ListB(rightOuterItems)中以及它们的共同项(matchedItems)...

我最终得到了两个解决方案,如下所示:

一种方式是先对两个列表排序,再逐项迭代比较(集合项目很多时,排序会带来性能损失);另一种方式是使用字典和哈希(集合只有少量项目时,由于内存分配等开销,反而比第一种方式慢)。

*还要记住,我想比较两个对象列表,例如两个 Person 类列表(不仅仅是基元)。这就是我创建通用扩展方法的原因

那么,您有什么更好的建议吗?

先谢谢你!

program output

/// <summary>
/// Console driver that times <c>OrderedCompare</c> against <c>HashedCompare</c>,
/// first on two small arrays and then on two large, partially overlapping ranges.
/// </summary>
class Program
{
    /// <summary>
    /// Runs both timing scenarios, then waits for a key press so the console stays open.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var fewItemsList1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
        var fewItemsList2 = new[] { 15, 5, 14, 13, 12, 8, 11, 10 };
        Run(100_000, fewItemsList1, fewItemsList2);

        var manyItemsList1 = Enumerable.Range(0, 100_000).ToArray();
        var manyItemsList2 = Enumerable.Range(50000, 150_000).ToArray();
        Run(1000, manyItemsList1, manyItemsList2);

        Console.WriteLine("Hello World!");
        Console.Read();
    }

    /// <summary>
    /// Calls each comparison method <paramref name="count"/> times on the same inputs
    /// and prints the elapsed wall-clock time of each loop.
    /// </summary>
    /// <param name="count">Number of iterations per method.</param>
    /// <param name="l1">Left-hand list.</param>
    /// <param name="l2">Right-hand list.</param>
    private static void Run(int count, int[] l1, int[] l2)
    {
        var sw = Stopwatch.StartNew();
        for (int i = 0; i < count; i++)
        {
            // The items are their own keys, so both key selectors are the identity.
            l1.OrderedCompare(l2, x => x, x => x, out int[] leftOuterItems, out int[] rightOuterItems, out (int, int)[] matchedItems);
        }
        sw.Stop();

        Console.WriteLine($"OrderedCompare for {count} iterations with L1 items:{l1.Length} and L2 items:{l2.Length} took {sw.Elapsed}");

        sw.Restart();
        for (int i = 0; i < count; i++)
        {
            l1.HashedCompare(l2, x => x, x => x, out int[] leftOuterItems2, out int[] rightOuterItems2, out (int, int)[] matchedItems2);
        }
        // Stop before formatting the message so both measurements are taken the same way.
        sw.Stop();

        Console.WriteLine($"HashedCompare for {count} with L1 items:{l1.Length} and L2 items:{l2.Length} iterations took {sw.Elapsed}");
    }
}

/// <summary>
/// Extension methods that split two sequences into "only in source", "only in target"
/// and "present in both" by comparing a key extracted from every item.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Compares two sequences by sorting both on their keys and walking them in lock-step.
    /// </summary>
    /// <typeparam name="T1">Item type of <paramref name="source"/>.</typeparam>
    /// <typeparam name="T2">Item type of <paramref name="target"/>.</typeparam>
    /// <typeparam name="TKey">Key type used to match items.</typeparam>
    /// <param name="source">Left-hand sequence.</param>
    /// <param name="target">Right-hand sequence.</param>
    /// <param name="sourceKeyGetter">Extracts the key of a source item.</param>
    /// <param name="targetKeyGetter">Extracts the key of a target item.</param>
    /// <param name="leftOuterItems">Source items whose key has no partner in target, in key order.</param>
    /// <param name="rightOuterItems">Target items whose key has no partner in source, in key order.</param>
    /// <param name="matchedItems">Pairs of source/target items with equal keys, in key order.</param>
    /// <exception cref="ArgumentNullException">Any sequence or key selector is <c>null</c>.</exception>
    /// <remarks>
    /// Items with duplicate keys are paired one-to-one; surplus duplicates on either side
    /// are reported as outer items. Empty sequences are allowed.
    /// </remarks>
    public static void OrderedCompare<T1, T2, TKey>(
        this IEnumerable<T1> source, IEnumerable<T2> target, Func<T1, TKey> sourceKeyGetter, Func<T2, TKey> targetKeyGetter, out T1[] leftOuterItems, out T2[] rightOuterItems, out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (target == null)
        {
            throw new ArgumentNullException(nameof(target));
        }
        if (sourceKeyGetter == null)
        {
            throw new ArgumentNullException(nameof(sourceKeyGetter));
        }
        if (targetKeyGetter == null)
        {
            throw new ArgumentNullException(nameof(targetKeyGetter));
        }

        // Use one comparer for sorting and merging so both steps agree on the order
        // (and so that null keys are handled instead of throwing).
        var comparer = Comparer<TKey>.Default;
        T1[] sortedSource = source.OrderBy(sourceKeyGetter, comparer).ToArray();
        T2[] sortedTarget = target.OrderBy(targetKeyGetter, comparer).ToArray();

        var leftOuterItemsList = new List<T1>();
        var rightOuterItemsList = new List<T2>();
        var matchedItemsList = new List<(T1, T2)>();

        int i = 0, j = 0;
        while (i < sortedSource.Length && j < sortedTarget.Length)
        {
            T1 sourceItem = sortedSource[i];
            T2 targetItem = sortedTarget[j];

            // Only the sign of a comparison is guaranteed, never the exact values -1/0/1.
            int diff = comparer.Compare(sourceKeyGetter(sourceItem), targetKeyGetter(targetItem));
            if (diff < 0)
            {
                leftOuterItemsList.Add(sourceItem);
                i++;
            }
            else if (diff > 0)
            {
                rightOuterItemsList.Add(targetItem);
                j++;
            }
            else
            {
                matchedItemsList.Add((sourceItem, targetItem));
                i++;
                j++;
            }
        }

        // Whatever remains on one side after the other is exhausted cannot have a partner.
        for (; i < sortedSource.Length; i++)
        {
            leftOuterItemsList.Add(sortedSource[i]);
        }
        for (; j < sortedTarget.Length; j++)
        {
            rightOuterItemsList.Add(sortedTarget[j]);
        }

        leftOuterItems = leftOuterItemsList.ToArray();
        rightOuterItems = rightOuterItemsList.ToArray();
        matchedItems = matchedItemsList.ToArray();
    }

    /// <summary>
    /// Compares two sequences by indexing both in dictionaries keyed by the extracted key.
    /// </summary>
    /// <typeparam name="T1">Item type of <paramref name="source"/>.</typeparam>
    /// <typeparam name="T2">Item type of <paramref name="target"/>.</typeparam>
    /// <typeparam name="TKey">Key type used to match items.</typeparam>
    /// <param name="source">Left-hand sequence; keys must be unique.</param>
    /// <param name="target">Right-hand sequence; keys must be unique.</param>
    /// <param name="sourceKeyGetter">Extracts the key of a source item.</param>
    /// <param name="targetKeyGetter">Extracts the key of a target item.</param>
    /// <param name="leftOuterItems">Source items whose key does not occur in target.</param>
    /// <param name="rightOuterItems">Target items whose key does not occur in source.</param>
    /// <param name="matchedItems">Pairs of source/target items sharing a key.</param>
    /// <exception cref="ArgumentNullException">Any sequence or key selector is <c>null</c>, or a key is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">A sequence contains two items with the same key.</exception>
    public static void HashedCompare<T1, T2, TKey>(
        this IEnumerable<T1> source, IEnumerable<T2> target, Func<T1, TKey> sourceKeyGetter, Func<T2, TKey> targetKeyGetter, out T1[] leftOuterItems, out T2[] rightOuterItems, out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (target == null)
        {
            throw new ArgumentNullException(nameof(target));
        }
        if (sourceKeyGetter == null)
        {
            throw new ArgumentNullException(nameof(sourceKeyGetter));
        }
        if (targetKeyGetter == null)
        {
            throw new ArgumentNullException(nameof(targetKeyGetter));
        }

        var sourceDic = source.ToDictionary(sourceKeyGetter);
        var targetDic = target.ToDictionary(targetKeyGetter);

        var leftOuterItemsList = new List<T1>();
        var rightOuterItemsList = new List<T2>();
        var matchedItemsList = new List<(T1, T2)>();

        // One lookup per source item decides between "matched" and "left only";
        // this avoids the repeated Concat/Except passes over the key sets.
        foreach (var sourceEntry in sourceDic)
        {
            if (targetDic.TryGetValue(sourceEntry.Key, out T2 targetItem))
            {
                matchedItemsList.Add((sourceEntry.Value, targetItem));
            }
            else
            {
                leftOuterItemsList.Add(sourceEntry.Value);
            }
        }

        foreach (var targetEntry in targetDic)
        {
            if (!sourceDic.ContainsKey(targetEntry.Key))
            {
                rightOuterItemsList.Add(targetEntry.Value);
            }
        }

        leftOuterItems = leftOuterItemsList.ToArray();
        rightOuterItems = rightOuterItemsList.ToArray();
        matchedItems = matchedItemsList.ToArray();
    }
}

解决方法

HashedCompare() 的低效主要来自对字典不必要的重复枚举和查找。如果改用命令式风格来编写算法,就可以避免这些开销,而且在我看来代码也更容易理解:

我赞同 @00110001 的建议:您应该使用合适的基准测试框架,因为这些实现的时间复杂度相同,它们之间的差异只有通过可靠的测量才能比较出来。

/// <summary>
/// Splits two sequences into left-only, right-only and matched items by indexing
/// both of them in dictionaries keyed by the selected key.
/// </summary>
/// <param name="source">Left-hand sequence; keys must be unique.</param>
/// <param name="target">Right-hand sequence; keys must be unique.</param>
/// <param name="sourceKeyGetter">Extracts the key of a source item.</param>
/// <param name="targetKeyGetter">Extracts the key of a target item.</param>
/// <param name="leftOuterItems">Source items whose key is absent from target.</param>
/// <param name="rightOuterItems">Target items whose key is absent from source.</param>
/// <param name="matchedItems">Pairs of source/target items that share a key.</param>
public static void HashedCompare<T1,T2,TKey>(
    this IEnumerable<T1> source,IEnumerable<T2> target,Func<T1,TKey> sourceKeyGetter,Func<T2,TKey> targetKeyGetter,out List<T1> leftOuterItems,out List<T2> rightOuterItems,out List<(T1,T2)> matchedItems) where TKey : IEquatable<TKey>
{
    var leftByKey = source.ToDictionary(item => sourceKeyGetter(item));
    var rightByKey = target.ToDictionary(item => targetKeyGetter(item));

    var pairs = new List<(T1,T2)>();
    var leftOnly = new List<T1>();

    // Classify every source entry with a single lookup in the target index.
    foreach (var entry in leftByKey)
    {
        if (!rightByKey.TryGetValue(entry.Key,out var partner))
        {
            leftOnly.Add(entry.Value);
            continue;
        }

        pairs.Add((entry.Value,partner));
    }

    matchedItems = pairs;
    leftOuterItems = leftOnly;

    // Target entries with no counterpart in the source index are right-only.
    rightOuterItems = rightByKey
        .Where(entry => !leftByKey.ContainsKey(entry.Key))
        .Select(entry => entry.Value)
        .ToList();
}
,

您可以使用 Except 和 Intersect,它们内部都基于 Set(轻量级哈希集)实现,时间复杂度为线性的 O(n)。

// Demonstrates the built-in LINQ set operators on the two sample lists.
var list1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var list2 = new[] { 15, 5, 14, 13, 12, 8, 11, 10 };

// Items only in list1 (left outer).
Console.WriteLine(string.Join(",", list1.Except(list2)));
// Items only in list2 (right outer).
Console.WriteLine(string.Join(",", list2.Except(list1)));
// Items present in both lists (matched).
Console.WriteLine(string.Join(",", list2.Intersect(list1)));

输出

1,4,2,3,7,6,9
15,14,13,12,8,11,10
5

至于它是更快还是更慢,您必须进行基准测试才能确定,但我的直觉是它们会更高效、更快。


关于基准测试的话题

enter image description here

请使用像 BenchmarkDotNet 这样可靠的基准测试框架;如果自己手写计时代码,很容易因为各种各样的原因得到错误的结果。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。