有没有更有效的方法来比较两个列表的项目并找到 leftOuterItems、rightOuterItems 和 matchingItems？

如何解决有没有更有效的方法来比较两个列表的项目并找到 leftOuterItems、rightOuterItems 和 matchingItems？

我们经常需要一种方法来比较两个列表的项，找出哪些项只存在于ListA（leftOuterItems）中，哪些只存在于ListB（rightOuterItems）中以及它们的共同项（matchedItems）...

我最终得到了两个解决方案，如下所示：

一种方式是对列表进行排序并一个一个地迭代（当集合由于排序而有太多项时会有性能损失），另一种方式是使用字典和散列（比第一种慢当集合有几个项目时的方式 - 由于内存分配等）

*还要记住，我想比较两个对象列表，例如两个 Person 类列表（不仅仅是基元）。这就是我创建通用扩展方法的原因

那么，您有什么更好的建议吗？

先谢谢你！

class Program
{
    static void Main(string[] args)
    {
        var fewItemsList1 = new[] { 1,4,2,3,7,6,9,5 };
        var fewItemsList2 = new[] { 15,5,14,13,12,8,11,10 };
        Run(100_000,fewItemsList1,fewItemsList2);

        var manyItemsList1 = Enumerable.Range(0,100_000).ToArray();
        var manyItemsList2 = Enumerable.Range(50000,150_000).ToArray();
        Run(1000,manyItemsList1,manyItemsList2);

        Console.WriteLine("Hello World!");
        Console.Read();
    }

    private static void Run(int count,int[] l1,int[] l2)
    {
        var sw = Stopwatch.StartNew();
        for (int i = 0; i < count; i++)
            l1.OrderedCompare(l2,x => x,out int[] leftOuterItems,out int[] rightOuterItems,out (int,int)[] matchedItems);
        sw.Stop();

        Console.WriteLine($"OrderedCompare for {count} iterations with L1 items:{l1.Count()} and L2 items:{l2.Count()} took {sw.Elapsed}");

        sw.Restart();
        for (int i = 0; i < count; i++)
            l1.HashedCompare(l2,out int[] leftOuterItems2,out int[] rightOuterItems2,int)[] matchedItems2);
        Console.WriteLine($"HashedCompare for {count} with L1 items:{l1.Count()} and L2 items:{l2.Count()} iterations took {sw.Elapsed}");
    }
}

public static class Extensions
{
    public static void OrderedCompare<T1,T2,TKey>(
        this IEnumerable<T1> source,IEnumerable<T2> target,Func<T1,TKey> sourceKeyGetter,Func<T2,TKey> targetKeyGetter,out T1[] leftOuterItems,out T2[] rightOuterItems,out (T1,T2)[] matchedItems) where TKey : IComparable<TKey>
    {
        var leftOuterItemsList = new List<T1>();
        var rightOuterItemsList = new List<T2>();
        var matchedItemsList = new List<(T1,T2)>();
        source = source.OrderBy(x => sourceKeyGetter(x)).ToArray();
        target = target.OrderBy(x => targetKeyGetter(x)).ToArray();

        bool reverseCompare = false;
        int i = 0,j = 0,sourcZeroBasedCount = source.Count() - 1,targetZeroBaseCount = target.Count() - 1;
        while (true)
        {
            var end = i == sourcZeroBasedCount && j == targetZeroBaseCount;
            var sourceItem = source.ElementAt(i);
            var targetItem = target.ElementAt(j);
            var sourceKey = sourceKeyGetter(sourceItem);
            var targetKey = targetKeyGetter(targetItem);

            int diff = reverseCompare ? targetKey.Compareto(sourceKey) : sourceKey.Compareto(targetKey);
            reverseCompare = i == sourcZeroBasedCount || j == targetZeroBaseCount;
            switch (diff)
            {
                case -1:
                    leftOuterItemsList.Add(sourceItem);
                    i = i < sourcZeroBasedCount ? i + 1 : i;
                    break;
                case 0:
                    matchedItemsList.Add((sourceItem,targetItem));
                    i = i < sourcZeroBasedCount ? i + 1 : i;
                    j = j < targetZeroBaseCount ? j + 1 : j;
                    break;
                case 1:
                    rightOuterItemsList.Add(targetItem);
                    j = j < targetZeroBaseCount ? j + 1 : j;
                    break;
            }

            if (end)
                break;
        }

        leftOuterItems = leftOuterItemsList.ToArray();
        rightOuterItems = rightOuterItemsList.ToArray();
        matchedItems = matchedItemsList.ToArray();
    }

    public static void HashedCompare<T1,T2)[] matchedItems) where TKey : IComparable<TKey>
    {
        var sourceDic = source.ToDictionary(x => sourceKeyGetter(x));
        var targetDic = target.ToDictionary(x => targetKeyGetter(x));

        var leftOuterKeys = sourceDic.Keys.Except(targetDic.Keys).ToArray();
        var rightOuterKeys = targetDic.Keys.Except(sourceDic.Keys).ToArray();
        var matchedKeys = sourceDic.Keys.Concat(targetDic.Keys).Except(leftOuterKeys.Concat(rightOuterKeys)).ToArray();

        leftOuterItems = leftOuterKeys.Select(key => sourceDic[key]).ToArray();
        rightOuterItems = rightOuterKeys.Select(key => targetDic[key]).ToArray();
        matchedItems = matchedKeys.Select(key => (sourceDic[key],targetDic[key])).ToArray();
    }
}

解决方法

HashedCompare() 中的大部分低效率都归结为在字典中进行了不必要的枚举和查找。如果您以命令式风格编写算法，则可以避免所有这些，并且代码在我看来更易于遵循：

我赞同@00110001 的建议，即您应该使用适当的基准测试框架，因为不同实现之间的差异具有相同的复杂性。

public static void HashedCompare<T1,T2,TKey>(
    this IEnumerable<T1> source,IEnumerable<T2> target,Func<T1,TKey> sourceKeyGetter,Func<T2,TKey> targetKeyGetter,out List<T1> leftOuterItems,out List<T2> rightOuterItems,out List<(T1,T2)> matchedItems) where TKey : IEquatable<TKey>
{
    var sourceItems = source.ToDictionary(x => sourceKeyGetter(x));
    var targetItems = target.ToDictionary(x => targetKeyGetter(x));

    matchedItems = new List<(T1,T2)>();
    leftOuterItems = new List<T1>();
    rightOuterItems = new List<T2>();
    foreach (var sourceItem in sourceItems)
    {
        if (targetItems.TryGetValue(sourceItem.Key,out var targetItem))
            matchedItems.Add((sourceItem.Value,targetItem));
        else
            leftOuterItems.Add(sourceItem.Value);
    }

    foreach (var targetItem in targetItems)
    {
        if (!sourceItems.ContainsKey(targetItem.Key))
            rightOuterItems.Add(targetItem.Value);
    }
}

您可以使用 Except 和 Intersect，它们都适用于 Sets（轻量级哈希集）并且可以O(n) 线性时间复杂度

var list1 = new[] { 1,4,2,3,7,6,9,5 };
var List2 = new[] { 15,5,14,13,12,8,11,10 };
Console.WriteLine(string.Join(",",list1.Except(List2)));
Console.WriteLine(string.Join(",List2.Except(list1)));
Console.WriteLine(string.Join(",List2.Intersect(list1)));

输出

1,3
15,10
5,9

至于它是更快还是更慢，您必须基准，但我的直觉是它们会更高效、更快。

关于基准测试的话题

使用像BenchmarkDotNet这样可靠的测试框架，如果你自己动手，你很可能会以无限的方式得到错误的结果