Here's how to get the difference between two lists preserving duplicates in each list that is super fast, using BitArrays.
As in my previous post, i dismiss all the Stack Overflow solutions with live code, and I presented my 1st attempt at the solution.
This post demonstrates my super fast implementation, enjoy!
//UNORDERED, NOT UNIQUE lists, lists with duplicates
string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "9", "7", "7", "7" };
string[] arrB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" };
//DESIRED RESULT
2,5,5,5,7,7,7,9 OR 2,5,5,5,9,7,7,7 OR 9,7,7,7,2,5,5,5 //UNORDERED IS SUFFICIENT
NOTE: Linq Except but it's designed as a set command, and as a consequence it removes duplicates.
IEnumerable<string> AminuBdifference = arrA.Except(arrB); //removes dupsIEnumerable<string> BminuAdifference = arrB.Except(arrA); //remove dups
IEnumerable<string> Uniondifference = arrA.Union(arrb); //ideally, but no good because dups are removed
See https://dotnetfiddle.net/oHOms5 which demonstrates Linq Except use.
Below is my super fast implementation, using a pure BitArray implementations, enjoy!
Use case: For list with less than 100 values, otherwise use ExceptIndistinct.
Source Code
using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Diagnostics; //CHALLENGE - Get difference between two unorderd, not unique lists, duplicates allowed within a list and across both lists. //SOLUTION - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-aka-duplicates-allowed-within-a-list-and-across-both-lists.html //OPTIMIZE - THIS IS IT using BitArray - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-lightning-fast-optimized.html //OPTIMIZEv2 - All BitArray imp - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-using-BitArrays-for-super-fast-speed.html // - use ExceptIndistinct for >100 items public static class Program { public static int loopcnt = 0; //https://stackoverflow.com/questions/2975944/except-has-similar-effect-to-distinct public static IEnumerable<T> ExceptFrom<T>( this IEnumerable<T> first, IEnumerable<T> second) //, //IEqualityComparer<TSource> comparer) { if (first == null) throw new ArgumentNullException(); if (second == null) throw new ArgumentNullException(); //var secondSet = second as HashSet<TSource> ?? // this trick ignore the comparer // second.ToHashSet(comparer ?? EqualityComparer<TSource>.Default); var secondSet = second as HashSet<T> ?? // this trick ignore the comparer second.ToHashSet(EqualityComparer<T>.Default); // Contains is O(1) for HashSet. return first.Where(v => !secondSet.Equals(v)); //change to equals, not Contains } //use after 100 items public static IEnumerable<T> ExceptIndistinct<T>(this IEnumerable<T> first, IEnumerable<T> second) { if(first == null) { throw new ArgumentNullException("first"); } if(second == null) { throw new ArgumentNullException("second"); } var secondCounts = new Dictionary<T, int>(EqualityComparer<T>.Default); int count; int nullCount = 0; foreach(var item in second) { loopcnt++; if(item == null) { nullCount++; } else if(secondCounts.TryGetValue(item, out count)) { secondCounts[item] = count + 1; } else { secondCounts.Add(item, 1); } } foreach(var item in first) { loopcnt++; if(item == null) { nullCount--; if(nullCount < 0) { yield return item; } } else if(secondCounts.TryGetValue(item, out count)) { if(count == 0) { secondCounts.Remove(item); yield return item; } else { secondCounts[item] = count - 1; } } else { yield return item; } } } public static void Main() { Stopwatch sw = new Stopwatch(); //RANDOM case - typical use string[] arrA = new string[] { "a", "7", "1", "2", "3", "4", "5", "9", "7", "7", "1","222222222222222222","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222323" }; string[] arrB = new string[] { "a", "1", "1", "2", "2", "3", "4", "5", "5", "5", "5","999999999999999999","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222323" }; sw.Start(); //WANT 2,5,5,5,9,7,7,7 //WORST case - entirely different, but have 1 value in common to make it interesting //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list //string[] arrB = new string[] { "8", "9", "10", "12", "1", "14", "15", "15", "11", "9" };//2nd list //BEST case - identical //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list //string[] arrB = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//2nd list sw.Stop(); Console.WriteLine("RANDOM CASE -two unorderd, not unique lists "); Console.WriteLine("A = " + string.Join(",", arrA)); Console.WriteLine("B = " + string.Join(",", arrB)); Console.WriteLine("A count = " + arrA.Count()); Console.WriteLine("B count = " + arrB.Count()); sw.Restart(); string[] arrBminusA = new string[arrB.Length]; string[] arrAminuB = new string[arrA.Length]; BitArray bitArrAminusB = new BitArray(arrA.Length); BitArray bitarrBminusA = new BitArray(arrB.Length); //May seem counter intuitive to run this over 3 loop but it's faster ~ 10 ticks int a = 0; for (int b = 0; b < arrB.Length; b++) { for (a = 0; a < arrA.Length; a++) { loopcnt++;//not need for timing analysis if (!bitArrAminusB[a] && arrA[a].Length == arrB[b].Length && arrA[a] == arrB[b]) { bitArrAminusB.Set(a, true); //A-B, list A difference/subtract of list B bitarrBminusA.Set(b, true); //B-A, list B difference/subtract of list A break; } } } int idxa = 0; for (int aa = 0; aa < arrA.Length; aa++) { if (!bitArrAminusB[aa]) arrAminuB[idxa++] = arrA[aa]; } int idxb = 0; for (int bb = 0; bb < arrB.Length; bb++) { if (!bitarrBminusA[bb]) arrBminusA[idxb++] = arrB[bb]; } sw.Stop(); Console.WriteLine("(A-B) Result = " + string.Join(",", arrAminuB).Trim(',')); //Console.WriteLine("D = " + string.Join(",", D)); //Console.WriteLine("E = " + string.Join(",", E)); Console.WriteLine("(B-A) Result = " + string.Join(",", arrBminusA).TrimEnd(',')); Console.WriteLine("No. of loops = " + loopcnt); Console.WriteLine("No. of loops = " + arrA.Length*2 + " - Post copy using BitArray to a sized array"); //Un Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", arrAminuB)).Trim(',') + "," + string.Join(",", arrBminusA).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); Console.WriteLine("Result Unorderd Final B then A = " + (string.Join(",", arrBminusA)).Trim(',') + "," + string.Join(",",arrAminuB).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); Console.WriteLine("Note: In a VS solution this is 47 ticks."); List<string> D = arrBminusA.ToList().Concat(arrAminuB.ToList()).ToList(); D.Sort(); Console.WriteLine("Result Ordered Final = " + string.Join(",", D).TrimStart(',')); Console.WriteLine("Result O(n^2+2n) but very fast - can you beat it?"); loopcnt = 0; Console.WriteLine("\n\nExceptIndistinct - Reddit Solution by UninformedPleb\n"); sw.Reset(); List<string> listA = new List<string> { "a", "7", "1", "2", "3", "4", "5", "9", "7", "7", "1","222222222222222222","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222323" }; List<string> listB = new List<string> { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" }; sw.Start(); List<string> listC = listA.ExceptIndistinct(listB).ToList(); List<string> listD = listB.ExceptIndistinct(listA).ToList(); sw.Stop(); Console.WriteLine("No. of loops = " + loopcnt); Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", listC)).Trim(',') + "," + string.Join(",", listD).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); loopcnt = 0; Console.WriteLine("\n\nExceptIndistinct - Reddit Solution by UninformedPleb using arrays\n"); sw.Reset(); string[] arrAA = new string[] { "a", "7", "1", "2", "3", "4", "5", "9", "7", "7", "1","222222222222222222","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222323" }; string[] arrBB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5","999999999999999999","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222" }; sw.Start(); string[] listCC = arrAA.ExceptIndistinct(arrBB).ToArray(); string[] listDD = arrBB.ExceptIndistinct(arrAA).ToArray(); sw.Stop(); Console.WriteLine("No. of loops = " + loopcnt); Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", listC)).Trim(',') + "," + string.Join(",", listD).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); loopcnt = 0; Console.WriteLine("\n\nStackOverflow ExceptFrom\n"); sw.Reset(); string[] arrAAA = new string[] { "a", "7", "1", "2", "3", "4", "5", "9", "7", "7", "1","222222222222222222","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222323" }; string[] arrBBB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5","999999999999999999","22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222" }; sw.Start(); string[] listCCC = arrAAA.ExceptFrom(arrBBB).ToArray(); string[] listDDD = arrBBB.ExceptFrom(arrAAA).ToArray(); sw.Stop(); Console.WriteLine("No. of loops = " + loopcnt); Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", listC)).Trim(',') + "," + string.Join(",", listD).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); } }
No comments:
Post a Comment