Here's how to get the difference between two lists preserving duplicates in each list that is super fast.
As in my previous post, i dismiss all the Stack Overflow solutions with live code, and I presented my 1st attempt at the solution.
This post demonstrates my super fast implementation, enjoy!
//UNORDERED, NOT UNIQUE lists, lists with duplicates
string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "9", "7", "7", "7" };
string[] arrB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" };
//DESIRED RESULT
2,5,5,5,7,7,7,9 OR 2,5,5,5,9,7,7,7 OR 9,7,7,7,2,5,5,5 //UNORDERED IS SUFFICIENT
NOTE: Linq Except but it's designed as a set command, and as a consequence it removes duplicates.
IEnumerable<string> AminuBdifference = arrA.Except(arrB); //removes dupsIEnumerable<string> BminuAdifference = arrB.Except(arrA); //remove dups
IEnumerable<string> Uniondifference = arrA.Union(arrb); //ideally, but no good dups removed
See https://dotnetfiddle.net/oHOms5 which demonstrates Linq Except use.
Update : C# .NET Get difference between two unordered not unique lists using 2 BitArrays for super fast speed - handles worst case scenario
Below is my super fast implementation, enjoy!
Source Code
using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Diagnostics; //CHALLENGE - Get difference between two unorderd, not unique lists, duplicates allowed within a list and across both lists. //SOLUTION - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-aka-duplicates-allowed-within-a-list-and-across-both-lists.html //OPTIMIZE - THIS IS IT using BitArray - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-lightning-fast-optimized.html public class Program { public static int loopcnt = 0; public static void Main() { Stopwatch sw = new Stopwatch(); sw.Start(); //RANDOM case - typical use ~ 78 ticks string[] arrA = new string[] { "7", "1", "2", "3", "4", "5", "9", "7", "7", "1" }; string[] arrB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" }; //WANT 2,5,5,5,9,7,7,7 //WORST case - entirely different, but have 1 value in common to make it interesting ~ 88 ticks //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list //string[] arrB = new string[] { "8", "9", "10", "12", "1", "14", "15", "15", "11", "9" };//2nd list //BEST case - identical ~ 24 ticks //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list //string[] arrB = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//2nd list sw.Stop(); Console.WriteLine("RANDOM CASE -two unorderd, not unique lists "); Console.WriteLine("A = " + string.Join(",", arrA)); Console.WriteLine("B = " + string.Join(",", arrB)); sw.Restart(); string[] diffAminusB = new string[arrA.Length]; string[] diffBminusA = new string[arrB.Length]; BitArray bitarrRemoveA = new BitArray(arrA.Length); int d = 0; //index for diff list int a = 0; bool nf = true; //not found //bitarrRemove.Set(a, true); version -> 13 ticks blazing fast, 19 ticks for worst case identical sets on my computer! for (int b = 0; b < arrB.Length; b++) { nf = true; for (a = 0; a < arrA.Length; a++) { loopcnt++; //not require for metrics //diffAminusB[a] = arrA[a]; //Tried to preload this array, and remove items using speedy BitArray, but slow if (!bitarrRemoveA[a] && arrA[a] == arrB[b]) { nf = false; bitarrRemoveA.Set(a, true); //magic - A-B = diffsA - flag removals for post processing break; } } if (nf) //not found in set B diffBminusA[d++] = arrB[b]; //diffs B not found in A, mathematically B-A = diffsB (order important here, driven by B, so those results appear) } //Get A-B = diffsA, some memory savings (but very slight) here since it's a sized array int idx = 0; for (a = 0; a < arrA.Length; a++) { if (!bitarrRemoveA[a]) diffAminusB[idx++] = arrA[a]; } sw.Stop(); Console.WriteLine("(A-B) Result = " + string.Join(",", diffAminusB).Trim(',')); //Console.WriteLine("D = " + string.Join(",", D)); //Console.WriteLine("E = " + string.Join(",", E)); Console.WriteLine("(B-A) Result = " + string.Join(",", diffBminusA).TrimEnd(',')); Console.WriteLine("No. of loops = " + loopcnt); Console.WriteLine("No. of loops = " + arrA.Length + " - Post copy using BitArray to a sized array"); //Un Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", diffAminusB)).Trim(',') + "," + string.Join(",", diffBminusA).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); Console.WriteLine("Result Unorderd Final B then A = " + (string.Join(",", diffBminusA)).Trim(',') + "," + string.Join(",",diffAminusB).Trim(',') + " in " + sw.ElapsedTicks + " ticks."); List<string> C = diffBminusA.ToList(); List<string> D = diffBminusA.ToList().Concat(diffAminusB.ToList()).ToList(); D.Sort(); Console.WriteLine("Result Ordered Final = " + string.Join(",", D).TrimStart(',')); Console.WriteLine("Result O(n^2) but very fast - can you beat it?"); } }
No comments:
Post a Comment