Pages

Wednesday, February 10, 2021

C# .NET Get difference between two unordered not unique lists, lightning fast, optimized


Here's how to get the difference between two lists preserving duplicates in each list that is super fast. 

As in my previous post, i dismiss all the Stack Overflow solutions with live code, and I presented my 1st attempt at the solution. 

This post demonstrates my super fast implementation, enjoy! 


//UNORDERED, NOT UNIQUE lists, lists with duplicates 

string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "9", "7", "7", "7" };
string[] arrB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" };

//DESIRED RESULT

2,5,5,5,7,7,7,9 OR 2,5,5,5,9,7,7,7 OR 9,7,7,7,2,5,5,5  //UNORDERED IS SUFFICIENT

NOTE: Linq Except but it's designed as a set command, and as a consequence it removes duplicates.

IEnumerable<string> AminuBdifference = arrA.Except(arrB); //removes dups
IEnumerable<string> BminuAdifference = arrB.Except(arrA); //remove dups

IEnumerable<string> Uniondifference = arrA.Union(arrb); //ideally, but no good dups removed

See https://dotnetfiddle.net/oHOms5  which demonstrates Linq Except use.


UpdateC# .NET Get difference between two unordered not unique lists using 2 BitArrays for super fast speed - handles worst case scenario

Below is my super fast implementation, enjoy!

 

Source Code
using System;
using System.Collections; 
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics; 

//CHALLENGE - Get difference between two unorderd, not unique lists, duplicates allowed within a list and across both lists.
//SOLUTION - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-aka-duplicates-allowed-within-a-list-and-across-both-lists.html
//OPTIMIZE - THIS IS IT using BitArray - https://metadataconsulting.blogspot.com/2021/02/CSharp-dotNet-Get-difference-between-two-unordered-not-unique-lists-lightning-fast-optimized.html
                    
public class Program
{
    public static int loopcnt = 0;
    
    public static void Main()
    {
            Stopwatch sw = new Stopwatch();
            sw.Start(); 
            
            //RANDOM case - typical use ~ 78 ticks 
            string[] arrA = new string[] { "7", "1", "2", "3", "4", "5", "9", "7", "7", "1" };
            string[] arrB = new string[] { "1", "1", "2", "2", "3", "4", "5", "5", "5", "5" };

            //WANT           2,5,5,5,9,7,7,7
        
            //WORST case - entirely different, but have 1 value in common to make it interesting ~ 88 ticks
            //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list
            //string[] arrB = new string[] { "8", "9", "10", "12", "1", "14", "15", "15", "11", "9" };//2nd list
        
            //BEST case - identical ~ 24 ticks
            //string[] arrA = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//1st list
            //string[] arrB = new string[] { "1", "1", "2", "3", "4", "5", "6", "7", "7", "7" };//2nd list
        
        
            sw.Stop();
            Console.WriteLine("RANDOM CASE -two unorderd, not unique lists ");
            Console.WriteLine("A  = " + string.Join(",", arrA));
            Console.WriteLine("B  = " + string.Join(",", arrB));
            sw.Restart();
            
            string[] diffAminusB = new string[arrA.Length];      
            string[] diffBminusA = new string[arrB.Length];
             
            BitArray bitarrRemoveA = new BitArray(arrA.Length); 
                        
            int d = 0; //index for diff list
            int a = 0; 
            bool nf = true; //not found 

            //bitarrRemove.Set(a, true);  version -> 13 ticks blazing fast, 19 ticks for worst case identical sets on my computer!
            for (int b = 0; b < arrB.Length; b++)
            {
                nf = true;

                for (a = 0; a < arrA.Length; a++)
                {
                    loopcnt++; //not require for metrics
                    //diffAminusB[a] = arrA[a]; //Tried to preload this array, and remove items using speedy BitArray, but slow             

                    if (!bitarrRemoveA[a] && arrA[a] == arrB[b])
                    {
                        nf = false;
                        bitarrRemoveA.Set(a, true); //magic - A-B = diffsA - flag removals for post processing
                        break;

                    }
                }

                if (nf) //not found in set B 
                    diffBminusA[d++] = arrB[b]; //diffs B not found in A, mathematically B-A = diffsB (order important here, driven by B, so those results appear)
                    
                
                
            }
            
            //Get A-B = diffsA, some memory savings (but very slight) here since it's a sized array
            int idx = 0;
            for (a = 0; a < arrA.Length; a++)
            {
                if (!bitarrRemoveA[a])
                    diffAminusB[idx++] = arrA[a];
               
            }

    
           sw.Stop();

           Console.WriteLine("(A-B) Result = " + string.Join(",", diffAminusB).Trim(','));
           //Console.WriteLine("D  = " + string.Join(",", D));
           //Console.WriteLine("E  = " + string.Join(",", E)); 

           Console.WriteLine("(B-A) Result = " + string.Join(",", diffBminusA).TrimEnd(','));

           Console.WriteLine("No. of loops = " + loopcnt);
           Console.WriteLine("No. of loops = " + arrA.Length + " - Post copy using BitArray to a sized array");

           //Un 
           Console.WriteLine("Result Unorderd Final A then B = " + (string.Join(",", diffAminusB)).Trim(',') + "," + string.Join(",", diffBminusA).Trim(',') + " in " + sw.ElapsedTicks + " ticks.");
           Console.WriteLine("Result Unorderd Final B then A = " + (string.Join(",",  diffBminusA)).Trim(',') + "," + string.Join(",",diffAminusB).Trim(',') + " in " + sw.ElapsedTicks + " ticks.");

            
           List<string> C = diffBminusA.ToList();

           List<string> D = diffBminusA.ToList().Concat(diffAminusB.ToList()).ToList();
           D.Sort();

           Console.WriteLine("Result  Ordered Final = " + string.Join(",", D).TrimStart(','));
           Console.WriteLine("Result  O(n^2) but very fast - can you beat it?"); 
    
    }
}

No comments:

Post a Comment