Monday, June 1, 2020

CSharp - Get a number/integer from a string fast, a speed comparison

Here are a few implementations that extract an integer found at the beginning of a string.
See my post on the fastest method to extract the 1st found integer anywhere in a string.

The comparisons below are predicated on first extracting the lead digits found in a string (signed integers allowed), and then converting that string of digits into an integer.

This is not Unicode compliant, but works for English speaking populations using ASCII numerals 0-9.





The code is reproduced below in case the above service perishes.

using System;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;


/// <summary>
/// Extension methods that pull an integer out of a string, kept as several
/// alternative implementations so their speed can be compared.
/// Only the ASCII digits '0'-'9' and a single leading '+' or '-' are recognised.
/// </summary>
public static class StringExtensions
{
    // Matches every character that is not an ASCII digit; built once and reused by GetAllIntRegex.
    private static readonly Regex NonDigitPattern = new Regex("[^0-9]");

    /// <summary>True only for the ASCII digits '0'-'9'.</summary>
    /// <remarks>
    /// Used instead of char.IsDigit, which also accepts non-ASCII Unicode decimal
    /// digits that the integer conversion in this class does not handle.
    /// </remarks>
    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /// <summary>
    /// Reports whether <paramref name="text"/> starts with '-' and returns how many
    /// leading sign characters to skip (1 for a leading '+' or '-', otherwise 0).
    /// </summary>
    private static int LeadingSignLength(string text, out bool minus)
    {
        char first = text.Length > 0 ? text[0] : '\0';
        minus = first == '-';
        return (minus || first == '+') ? 1 : 0;
    }

    /// <summary>Prefixes <paramref name="digits"/> with "-" when <paramref name="minus"/> is set.</summary>
    private static string WithSign(bool minus, string digits)
    {
        return minus ? string.Concat("-", digits) : digits;
    }

    /// <summary>Parses <paramref name="digits"/> culture-independently; returns 0 when it is not a valid Int32.</summary>
    private static int ParseOrZero(string digits)
    {
        int value;
        // Invariant culture: the extracted text always uses ASCII '-' and '0'-'9',
        // so the result must not depend on the current culture's sign symbol.
        return int.TryParse(digits, NumberStyles.Integer, CultureInfo.InvariantCulture, out value) ? value : 0;
    }

    /// <summary>
    /// Appends one decimal digit to <paramref name="magnitude"/> and reports whether the
    /// result still fits in an int of the requested sign.
    /// </summary>
    private static bool TryAppendDigit(ref long magnitude, char digit, bool minus)
    {
        // Accumulating in a long makes the range check exact; callers stop at the first
        // failure, so the long itself can never overflow.
        magnitude = magnitude * 10 + (digit - '0');
        long limit = minus ? -(long)int.MinValue : int.MaxValue;
        return magnitude <= limit;
    }

    /// <summary>Value returned when the digits do not fit in an int: the bound matching the sign.</summary>
    private static int Saturate(bool minus)
    {
        return minus ? int.MinValue : int.MaxValue;
    }

    /// <summary>Converts an in-range magnitude to a signed int.</summary>
    private static int ApplySign(long magnitude, bool minus)
    {
        return (int)(minus ? -magnitude : magnitude);
    }

    /// <summary>
    /// Removes every character that is not an ASCII digit, anywhere in the string.
    /// Signs are dropped too, so the result is always unsigned.
    /// </summary>
    /// <param name="numint">text to filter</param>
    /// <returns>all ASCII digits of <paramref name="numint"/>, concatenated in order</returns>
    /// <exception cref="ArgumentNullException"><paramref name="numint"/> is null</exception>
    public static string GetAllIntRegex(this string numint)
    {
        if (numint == null)
        {
            throw new ArgumentNullException(nameof(numint));
        }

        return NonDigitPattern.Replace(numint, "");
    }

    /// <summary>
    /// Returns the leading run of ASCII digits, built with a StringBuilder.
    /// A leading '-' is kept, a leading '+' is dropped.
    /// </summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>"-" plus digits, or digits only; "-" or "" when no digit follows the optional sign</returns>
    /// <exception cref="ArgumentNullException"><paramref name="numint"/> is null</exception>
    public static string GetLeadIntFast(this string numint)
    {
        if (numint == null)
        {
            throw new ArgumentNullException(nameof(numint));
        }

        bool minus;
        // Skip the sign by index; only the first character can be a sign.
        int start = LeadingSignLength(numint, out minus);

        StringBuilder sb = new StringBuilder();
        if (minus)
        {
            sb.Append('-');
        }

        for (int i = start; i < numint.Length && IsAsciiDigit(numint[i]); i++)
        {
            sb.Append(numint[i]);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Returns the leading run of ASCII digits, located in a char[] copy of the input.
    /// A leading '-' is kept, a leading '+' is dropped.
    /// </summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>"-" plus digits, or digits only; "-" or "" when no digit follows the optional sign</returns>
    /// <exception cref="ArgumentNullException"><paramref name="numint"/> is null</exception>
    public static string GetLeadIntFaster(this string numint)
    {
        if (numint == null)
        {
            throw new ArgumentNullException(nameof(numint));
        }

        bool minus;
        int start = LeadingSignLength(numint, out minus);

        char[] chars = numint.ToCharArray();
        int end = start;
        // 'end' stops at the first non-digit OR at the end of the input, so an
        // all-digit string keeps every digit.
        while (end < chars.Length && IsAsciiDigit(chars[end]))
        {
            end++;
        }

        return WithSign(minus, new string(chars, start, end - start));
    }

    /// <summary>
    /// Returns the leading run of ASCII digits, scanning the string in place and
    /// taking a single Substring. A leading '-' is kept, a leading '+' is dropped.
    /// </summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>"-" plus digits, or digits only; "-" or "" when no digit follows the optional sign</returns>
    /// <exception cref="ArgumentNullException"><paramref name="numint"/> is null</exception>
    public static string GetLeadIntFastest(this string numint)
    {
        if (numint == null)
        {
            throw new ArgumentNullException(nameof(numint));
        }

        bool minus;
        int start = LeadingSignLength(numint, out minus);

        int end = start;
        while (end < numint.Length && IsAsciiDigit(numint[end]))
        {
            end++;
        }

        return WithSign(minus, numint.Substring(start, end - start));
    }

    /// <summary>Extracts the lead integer with <see cref="GetLeadIntFast"/> and parses it.</summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>the parsed value, or 0 when there are no lead digits or the value does not fit in an int</returns>
    public static int TryParser(this string numint)
    {
        return ParseOrZero(numint.GetLeadIntFast());
    }

    /// <summary>Extracts the lead integer with <see cref="GetLeadIntFaster"/> and parses it.</summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>the parsed value, or 0 when there are no lead digits or the value does not fit in an int</returns>
    public static int TryParserFaster(this string numint)
    {
        return ParseOrZero(numint.GetLeadIntFaster());
    }

    /// <summary>Extracts the lead integer with <see cref="GetLeadIntFastest"/> and parses it.</summary>
    /// <param name="numint">text that may start with an optionally signed integer</param>
    /// <returns>the parsed value, or 0 when there are no lead digits or the value does not fit in an int</returns>
    public static int TryParserFastest(this string numint)
    {
        return ParseOrZero(numint.GetLeadIntFastest());
    }

    /// <summary>
    /// Collects ALL digits of the string (not just the lead ones) with
    /// <see cref="GetAllIntRegex"/> and parses them as one number.
    /// </summary>
    /// <param name="numint">text containing digits</param>
    /// <returns>the parsed value, or 0 when there are no digits or the value does not fit in an int</returns>
    public static int TryParserRegex(this string numint)
    {
        return ParseOrZero(numint.GetAllIntRegex());
    }

    /// <summary>
    /// Converts the lead digits of a string into an int, reading the characters
    /// through the string indexer after removing a leading sign with Substring.
    /// </summary>
    /// <param name="numint">text that may start with an optionally signed integer; anything after the lead digits is ignored</param>
    /// <returns>
    /// the lead integer, 0 when there are no lead digits, or int.MaxValue / int.MinValue
    /// when the digits are too large for a positive / negative int
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="numint"/> is null</exception>
    public static int StartsWithInt(this string numint)
    {
        if (numint == null)
        {
            throw new ArgumentNullException(nameof(numint));
        }

        bool minus;
        int signLength = LeadingSignLength(numint, out minus);
        // Drop only the first character; later '+' / '-' characters end the number.
        string unsigned = signLength == 0 ? numint : numint.Substring(signLength);

        long total = 0;
        for (int i = 0; i < unsigned.Length && IsAsciiDigit(unsigned[i]); i++)
        {
            if (!TryAppendDigit(ref total, unsigned[i], minus))
            {
                return Saturate(minus);
            }
        }

        return ApplySign(total, minus);
    }

    /// <summary>
    /// Converts the lead digits of a string into an int, reading the characters
    /// from a char[] copy of the input.
    /// </summary>
    /// <param name="intStr">text that may start with an optionally signed integer; anything after the lead digits is ignored</param>
    /// <returns>
    /// the lead integer, 0 when there are no lead digits, or int.MaxValue / int.MinValue
    /// when the digits are too large for a positive / negative int
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="intStr"/> is null</exception>
    public static int StartsWithIntFaster(this string intStr)
    {
        if (intStr == null)
        {
            throw new ArgumentNullException(nameof(intStr));
        }

        bool minus;
        int start = LeadingSignLength(intStr, out minus);

        long sum = 0;
        char[] n = intStr.ToCharArray();
        for (int i = start; i < n.Length && IsAsciiDigit(n[i]); i++)
        {
            if (!TryAppendDigit(ref sum, n[i], minus))
            {
                return Saturate(minus);
            }
        }

        return ApplySign(sum, minus);
    }

    /// <summary>
    /// Converts the lead digits of a string into an int, scanning the string in
    /// place without creating any intermediate string or array.
    /// </summary>
    /// <param name="intStr">text that may start with an optionally signed integer; anything after the lead digits is ignored</param>
    /// <returns>
    /// the lead integer, 0 when there are no lead digits, or int.MaxValue / int.MinValue
    /// when the digits are too large for a positive / negative int
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="intStr"/> is null</exception>
    public static int StartsWithIntFastest(this string intStr)
    {
        if (intStr == null)
        {
            throw new ArgumentNullException(nameof(intStr));
        }

        bool minus;
        int start = LeadingSignLength(intStr, out minus);

        long sum = 0;
        for (int i = start; i < intStr.Length && IsAsciiDigit(intStr[i]); i++)
        {
            if (!TryAppendDigit(ref sum, intStr[i], minus))
            {
                return Saturate(minus);
            }
        }

        return ApplySign(sum, minus);
    }
}

/// <summary>
/// Console driver: times one call of each conversion variant, then prints the
/// results of StartsWithIntFastest for a handful of sample inputs.
/// </summary>
public class Program
{
    /// <summary>
    /// Starts a stopwatch, optionally prints <paramref name="banner"/>, prints the value
    /// produced by <paramref name="run"/>, stops the stopwatch and prints the elapsed ticks.
    /// The console writes happen inside the timed region.
    /// </summary>
    private static void Measure(Func<int> run, string banner = null)
    {
        Stopwatch timer = Stopwatch.StartNew();
        if (banner != null)
        {
            Console.WriteLine(banner);
        }
        Console.WriteLine(run());
        timer.Stop();
        Console.WriteLine(timer.ElapsedTicks.ToString() + " ticks");
    }

    public static void Main()
    {
        // Direct string-to-int conversions.
        Measure(() => "12451a 1".StartsWithInt());
        Measure(() => "12452a 2".StartsWithIntFaster());
        Measure(() => "12453a 3".StartsWithIntFastest(), "Winner");

        // Extract-then-TryParse conversions.
        Measure(() => "12454a 4".TryParser());
        Measure(() => "12455a 5".TryParserFaster());
        Measure(() => "12456a 6".TryParserFastest());
        Measure(() => "12457a 7".TryParserRegex(), "This grabs all numbers and NOT JUST LEAD digits!");

        // Untimed sample conversions, including signed values and one value beyond int range.
        string[] samples =
        {
            "0", "10", "-12345", "1234", "123", "+12346", "-1234", "21474813649"
        };
        foreach (string sample in samples)
        {
            Console.WriteLine(sample.StartsWithIntFastest());
        }
    }
}

No comments:

Post a Comment