Tuesday, June 23, 2020

C# - Fastest way to find a number/integer in a string

After an extensive review of getting a lead integer from a string, see my post here.

Here's the fastest way in C# to get the 1st integer in a string. This is not Unicode compliant, but works for English speaking populations using ASCII numerals 0-9. For Unicode use [\p{N}]+ and choose first occurrence.

Code
using System;
using System.Diagnostics;

public static class StringExtensions {
	
        /// <summary>
        /// Fastest way to to find 1st number in a string and convert it into an integer
        /// </summary>
        /// <param name="intStr"></param>
        /// <returns></returns>
	    //https://metadataconsulting.blogspot.com/2020/06/CSharp-Fastest-way-to-find-a-number-or-integer-in-a-string.html
        public static int GetFirstIntFast(this string intStr)
        {
 
            int sum = 0; //must be zero
            char[] n = intStr.ToCharArray(); //fastest way to index a string
			int idxFirstDigit = -1;  

            for (int i = 0; i < n.Length; i++)
            {
                if (n[i] >= 48 && n[i] <= 57)  //'0'=48 and '9'=57 get lead number only
                {
                    if (idxFirstDigit == -1) idxFirstDigit = i; 
					int z = sum * 10 + (n[i] - 48);
                    if (z < 0) //we get into negative, if over int.MaxValue
                        return int.MaxValue; //or throw error
                    sum = z;
                }
                else if (idxFirstDigit>-1)
                    break;
                    //return int.MinValue; //or throw error
			}

            if (intStr.IndexOf('-') == idxFirstDigit-1) //chek for neg sign
				sum *= -1;
            
            return sum;

        }
	
		public static int GetFirstIntFaster(this string n)
        {
 
            int sum = 0; //must be zero
        	int idxFirstDigit = -1;  

            for (int i = 0; i < n.Length; i++)
            {
                if (n[i] >= 48 && n[i] <= 57)  //'0'=48 and '9'=57 get lead number only
                {
                    if (idxFirstDigit == -1) idxFirstDigit = i; 
					int z = sum * 10 + (n[i] - 48);
                    if (z < 0) //we get into negative, if over int.MaxValue
                        return int.MaxValue; //or throw error
                    sum = z;
                }
                else if (idxFirstDigit>-1)
                    break;
                    //return int.MinValue; //or throw error
			}

            if (n.IndexOf('-') == idxFirstDigit-1) //chek for neg sign
				sum *= -1;
            
            return sum;

        }
	
}

public class Program
{
	
	public static void Main()
	{
		    int a = 0; 
            int b = 0; 
            Stopwatch sw = new Stopwatch();
            sw.Start();
			a = "aaaa12451a 1".GetFirstIntFast();
            sw.Stop();
			Console.WriteLine(a + " " +sw.ElapsedTicks.ToString() + " ticks");
			sw.Reset();    
			sw.Start();
			b = "aaaa12451a 1".GetFirstIntFaster();
            sw.Stop();
			Console.WriteLine(b + " " +sw.ElapsedTicks.ToString() + " ticks");
	        sw.Reset();
		
		    sw.Start();
			a = "aaaaa-12452a 2".GetFirstIntFast();
            sw.Stop();
			Console.WriteLine(a + " " +sw.ElapsedTicks.ToString() + " ticks");
			sw.Reset();    
			sw.Start();
			b = "aaaaa-12452a 2".GetFirstIntFaster();
            sw.Stop();
			Console.WriteLine(b + " " +sw.ElapsedTicks.ToString() + " ticks");
	        sw.Reset();
		
		
		    sw.Start();
			a = "aaaaaaccccccccccccccccccccccccccccccccc+12453a 3".GetFirstIntFast();
            sw.Stop();
			Console.WriteLine(a + " " +sw.ElapsedTicks.ToString() + " ticks");
			sw.Reset();    
			sw.Start();
			b = "aaaaaaccccccccccccccccccccccccccccccccc+12453a 3".GetFirstIntFaster();
            sw.Stop();
			Console.WriteLine(b + " " +sw.ElapsedTicks.ToString() + " ticks");
	        sw.Reset();
		
		
			sw.Start();
			a = "aaaaaaaaaaaaaaaaa 4".GetFirstIntFast();
            sw.Stop();
			Console.WriteLine(a + " " +sw.ElapsedTicks.ToString() + " ticks");
			sw.Reset();    
			sw.Start();
			b = "aaaaaaaaaaaaaaaaa 4".GetFirstIntFaster();
            sw.Stop();
			Console.WriteLine(b + " " +sw.ElapsedTicks.ToString() + " ticks");
	        sw.Reset();
		
			sw.Start();
			a = "1122222222222222222222 5".GetFirstIntFast();
            sw.Stop();
			Console.WriteLine(a + " " +sw.ElapsedTicks.ToString() + " ticks");
			sw.Reset();    
			sw.Start();
			b = "1122222222222222222222 5".GetFirstIntFaster();
            sw.Stop();
			Console.WriteLine(b + " " +sw.ElapsedTicks.ToString() + " ticks");
	        sw.Reset();
			
	}
}

1 comment:

  1. .ToCharArray() will allocate an array of characters that's as big as the initial string. Your method will be fast(now) but it will create garbage that later needs to be GC-ed (slow later).
    With this pattern:
    sw.Start();
    Console.WriteLine("aaaaaa".GetFirstIntFast());
    sw.Stop();
    you are also measuring Console.WriteLine.

    ReplyDelete