Tuesday, June 23, 2020

C# - Fastest way to find a number/integer in a string

This post follows an extensive review of ways to get a leading integer from a string; see my post here.

Here's the fastest way in C# to get the first integer in a string. It is not Unicode-aware: it recognizes only the ASCII digits 0-9, which is sufficient for English-language text but will not parse numerals written in other scripts.

Code

using System;
using System.Diagnostics;
using System.Text; 
using System.Text.RegularExpressions; 

public static class StringExtensions
{
    /// <summary>
    /// Finds the first run of ASCII digits ('0'-'9') in a string and converts it to an integer.
    /// </summary>
    /// <param name="intStr">The text to scan. Only ASCII digits are recognized; other Unicode numerals are ignored.</param>
    /// <returns>
    /// The value of the first digit run, negated when the character immediately before the run is '-'.
    /// Returns 0 when the string contains no digit. Values outside the <see cref="int"/> range are
    /// clamped to <see cref="int.MaxValue"/> (or <see cref="int.MinValue"/> for a negative number).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="intStr"/> is null.</exception>
    //  https://metadataconsulting.blogspot.com/2020/06/CSharp-Fastest-way-to-find-a-number-or-integer-in-a-string.html
    public static int GetFirstIntFast(this string intStr)
    {
        if (intStr == null)
        {
            throw new ArgumentNullException(nameof(intStr));
        }

        // Index the string directly: copying it with ToCharArray() allocates on every call.
        int start = 0;
        while (start < intStr.Length && !IsAsciiDigit(intStr[start]))
        {
            start++;
        }

        if (start == intStr.Length)
        {
            return 0; // no digit anywhere in the string
        }

        // Only a '-' directly in front of the first digit makes the number negative.
        // The start > 0 guard matters: a string that begins with a digit has no preceding character.
        bool negative = start > 0 && intStr[start - 1] == '-';

        // Accumulate the magnitude in a long so exceeding the int range is detected exactly,
        // instead of relying on 32-bit wrap-around (which can wrap to a positive value).
        // A negative number may reach 2147483648 (|int.MinValue|); a positive one only 2147483647.
        long limit = negative ? -(long)int.MinValue : int.MaxValue;
        long magnitude = 0;

        for (int i = start; i < intStr.Length && IsAsciiDigit(intStr[i]); i++)
        {
            magnitude = magnitude * 10 + (intStr[i] - '0');
            if (magnitude > limit)
            {
                // Out of range: clamp towards the sign of the number.
                return negative ? int.MinValue : int.MaxValue;
            }
        }

        return (int)(negative ? -magnitude : magnitude);
    }

    /// <summary>Returns true when <paramref name="c"/> is one of the ASCII digits '0' through '9'.</summary>
    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }
}

public class Program
{
    /// <summary>
    /// Runs GetFirstIntFast over a handful of sample strings, printing each result
    /// followed by the elapsed stopwatch ticks for that call.
    /// </summary>
    public static void Main()
    {
        Stopwatch sw = new Stopwatch();

        PrintTimed(sw, "aaaa12451a 1");
        PrintTimed(sw, "aaaaa-12452a 2");
        PrintTimed(sw, "aaaaaa+12453a 3");
        PrintTimed(sw, "aaaaaa");
        PrintTimed(sw, "1122222222222222222222");
    }

    /// <summary>
    /// Times a single GetFirstIntFast call on <paramref name="input"/> and prints the
    /// parsed value and the elapsed ticks on separate lines.
    /// </summary>
    /// <param name="sw">Stopwatch to reuse; it is restarted from zero for every measurement.</param>
    /// <param name="input">The sample string to parse.</param>
    private static void PrintTimed(Stopwatch sw, string input)
    {
        // Restart (reset + start) so each sample reports its own elapsed time
        // rather than a stale reading left over from the previous sample.
        sw.Restart();
        int value = input.GetFirstIntFast();
        sw.Stop();

        // Print only after stopping, so console I/O is not part of the measurement.
        Console.WriteLine(value);
        Console.WriteLine(sw.ElapsedTicks.ToString() + " ticks");
    }
}

1 comment:

  1. .ToCharArray() will allocate an array of characters that's as big as the initial string. Your method will be fast(now) but it will create garbage that later needs to be GC-ed (slow later).
    With this pattern:
    sw.Start();
    Console.WriteLine("aaaaaa".GetFirstIntFast());
    sw.Stop();
    you are also measuring Console.WriteLine.

    ReplyDelete