Monday, July 22, 2019

C# Remove Whitespace: Speed Tests of Common Methods

Here are some common methods for removing whitespace from a string, each timed for comparison.


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
using System;using System.Text; using System.Diagnostics; using System.Linq; 

/// <summary>
/// Four alternative extension methods that strip every whitespace character
/// from a string, plus a small console harness that times them against each other.
/// </summary>
public static class Program
{
    /// <summary>Number of timed calls made per method by <see cref="Main"/>.</summary>
    private const int Iterations = 100000;

    /// <summary>
    /// Removes whitespace by copying the non-whitespace characters into a
    /// pre-allocated <c>char[]</c> buffer.
    /// </summary>
    /// <param name="s">The string to strip.</param>
    /// <returns>
    /// <paramref name="s"/> without any character for which
    /// <see cref="Char.IsWhiteSpace(char)"/> returns <c>true</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    public static string RemoveWhiteSpace(this string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        char[] buffer = new char[s.Length];
        int count = 0;
        for (int i = 0; i < s.Length; i++)
        {
            char c = s[i];
            if (!Char.IsWhiteSpace(c))
            {
                buffer[count++] = c;
            }
        }

        // Only the first `count` slots were written. Building the string from the
        // whole buffer would append one '\0' for every whitespace character removed.
        return new string(buffer, 0, count);
    }

    /// <summary>
    /// Removes whitespace by appending the non-whitespace characters to a
    /// <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="s">The string to strip.</param>
    /// <returns>
    /// <paramref name="s"/> without any character for which
    /// <see cref="Char.IsWhiteSpace(char)"/> returns <c>true</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    public static string StringBuilderRemoveWhiteSpace(this string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        // The result can never be longer than the input, so presizing avoids regrowth.
        StringBuilder sb = new StringBuilder(s.Length);
        for (int i = 0; i < s.Length; i++)
        {
            char c = s[i];
            if (!Char.IsWhiteSpace(c))
            {
                sb.Append(c);
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Removes whitespace with a LINQ <c>Where</c> filter over the string's characters.
    /// </summary>
    /// <param name="s">The string to strip.</param>
    /// <returns>
    /// <paramref name="s"/> without any character for which
    /// <see cref="Char.IsWhiteSpace(char)"/> returns <c>true</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    public static string LinqRemoveWhiteSpace(this string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        // string already implements IEnumerable<char>; no ToCharArray() copy is needed.
        return new string(s.Where(c => !Char.IsWhiteSpace(c)).ToArray());
    }

    /// <summary>
    /// Removes whitespace by compacting a copy of the input in place, testing each
    /// character against a hard-coded list of whitespace code points instead of
    /// calling <see cref="Char.IsWhiteSpace(char)"/>.
    /// </summary>
    /// <param name="input">The string to strip.</param>
    /// <returns>
    /// <paramref name="input"/> without any of the characters listed in the
    /// <c>switch</c> below.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public static string FastRemoveWhiteSpace(this string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        char[] src = input.ToCharArray();
        int write = 0;

        // `write` never overtakes `read`, so compacting within the same array is safe.
        for (int read = 0; read < src.Length; read++)
        {
            char ch = src[read];
            switch (ch)
            {
                case '\u0020':
                case '\u00A0':
                case '\u1680':
                case '\u2000':
                case '\u2001':
                case '\u2002':
                case '\u2003':
                case '\u2004':
                case '\u2005':
                case '\u2006':
                case '\u2007':
                case '\u2008':
                case '\u2009':
                case '\u200A':
                case '\u202F':
                case '\u205F':
                case '\u3000':
                case '\u2028':
                case '\u2029':
                case '\u0009':
                case '\u000A':
                case '\u000B':
                case '\u000C':
                case '\u000D':
                case '\u0085':
                    // Whitespace: drop it.
                    break;
                default:
                    src[write++] = ch;
                    break;
            }
        }

        return new string(src, 0, write);
    }

    /// <summary>
    /// Times <see cref="Iterations"/> calls of <paramref name="remove"/> on
    /// <paramref name="input"/> and prints the stripped string followed by the
    /// elapsed <see cref="Stopwatch"/> ticks.
    /// </summary>
    /// <param name="remove">The whitespace-removal method to measure.</param>
    /// <param name="input">The string passed to every call.</param>
    private static void Report(Func<string, string> remove, string input)
    {
        // Untimed first call: absorbs one-time JIT cost and yields the result to print.
        string result = remove(input);

        Stopwatch sw = Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++)
        {
            remove(input);
        }
        sw.Stop();

        // Console output happens outside the timed region so I/O does not skew the figures.
        Console.WriteLine(result);
        Console.WriteLine(sw.ElapsedTicks + " ticks for " + Iterations + " calls.");
    }

    /// <summary>
    /// Runs each removal method against the same sample headline and prints its
    /// output and timing, in the order: array buffer, StringBuilder, LINQ, switch.
    /// </summary>
    public static void Main()
    {
        string test = "China lashes out at Hong Kong protest targeting its office Mon 22-Jul-19 10:33am";

        Report(RemoveWhiteSpace, test);
        Report(StringBuilderRemoveWhiteSpace, test);
        Report(LinqRemoveWhiteSpace, test);
        Report(FastRemoveWhiteSpace, test);
    }
}

No comments:

Post a Comment