Tuesday, March 19, 2019

A Unicode-aware "ReplaceAt" string extension method that handles Unicode strings properly

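Most "ReplaceAt" methods fail when given a string that contains characters outside the Basic Multilingual Plane (emoji, for example). Such a character is stored as a surrogate pair of two UTF-16 char values, so these methods count it as 2 'places' wide instead of 1. As a result, the replaced character ends up in the wrong position in the resultant string.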

Below is a large enumeration of common ReplaceAt implementations. They all fail, except for UnicodeReplaceAt, which was written to handle Unicode characters.

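The UnicodeReplaceAt method replaces the character at a specific zero-based index in a string. It also handles the null char '\0' properly: when the replacement character is '\0', the character at that index is removed from the resultant string instead of being replaced.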



using System.Linq;
using System.Diagnostics;
using System;
using System.Text;
using System.Globalization;

/// <summary>
/// Demo program comparing several "replace the character at index N" string
/// extension methods. Only <see cref="UnicodeReplaceAt"/> indexes by text element
/// (via <see cref="StringInfo"/>); the other ReplaceAt variants index by UTF-16
/// code unit and are kept as-is to show where they go wrong on surrogate pairs.
/// </summary>
public static class Program
{
    // Sentinel replacement character: passing it means "remove, do not insert".
    private const char NullChar = '\0';
    private static readonly string NullCharString = NullChar.ToString();

    /// <summary>
    /// Replaces the text element at the zero-based <paramref name="offset"/> with
    /// <paramref name="replaceChar"/>, counting positions in text elements rather
    /// than UTF-16 code units.
    /// </summary>
    /// <param name="str">The source string.</param>
    /// <param name="offset">Zero-based text-element index to replace.</param>
    /// <param name="replaceChar">
    /// The replacement character. When it is '\0' the text element at
    /// <paramref name="offset"/> is removed and nothing is inserted.
    /// </param>
    /// <returns>
    /// The modified string, or <paramref name="str"/> unchanged when it is null or
    /// empty or when <paramref name="offset"/> is out of range (the same contract
    /// the other ReplaceAt methods in this class follow).
    /// </returns>
    public static string UnicodeReplaceAt(this string str, int offset, char replaceChar)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringInfo info = new StringInfo(str);
        if (offset < 0 || offset >= info.LengthInTextElements)
        {
            // Without this guard SubstringByTextElements would throw.
            return str;
        }

        return info.ReplaceByPosition(replaceChar.ToString(), offset, 1).String;
    }

    /// <summary>
    /// Removes <paramref name="count"/> text elements starting at
    /// <paramref name="offset"/> and inserts <paramref name="replaceBy"/> in their
    /// place. When <paramref name="replaceBy"/> equals the "\0" sentinel string the
    /// elements are only removed.
    /// </summary>
    /// <param name="str">The source text.</param>
    /// <param name="replaceBy">The replacement text, or "\0" to remove only.</param>
    /// <param name="offset">Zero-based text-element index where replacement starts.</param>
    /// <param name="count">Number of text elements to remove.</param>
    /// <returns>A new <see cref="StringInfo"/> holding the result.</returns>
    public static StringInfo ReplaceByPosition(this StringInfo str, string replaceBy, int offset, int count)
    {
        StringInfo remaining = str.RemoveByTextElements(offset, count);

        if (replaceBy == NullCharString)
        {
            return remaining;
        }

        return remaining.InsertByTextElements(offset, replaceBy);
    }

    /// <summary>
    /// Returns a copy of <paramref name="str"/> without the <paramref name="count"/>
    /// text elements that start at <paramref name="offset"/>. If the range runs to
    /// or past the end of the text, everything from <paramref name="offset"/>
    /// onward is dropped.
    /// </summary>
    /// <param name="str">The source text.</param>
    /// <param name="offset">Zero-based text-element index of the first element to remove.</param>
    /// <param name="count">Number of text elements to remove.</param>
    /// <returns>A new <see cref="StringInfo"/> holding the remaining text.</returns>
    public static StringInfo RemoveByTextElements(this StringInfo str, int offset, int count)
    {
        int total = str.LengthInTextElements;
        int tailStart = offset + count;

        string head = str.SubstringByTextElements(0, offset);
        // Only ask for a tail when one exists; SubstringByTextElements rejects a
        // start index at or beyond the end of the text.
        string tail = tailStart < total
            ? str.SubstringByTextElements(tailStart, total - tailStart)
            : "";

        return new StringInfo(string.Concat(head, tail));
    }

    /// <summary>
    /// Returns a copy of <paramref name="str"/> with <paramref name="insertStr"/>
    /// inserted before the text element at <paramref name="offset"/>. When the
    /// source text is null or empty the result is just <paramref name="insertStr"/>.
    /// </summary>
    /// <param name="str">The source text.</param>
    /// <param name="offset">Zero-based text-element index to insert at.</param>
    /// <param name="insertStr">The text to insert.</param>
    /// <returns>A new <see cref="StringInfo"/> holding the combined text.</returns>
    public static StringInfo InsertByTextElements(this StringInfo str, int offset, string insertStr)
    {
        if (string.IsNullOrEmpty(str.String))
        {
            return new StringInfo(insertStr);
        }

        int tailLength = str.LengthInTextElements - offset;

        string head = str.SubstringByTextElements(0, offset);
        string tail = tailLength > 0 ? str.SubstringByTextElements(offset, tailLength) : "";

        return new StringInfo(string.Concat(head, insertStr, tail));
    }

    /// <summary>
    /// StringBuilder-based replace. Indexes by UTF-16 code unit, so the position is
    /// wrong for strings containing surrogate pairs. Returns <paramref name="s"/>
    /// unchanged when it is null or empty or <paramref name="idx"/> is out of range.
    /// </summary>
    public static string SubsituteStringStringBuilder(this string s, int idx, char replaceChar)
    {
        if (string.IsNullOrEmpty(s) || idx < 0 || idx >= s.Length)
        {
            return s;
        }

        return new StringBuilder(s).Remove(idx, 1).Insert(idx, replaceChar.ToString()).ToString();
    }

    /// <summary>
    /// Substring-based replace. Indexes by UTF-16 code unit, so the position is
    /// wrong for strings containing surrogate pairs. Returns <paramref name="s"/>
    /// unchanged when it is null or empty or <paramref name="idx"/> is out of range.
    /// </summary>
    public static string ReplaceAtSubstring(this string s, int idx, char replaceChar)
    {
        if (string.IsNullOrEmpty(s) || idx < 0 || idx >= s.Length)
        {
            return s;
        }

        // A single char always occupies exactly one code unit, so the tail starts at idx + 1.
        return s.Substring(0, idx) + replaceChar.ToString() + s.Substring(idx + 1);
    }

    /// <summary>
    /// Remove/Insert-based replace. Indexes by UTF-16 code unit, so the position is
    /// wrong for strings containing surrogate pairs. Returns <paramref name="s"/>
    /// unchanged when it is null or empty or <paramref name="idx"/> is out of range.
    /// </summary>
    public static string ReplaceAtStringManipulation(this string s, int idx, char replaceChar)
    {
        if (string.IsNullOrEmpty(s) || idx < 0 || idx >= s.Length)
        {
            return s;
        }

        return s.Remove(idx, 1).Insert(idx, replaceChar.ToString());
    }

    /// <summary>
    /// LINQ-based replace. Indexes by UTF-16 code unit, so the position is wrong for
    /// strings containing surrogate pairs. Returns <paramref name="value"/>
    /// unchanged when it is null or empty or <paramref name="index"/> is out of range.
    /// </summary>
    public static string ReplaceAtLinq(this string value, int index, char newchar)
    {
        if (string.IsNullOrEmpty(value) || index < 0 || index >= value.Length)
        {
            return value;
        }

        return string.Concat(value.Select((c, i) => i == index ? newchar : c));
    }

    /// <summary>
    /// Char-array-based replace. Indexes by UTF-16 code unit, so the position is
    /// wrong for strings containing surrogate pairs. Returns <paramref name="input"/>
    /// unchanged when it is null or empty or <paramref name="index"/> is out of range.
    /// </summary>
    public static string ReplaceAtCharArray(this string input, uint index, char newChar)
    {
        if (string.IsNullOrEmpty(input) || index >= input.Length)
        {
            return input;
        }

        char[] chars = input.ToCharArray();
        chars[index] = newChar;
        return new string(chars);
    }

    /// <summary>
    /// Runs each ReplaceAt variant once on a sample string and prints the result
    /// followed by the elapsed stopwatch ticks.
    /// </summary>
    public static void Main()
    {
        //In .NET 4.5 and later also UTF-16 is supported
        //Console.OutputEncoding = System.Text.Encoding.Unicode;

        RunTimed(() => "🎶🔥--ReplaceAtCharArray".ReplaceAtCharArray(4, 'X'));
        RunTimed(() => "🎶🔥--ReplaceAtLinq".ReplaceAtLinq(4, 'Y'));
        RunTimed(() => "🎶🔥--ReplaceAtStringManipulation".ReplaceAtStringManipulation(4, 'Z'));
        RunTimed(() => "🎶🔥--ReplaceAtSubstring".ReplaceAtSubstring(4, 'R'));
        RunTimed(() => "🎶🔥--SubsituteStringStringBuilder".SubsituteStringStringBuilder(4, 'W'));
        RunTimed(() => "c:\\\\dir\\🎶🔥--UnicodeReplaceAt".UnicodeReplaceAt(4, 'a'));
    }

    // Prints the string produced by `replace`, then the ticks spent producing and
    // printing it (the measured span includes the Console.WriteLine call).
    private static void RunTimed(Func<string> replace)
    {
        Stopwatch sw = Stopwatch.StartNew();
        Console.WriteLine(replace());
        sw.Stop();
        Console.WriteLine("in {0} ticks.", sw.ElapsedTicks.ToString("N0"));
    }
}

Output

🎶🔥X-ReplaceAtCharArray
in 1,802 ticks.
🎶🔥Y-ReplaceAtLinq
in 33,156 ticks.
🎶🔥Z-ReplaceAtStringManipulation
in 1,579 ticks.
🎶🔥R-ReplaceAtSubstring
in 1,892 ticks.
🎶🔥W-SubsituteStringStringBuilder
in 1,821 ticks.
c:\\air\🎶🔥--UnicodeReplaceAt
in 5,745 ticks.


No comments:

Post a Comment