Saturday, October 26, 2024

Remove All Unicode Control Characters Plus a Special Subset of General Punctuation

 Remove All Unicode Control Characters Plus a Special Subset of General Punctuation

   /// <summary>
        /// Returns a copy of <paramref name="s"/> with every Unicode control character
        /// (as reported by <see cref="Char.IsControl(char)"/>) removed, together with three
        /// sub-ranges of the General Punctuation block: U+2000..U+200F, U+2028..U+202F
        /// and U+205F..U+206F.
        /// </summary>
        /// <param name="s">The string to filter. Must not be <c>null</c>.</param>
        /// <returns>
        /// A new string holding the characters of <paramref name="s"/> that were not removed,
        /// in their original order. An empty input yields an empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
        public static string RemoveAllUnicodeControlCharsPlusGenPunc(this string s)
        {
            // Extension methods can be invoked on a null reference; fail with a clear
            // argument error instead of a NullReferenceException from s.Length.
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }

            // The result can never be longer than the input, so presize to avoid regrowth.
            StringBuilder sb = new StringBuilder(s.Length);

            foreach (char c in s)
            {
                // Range reference: https://unicode-table.com/en/blocks/general-punctuation/
                bool isStripped =
                    Char.IsControl(c)
                    || (c >= '\u2000' && c <= '\u200F')   // typographic spaces, zero-width chars, LRM/RLM
                    || (c >= '\u2028' && c <= '\u202F')   // line/paragraph separators, bidi embedding controls, narrow NBSP
                    || (c >= '\u205F' && c <= '\u206F');  // math space, word joiner, invisible operators, bidi isolates

                if (!isStripped)
                {
                    sb.Append(c);
                }
            }

            return sb.ToString();
        }


No comments:

Post a Comment