The code below isolates the first likely hex number in a string by matching the text against a list of hex formats that are identified by their leading prefixes. It then removes the prefix and applies the TryParse function to the remaining digits.
The C# TryParse function with NumberStyles.HexNumber only works if hex prefixes such as "0x" are removed first.
This will fail (TryParse returns false because of the "0x" prefix):
UInt32.TryParse("0x20",
NumberStyles.HexNumber, // AllowHexSpecifier - Strings that are parsed using this style cannot be prefixed with "0x" or "&h".
CultureInfo.InvariantCulture, // I've also tried CurrentCulture
out number);
//See https://docs.microsoft.com/en-us/dotnet/api/system.globalization.numberstyles?view=netcore-3.1#System_Globalization_NumberStyles_AllowHexSpecifier
Note: It is tempting to optimize the first regex from
0x[0-9a-f]{2,}
to
0x([0-9a-f]{2,}) and use a group capture
but because the pattern is an alternation of many prefixed expressions, the overlapping capture groups are troublesome.
This is an update to my last post about this - C# .NET How to get integer from hexadecimal string, many hex formats supported
This code removes many of the hex format prefixes listed here - https://en.wikipedia.org/wiki/Hexadecimal
Source Code
using System;
using System.Globalization;
using System.Text.RegularExpressions;

/// <summary>
/// Finds the first hexadecimal number in a piece of text and converts it to an integer.
/// Prefixed notations (see https://en.wikipedia.org/wiki/Hexadecimal) are tried first; when
/// none is present, the first bare run of two or more hex digits is used instead.
/// </summary>
public static class Program
{
    // Prefixed hex notations, tried in this order at each position of the input.
    // NOTE(review): "&#...;" is the DECIMAL numeric character reference in HTML/XML, yet it is
    // listed here and its digits are parsed as hex (as in the original code) - confirm intent.
    const string strRegHexPrefixCandidates = @"0x[0-9a-f]{2,}|%x[0-9a-f]{2,}|\\u[0-9a-f]{2,}|&#x([0-9a-f]){1,6};|&#([0-9a-f]){1,6};|\\x[0-9a-f]{2,}|\\s[0-9a-f]{2,}|U\+[0-9a-f]{2,}|X'[0-9a-f]{2,}|16#([0-9a-f]){2,}|#x([0-9a-f]){2,}|#16r([0-9a-f]){2,6}|&H([0-9a-f]){2,}|0h([0-9a-f]){2,}|#([0-9a-f]){1,6}|%[0-9a-f]{2,}";

    // Fallback for text without a known prefix: the first run of at least two hex digits.
    // The former "quoted number" alternative was dropped: it returned the match with the quote
    // characters included, which TryParse always rejects, while this pattern already finds the
    // digits between the quotes.
    const string strRegGetHexNumber = @"[0-9a-f]{2,}";

    // Digits at the very start of a prefix-free candidate. One digit is enough here because
    // some prefixed notations (e.g. "&#x9;" or "#F") legitimately carry a single digit.
    const string strRegLeadingHexDigits = @"\A[0-9a-f]+";

    private static readonly Regex rgxHexPre = new Regex(strRegHexPrefixCandidates, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled);
    private static readonly Regex rgxGetHexAgressive = new Regex(strRegGetHexNumber, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
    private static readonly Regex rgxLeadingHexDigits = new Regex(strRegLeadingHexDigits, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

    // Same prefixes as in strRegHexPrefixCandidates. Order matters: a longer prefix must come
    // before any shorter prefix it starts with ("&#x" before "&#", "#x" and "#16r" before "#",
    // "%x" before "%").
    private static readonly string[] prefixHexs = new string[] { "0x", "%x", "\\u", "&#x", "&#", "\\x", "\\s", "U+", "X'", "16#", "#x", "#16r", "&H", "0h", "#", "%" };

    /// <summary>
    /// Entry point: runs the hex search on a sample text and writes the outcome to the console.
    /// </summary>
    public static void Main()
    {
        // Other inputs worth trying:
        //   "UTF-16 (hex) 0x0023 (0023)"
        //   "8E2"
        //   "this is the end"
        // The character references are spelled out literally; they had been HTML-decoded in
        // the published listing, which removed the very hex notation the sample demonstrates.
        string unicodeText = "In XML and XHTML, characters can be expressed as hexadecimal numeric character references using the notation &#xcode;, for instance &#x2019; represents the character U+2019 (the right single quotation mark). If there is no x the number is decimal (thus &#8217; is the same character).[3]";

        ulong number;
        string hex_value;
        string message;

        if (TryFindHexNumber(unicodeText, out number, out hex_value))
            // The former "{1:N0}" was applied to a string argument and had no effect.
            message = string.Format(CultureInfo.InvariantCulture, "{0} integer from found {1} hex number in string: {2}", number, hex_value, unicodeText);
        else
            message = "Could not find a hex number in string: \"" + unicodeText + "\". Select the hex number only.";

        Console.WriteLine(message);
    }

    /// <summary>
    /// Searches <paramref name="text"/> for the first hexadecimal number and parses it.
    /// </summary>
    /// <param name="text">Text to search; null or empty yields <c>false</c>.</param>
    /// <param name="number">The parsed value, or 0 when nothing could be parsed.</param>
    /// <param name="matchedText">
    /// The text that was recognised, including its prefix when there is one
    /// (e.g. "0x0023"); empty when nothing could be parsed.
    /// </param>
    /// <returns>
    /// <c>true</c> when a hex number was found and fits into a <see cref="ulong"/>;
    /// otherwise <c>false</c>.
    /// </returns>
    public static bool TryFindHexNumber(string text, out ulong number, out string matchedText)
    {
        number = 0;
        matchedText = string.Empty;

        if (string.IsNullOrEmpty(text))
            return false;

        string found;
        string digits;

        Match candidate = rgxHexPre.Match(text);
        if (candidate.Success)
        {
            // A prefixed notation wins; keep only the digits that directly follow the prefix
            // (this also drops a trailing ';' of a character reference).
            found = candidate.Value;
            digits = rgxLeadingHexDigits.Match(StripHexPrefix(found)).Value;
        }
        else
        {
            Match bare = rgxGetHexAgressive.Match(text);
            if (!bare.Success)
                return false;

            found = bare.Value;
            digits = found;
        }

        // NumberStyles.HexNumber rejects prefixes such as "0x" or "&h", hence the stripping
        // above. TryParse reports failure (including overflow) via its return value rather
        // than by throwing, so no exception handling is needed.
        if (!ulong.TryParse(digits, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out number))
            return false;

        matchedText = found;
        return true;
    }

    /// <summary>
    /// Removes the leading hex prefix from a match produced by the prefix regex.
    /// </summary>
    /// <param name="candidate">A value matched by <c>rgxHexPre</c>.</param>
    /// <returns>The candidate without its prefix, or the candidate itself if no prefix matches.</returns>
    private static string StripHexPrefix(string candidate)
    {
        foreach (string prefix in prefixHexs)
        {
            // The regex matches case-insensitively, so the prefix test must as well
            // ("0X1F", "&h1F"); ordinal comparison keeps it independent of the current culture.
            if (candidate.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                return candidate.Substring(prefix.Length);
        }

        return candidate;
    }
}
No comments:
Post a Comment