Grabbing data from the clipboard using Clipboard.GetText in C# .NET is straightforward, but there's an issue with .NET Framework 4 or lower. The DataFormats.Html specification states that the data is encoded in UTF-8. But there's a bug in .NET Framework 4 (the last version to support Windows XP) and lower: the clipboard bytes really are UTF-8, but they come back decoded as if they were Windows-1252. The damage hits the wide characters, i.e. those whose UTF-8 encoding has a byte count of 2 or more: each such character comes back as 2 or more wrong characters.
You get a lot of wrong encodings, which lead to funny/bad characters such as 'Å','‹','Å’','Ž','Å¡','Å“','ž','Ÿ','Â','¡','¢','£','¤','Â¥','¦','§','¨','©'
For example, '€' (UTF-8 bytes E2 82 AC) shows up as 'â‚¬' when those bytes are decoded as Windows-1252.
A full explanation is available at this dedicated website: "Debugging Chart Mapping Windows-1252 Characters to UTF-8 Bytes to Latin-1 Characters".
Below is C# source code for a Windows-1252-to-UTF-8 mapping table that captures the affected characters up to code point 0xFFFF. There are more after that, but I only needed characters that fit in a single C# char.
Update Tue 11-Jun-19
Turns out it's an error in WinForms itself. Here's how to get HTML data from the clipboard directly.

using System;
using System.Runtime.InteropServices;
using System.Text;

//--------------------------------------------------------------------------------
// http://metadataconsulting.blogspot.com/2019/06/How-to-get-HTML-from-the-Windows-system-clipboard-directly-using-PInvoke-Win32-Native-methods-avoiding-bad-funny-characters.html
//--------------------------------------------------------------------------------

/// <summary>
/// Reads the "HTML Format" clipboard entry straight from the Win32 clipboard
/// API and decodes the bytes as UTF-8, bypassing the WinForms clipboard wrapper.
/// </summary>
public class ClipboardHelper
{
    #region Win32 Native PInvoke

    // CharSet.Unicode makes the runtime bind to RegisterClipboardFormatW and
    // marshal the format name as UTF-16.
    [DllImport("User32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
    private static extern uint RegisterClipboardFormat(string lpszFormat);

    [DllImport("User32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool IsClipboardFormatAvailable(uint format);

    [DllImport("User32.dll", SetLastError = true)]
    private static extern IntPtr GetClipboardData(uint uFormat);

    [DllImport("User32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool OpenClipboard(IntPtr hWndNewOwner);

    [DllImport("User32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool CloseClipboard();

    [DllImport("Kernel32.dll", SetLastError = true)]
    private static extern IntPtr GlobalLock(IntPtr hMem);

    [DllImport("Kernel32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool GlobalUnlock(IntPtr hMem);

    // GlobalSize returns SIZE_T, which is pointer-sized; declaring it as int
    // would truncate the value in a 64-bit process.
    [DllImport("Kernel32.dll", SetLastError = true)]
    private static extern UIntPtr GlobalSize(IntPtr hMem);

    #endregion

    /// <summary>
    /// Gets the raw "HTML Format" clipboard payload, decoded as UTF-8.
    /// </summary>
    /// <returns>
    /// The clipboard payload exactly as the source application stored it, cut
    /// at the first NUL byte; or <c>null</c> when the format cannot be
    /// registered, no HTML is on the clipboard, the clipboard cannot be opened,
    /// or the data block cannot be locked or sized.
    /// </returns>
    public static string GetHTMLWin32Native()
    {
        uint cfHtml = RegisterClipboardFormat("HTML Format");

        // RegisterClipboardFormat signals failure with 0.
        if (cfHtml == 0)
        {
            return null;
        }

        if (!IsClipboardFormatAvailable(cfHtml))
        {
            return null;
        }

        // Open outside the try block so that CloseClipboard only runs when the
        // clipboard was actually opened by this call.
        if (!OpenClipboard(IntPtr.Zero))
        {
            return null;
        }

        try
        {
            // The handle stays owned by the clipboard; it must not be freed here.
            IntPtr handle = GetClipboardData(cfHtml);
            if (handle == IntPtr.Zero)
            {
                return null;
            }

            IntPtr pointer = GlobalLock(handle);
            if (pointer == IntPtr.Zero)
            {
                return null;
            }

            try
            {
                ulong size = GlobalSize(handle).ToUInt64();

                // 0 means GlobalSize failed; anything above int.MaxValue cannot
                // be copied with Marshal.Copy into a single managed array.
                if (size == 0 || size > int.MaxValue)
                {
                    return null;
                }

                byte[] buffer = new byte[(int)size];
                Marshal.Copy(pointer, buffer, 0, buffer.Length);

                // The allocation may be larger than the text it holds, and the
                // text is NUL-terminated, so decode only up to the first NUL.
                int length = Array.IndexOf(buffer, (byte)0);
                if (length < 0)
                {
                    length = buffer.Length;
                }

                return Encoding.UTF8.GetString(buffer, 0, length);
            }
            finally
            {
                GlobalUnlock(handle);
            }
        }
        finally
        {
            CloseClipboard();
        }
    }
}
My old, desperate solution:
{'\u0081' , "\u00c2\u0081" }, //1. UTF-8 chr='' -> win1252 chr='Â'. {'\u008d' , "\u00c2\u008d" }, //2. UTF-8 chr='' -> win1252 chr='Â'. {'\u008f' , "\u00c2\u008f" }, //3. UTF-8 chr='' -> win1252 chr='Â'. {'\u0090' , "\u00c2\u0090" }, //4. UTF-8 chr='' -> win1252 chr='Â'. {'\u009d' , "\u00c2\u009d" }, //5. UTF-8 chr='' -> win1252 chr='Â'. {'\u00a0' , "\u00c2\u00a0" }, //6. UTF-8 chr=' ' -> win1252 chr=' '. {'\u00a1' , "\u00c2\u00a1" }, //7. UTF-8 chr='¡' -> win1252 chr='¡'. {'\u00a2' , "\u00c2\u00a2" }, //8. UTF-8 chr='¢' -> win1252 chr='¢'. {'\u00a3' , "\u00c2\u00a3" }, //9. UTF-8 chr='£' -> win1252 chr='£'. {'\u00a4' , "\u00c2\u00a4" }, //10. UTF-8 chr='¤' -> win1252 chr='¤'. {'\u00a5' , "\u00c2\u00a5" }, //11. UTF-8 chr='¥' -> win1252 chr='Â¥'. {'\u00a6' , "\u00c2\u00a6" }, //12. UTF-8 chr='¦' -> win1252 chr='¦'. {'\u00a7' , "\u00c2\u00a7" }, //13. UTF-8 chr='§' -> win1252 chr='§'. {'\u00a8' , "\u00c2\u00a8" }, //14. UTF-8 chr='¨' -> win1252 chr='¨'. {'\u00a9' , "\u00c2\u00a9" }, //15. UTF-8 chr='©' -> win1252 chr='©'. {'\u00aa' , "\u00c2\u00aa" }, //16. UTF-8 chr='ª' -> win1252 chr='ª'. {'\u00ab' , "\u00c2\u00ab" }, //17. UTF-8 chr='«' -> win1252 chr='«'. {'\u00ac' , "\u00c2\u00ac" }, //18. UTF-8 chr='¬' -> win1252 chr='¬'. {'\u00ad' , "\u00c2\u00ad" }, //19. UTF-8 chr='' -> win1252 chr='Â'. {'\u00ae' , "\u00c2\u00ae" }, //20. UTF-8 chr='®' -> win1252 chr='®'. {'\u00af' , "\u00c2\u00af" }, //21. UTF-8 chr='¯' -> win1252 chr='¯'. {'\u00b0' , "\u00c2\u00b0" }, //22. UTF-8 chr='°' -> win1252 chr='°'. {'\u00b1' , "\u00c2\u00b1" }, //23. UTF-8 chr='±' -> win1252 chr='±'. {'\u00b2' , "\u00c2\u00b2" }, //24. UTF-8 chr='²' -> win1252 chr='²'. {'\u00b3' , "\u00c2\u00b3" }, //25. UTF-8 chr='³' -> win1252 chr='³'. {'\u00b4' , "\u00c2\u00b4" }, //26. UTF-8 chr='´' -> win1252 chr='´'. {'\u00b5' , "\u00c2\u00b5" }, //27. UTF-8 chr='µ' -> win1252 chr='µ'. {'\u00b6' , "\u00c2\u00b6" }, //28. UTF-8 chr='¶' -> win1252 chr='¶'. {'\u00b7' , "\u00c2\u00b7" }, //29. 
UTF-8 chr='·' -> win1252 chr='·'. {'\u00b8' , "\u00c2\u00b8" }, //30. UTF-8 chr='¸' -> win1252 chr='¸'. {'\u00b9' , "\u00c2\u00b9" }, //31. UTF-8 chr='¹' -> win1252 chr='¹'. {'\u00ba' , "\u00c2\u00ba" }, //32. UTF-8 chr='º' -> win1252 chr='º'. {'\u00bb' , "\u00c2\u00bb" }, //33. UTF-8 chr='»' -> win1252 chr='»'. {'\u00bc' , "\u00c2\u00bc" }, //34. UTF-8 chr='¼' -> win1252 chr='¼'. {'\u00bd' , "\u00c2\u00bd" }, //35. UTF-8 chr='½' -> win1252 chr='½'. {'\u00be' , "\u00c2\u00be" }, //36. UTF-8 chr='¾' -> win1252 chr='¾'. {'\u00bf' , "\u00c2\u00bf" }, //37. UTF-8 chr='¿' -> win1252 chr='¿'. {'\u00c0' , "\u00c3\u0080" }, //38. UTF-8 chr='À' -> win1252 chr='Ã'. {'\u00c1' , "\u00c3\u0081" }, //39. UTF-8 chr='Á' -> win1252 chr='Ã'. {'\u00c2' , "\u00c3\u0082" }, //40. UTF-8 chr='Â' -> win1252 chr='Ã'. {'\u00c3' , "\u00c3\u0083" }, //41. UTF-8 chr='Ã' -> win1252 chr='Ã'. {'\u00c4' , "\u00c3\u0084" }, //42. UTF-8 chr='Ä' -> win1252 chr='Ã'. {'\u00c5' , "\u00c3\u0085" }, //43. UTF-8 chr='Å' -> win1252 chr='à '. {'\u00c6' , "\u00c3\u0086" }, //44. UTF-8 chr='Æ' -> win1252 chr='Ã'. {'\u00c7' , "\u00c3\u0087" }, //45. UTF-8 chr='Ç' -> win1252 chr='Ã'. {'\u00c8' , "\u00c3\u0088" }, //46. UTF-8 chr='È' -> win1252 chr='Ã'. {'\u00c9' , "\u00c3\u0089" }, //47. UTF-8 chr='É' -> win1252 chr='Ã'. {'\u00ca' , "\u00c3\u008a" }, //48. UTF-8 chr='Ê' -> win1252 chr='Ã'. {'\u00cb' , "\u00c3\u008b" }, //49. UTF-8 chr='Ë' -> win1252 chr='Ã'. {'\u00cc' , "\u00c3\u008c" }, //50. UTF-8 chr='Ì' -> win1252 chr='Ã'. {'\u00cd' , "\u00c3\u008d" }, //51. UTF-8 chr='Í' -> win1252 chr='Ã'. {'\u00ce' , "\u00c3\u008e" }, //52. UTF-8 chr='Î' -> win1252 chr='Ã'. {'\u00cf' , "\u00c3\u008f" }, //53. UTF-8 chr='Ï' -> win1252 chr='Ã'. {'\u00d0' , "\u00c3\u0090" }, //54. UTF-8 chr='Ð' -> win1252 chr='Ã'. {'\u00d1' , "\u00c3\u0091" }, //55. UTF-8 chr='Ñ' -> win1252 chr='Ã'. {'\u00d2' , "\u00c3\u0092" }, //56. UTF-8 chr='Ò' -> win1252 chr='Ã'. {'\u00d3' , "\u00c3\u0093" }, //57. UTF-8 chr='Ó' -> win1252 chr='Ã'. 
{'\u00d4' , "\u00c3\u0094" }, //58. UTF-8 chr='Ô' -> win1252 chr='Ã'. {'\u00d5' , "\u00c3\u0095" }, //59. UTF-8 chr='Õ' -> win1252 chr='Ã'. {'\u00d6' , "\u00c3\u0096" }, //60. UTF-8 chr='Ö' -> win1252 chr='Ã'. {'\u00d7' , "\u00c3\u0097" }, //61. UTF-8 chr='×' -> win1252 chr='Ã'. {'\u00d8' , "\u00c3\u0098" }, //62. UTF-8 chr='Ø' -> win1252 chr='Ã'. {'\u00d9' , "\u00c3\u0099" }, //63. UTF-8 chr='Ù' -> win1252 chr='Ã'. {'\u00da' , "\u00c3\u009a" }, //64. UTF-8 chr='Ú' -> win1252 chr='Ã'. {'\u00db' , "\u00c3\u009b" }, //65. UTF-8 chr='Û' -> win1252 chr='Ã'. {'\u00dc' , "\u00c3\u009c" }, //66. UTF-8 chr='Ü' -> win1252 chr='Ã'. {'\u00dd' , "\u00c3\u009d" }, //67. UTF-8 chr='Ý' -> win1252 chr='Ã'. {'\u00de' , "\u00c3\u009e" }, //68. UTF-8 chr='Þ' -> win1252 chr='Ã'. {'\u00df' , "\u00c3\u009f" }, //69. UTF-8 chr='ß' -> win1252 chr='Ã'. {'\u00e0' , "\u00c3\u00a0" }, //70. UTF-8 chr='à' -> win1252 chr='à '. {'\u00e1' , "\u00c3\u00a1" }, //71. UTF-8 chr='á' -> win1252 chr='á'. {'\u00e2' , "\u00c3\u00a2" }, //72. UTF-8 chr='â' -> win1252 chr='â'. {'\u00e3' , "\u00c3\u00a3" }, //73. UTF-8 chr='ã' -> win1252 chr='ã'. {'\u00e4' , "\u00c3\u00a4" }, //74. UTF-8 chr='ä' -> win1252 chr='ä'. {'\u00e5' , "\u00c3\u00a5" }, //75. UTF-8 chr='å' -> win1252 chr='Ã¥'. {'\u00e6' , "\u00c3\u00a6" }, //76. UTF-8 chr='æ' -> win1252 chr='æ'. {'\u00e7' , "\u00c3\u00a7" }, //77. UTF-8 chr='ç' -> win1252 chr='ç'. {'\u00e8' , "\u00c3\u00a8" }, //78. UTF-8 chr='è' -> win1252 chr='è'. {'\u00e9' , "\u00c3\u00a9" }, //79. UTF-8 chr='é' -> win1252 chr='é'. {'\u00ea' , "\u00c3\u00aa" }, //80. UTF-8 chr='ê' -> win1252 chr='ê'. {'\u00eb' , "\u00c3\u00ab" }, //81. UTF-8 chr='ë' -> win1252 chr='ë'. {'\u00ec' , "\u00c3\u00ac" }, //82. UTF-8 chr='ì' -> win1252 chr='ì'. {'\u00ed' , "\u00c3\u00ad" }, //83. UTF-8 chr='í' -> win1252 chr='Ã'. {'\u00ee' , "\u00c3\u00ae" }, //84. UTF-8 chr='î' -> win1252 chr='î'. {'\u00ef' , "\u00c3\u00af" }, //85. UTF-8 chr='ï' -> win1252 chr='ï'. {'\u00f0' , "\u00c3\u00b0" }, //86. 
UTF-8 chr='ð' -> win1252 chr='ð'. {'\u00f1' , "\u00c3\u00b1" }, //87. UTF-8 chr='ñ' -> win1252 chr='ñ'. {'\u00f2' , "\u00c3\u00b2" }, //88. UTF-8 chr='ò' -> win1252 chr='ò'. {'\u00f3' , "\u00c3\u00b3" }, //89. UTF-8 chr='ó' -> win1252 chr='ó'. {'\u00f4' , "\u00c3\u00b4" }, //90. UTF-8 chr='ô' -> win1252 chr='ô'. {'\u00f5' , "\u00c3\u00b5" }, //91. UTF-8 chr='õ' -> win1252 chr='õ'. {'\u00f6' , "\u00c3\u00b6" }, //92. UTF-8 chr='ö' -> win1252 chr='ö'. {'\u00f7' , "\u00c3\u00b7" }, //93. UTF-8 chr='÷' -> win1252 chr='÷'. {'\u00f8' , "\u00c3\u00b8" }, //94. UTF-8 chr='ø' -> win1252 chr='ø'. {'\u00f9' , "\u00c3\u00b9" }, //95. UTF-8 chr='ù' -> win1252 chr='ù'. {'\u00fa' , "\u00c3\u00ba" }, //96. UTF-8 chr='ú' -> win1252 chr='ú'. {'\u00fb' , "\u00c3\u00bb" }, //97. UTF-8 chr='û' -> win1252 chr='û'. {'\u00fc' , "\u00c3\u00bc" }, //98. UTF-8 chr='ü' -> win1252 chr='ü'. {'\u00fd' , "\u00c3\u00bd" }, //99. UTF-8 chr='ý' -> win1252 chr='ý'. {'\u00fe' , "\u00c3\u00be" }, //100. UTF-8 chr='þ' -> win1252 chr='þ'. {'\u00ff' , "\u00c3\u00bf" }, //101. UTF-8 chr='ÿ' -> win1252 chr='ÿ'. {'\u0110' , "\u00c3\u0090" }, //102. UTF-8 chr='Đ' -> win1252 chr='Ã'. {'\u0152' , "\u00c5\u0092" }, //103. UTF-8 chr='Œ' -> win1252 chr='Å'. {'\u0153' , "\u00c5\u0093" }, //104. UTF-8 chr='œ' -> win1252 chr='Å'. {'\u0160' , "\u00c5\u00a0" }, //105. UTF-8 chr='Š' -> win1252 chr='Å '. {'\u0161' , "\u00c5\u00a1" }, //106. UTF-8 chr='š' -> win1252 chr='Å¡'. {'\u0178' , "\u00c5\u00b8" }, //107. UTF-8 chr='Ÿ' -> win1252 chr='Ÿ'. {'\u017d' , "\u00c5\u00bd" }, //108. UTF-8 chr='Ž' -> win1252 chr='Ž'. {'\u017e' , "\u00c5\u00be" }, //109. UTF-8 chr='ž' -> win1252 chr='ž'. {'\u0189' , "\u00c3\u0090" }, //110. UTF-8 chr='Ɖ' -> win1252 chr='Ã'. {'\u0191' , "\u00c6\u0092" }, //111. UTF-8 chr='Ƒ' -> win1252 chr='Æ'. {'\u0192' , "\u00c6\u0092" }, //112. UTF-8 chr='ƒ' -> win1252 chr='Æ'. {'\u02c6' , "\u00cb\u0086" }, //113. UTF-8 chr='ˆ' -> win1252 chr='Ë'. {'\u02c9' , "\u00c2\u00af" }, //114. 
UTF-8 chr='ˉ' -> win1252 chr='¯'. {'\u02ca' , "\u00c2\u00b4" }, //115. UTF-8 chr='ˊ' -> win1252 chr='´'. {'\u02da' , "\u00c2\u00b0" }, //116. UTF-8 chr='˚' -> win1252 chr='°'. {'\u02dc' , "\u00cb\u009c" }, //117. UTF-8 chr='˜' -> win1252 chr='Ë'. {'\u0301' , "\u00c2\u00b4" }, //118. UTF-8 chr='́' -> win1252 chr='´'. {'\u0304' , "\u00c2\u00af" }, //119. UTF-8 chr='̄' -> win1252 chr='¯'. {'\u0305' , "\u00c2\u00af" }, //120. UTF-8 chr='̅' -> win1252 chr='¯'. {'\u0308' , "\u00c2\u00a8" }, //121. UTF-8 chr='̈' -> win1252 chr='¨'. {'\u030a' , "\u00c2\u00b0" }, //122. UTF-8 chr='̊' -> win1252 chr='°'. {'\u0327' , "\u00c2\u00b8" }, //123. UTF-8 chr='̧' -> win1252 chr='¸'. {'\u03b2' , "\u00c3\u009f" }, //124. UTF-8 chr='β' -> win1252 chr='Ã'. {'\u03bc' , "\u00c2\u00b5" }, //125. UTF-8 chr='μ' -> win1252 chr='µ'. {'\u2013' , "\u00e2\u0080\u0093" }, //126. UTF-8 chr='–' -> win1252 chr='â'. {'\u2014' , "\u00e2\u0080\u0094" }, //127. UTF-8 chr='—' -> win1252 chr='â'. {'\u2018' , "\u00e2\u0080\u0098" }, //128. UTF-8 chr='‘' -> win1252 chr='â'. {'\u2019' , "\u00e2\u0080\u0099" }, //129. UTF-8 chr='’' -> win1252 chr='â'. {'\u201a' , "\u00e2\u0080\u009a" }, //130. UTF-8 chr='‚' -> win1252 chr='â'. {'\u201c' , "\u00e2\u0080\u009c" }, //131. UTF-8 chr='“' -> win1252 chr='â'. {'\u201d' , "\u00e2\u0080\u009d" }, //132. UTF-8 chr='”' -> win1252 chr='â'. {'\u201e' , "\u00e2\u0080\u009e" }, //133. UTF-8 chr='„' -> win1252 chr='â'. {'\u2020' , "\u00e2\u0080\u00a0" }, //134. UTF-8 chr='†' -> win1252 chr='â '. {'\u2021' , "\u00e2\u0080\u00a1" }, //135. UTF-8 chr='‡' -> win1252 chr='â¡'. {'\u2022' , "\u00e2\u0080\u00a2" }, //136. UTF-8 chr='•' -> win1252 chr='â¢'. {'\u2024' , "\u00c2\u00b7" }, //137. UTF-8 chr='․' -> win1252 chr='·'. {'\u2026' , "\u00e2\u0080\u00a6" }, //138. UTF-8 chr='…' -> win1252 chr='â¦'. {'\u2030' , "\u00e2\u0080\u00b0" }, //139. UTF-8 chr='‰' -> win1252 chr='â°'. {'\u2039' , "\u00e2\u0080\u00b9" }, //140. UTF-8 chr='‹' -> win1252 chr='â¹'. 
{'\u203a' , "\u00e2\u0080\u00ba" }, //141. UTF-8 chr='›' -> win1252 chr='âº'. {'\u2070' , "\u00c2\u00b0" }, //142. UTF-8 chr='⁰' -> win1252 chr='°'. {'\u20a1' , "\u00c2\u00a2" }, //143. UTF-8 chr='₡' -> win1252 chr='¢'. {'\u20a4' , "\u00c2\u00a3" }, //144. UTF-8 chr='₤' -> win1252 chr='£'. {'\u20ac' , "\u00e2\u0082\u00ac" }, //145. UTF-8 chr='€' -> win1252 chr='â¬'. {'\u2122' , "\u00e2\u0084\u00a2" }, //146. UTF-8 chr='™' -> win1252 chr='â¢'. {'\u212b' , "\u00c3\u0085" }, //147. UTF-8 chr='Å' -> win1252 chr='à '. {'\u2205' , "\u00c3\u0098" }, //148. UTF-8 chr='∅' -> win1252 chr='Ã'. {'\u2213' , "\u00c2\u00b1" }, //149. UTF-8 chr='∓' -> win1252 chr='±'. {'\u2218' , "\u00c2\u00b0" }, //150. UTF-8 chr='∘' -> win1252 chr='°'. {'\u2219' , "\u00c2\u00b7" }, //151. UTF-8 chr='∙' -> win1252 chr='·'. {'\u2248' , "\u00cb\u009c" }, //152. UTF-8 chr='≈' -> win1252 chr='Ë'. {'\u226a' , "\u00c2\u00ab" }, //153. UTF-8 chr='≪' -> win1252 chr='«'. {'\u226b' , "\u00c2\u00bb" }, //154. UTF-8 chr='≫' -> win1252 chr='»'. {'\u22c5' , "\u00c2\u00b7" }, //155. UTF-8 chr='⋅' -> win1252 chr='·'. {'\u2302' , "\u00c2\u00a6" }, //156. UTF-8 chr='⌂' -> win1252 chr='¦'. {'\u2310' , "\u00c2\u00ac" }, //157. UTF-8 chr='⌐' -> win1252 chr='¬'. {'\u2502' , "\u00c2\u00a6" }, //158. UTF-8 chr='│' -> win1252 chr='¦'. {'\u2524' , "\u00c2\u00a6" }, //159. UTF-8 chr='┤' -> win1252 chr='¦'. {'\u2551' , "\u00c2\u00a6" }, //160. UTF-8 chr='║' -> win1252 chr='¦'. {'\u255e' , "\u00c2\u00a6" }, //161. UTF-8 chr='╞' -> win1252 chr='¦'. {'\u255f' , "\u00c2\u00a6" }, //162. UTF-8 chr='╟' -> win1252 chr='¦'. {'\u2560' , "\u00c2\u00a6" }, //163. UTF-8 chr='╠' -> win1252 chr='¦'. {'\u2561' , "\u00c2\u00a6" }, //164. UTF-8 chr='╡' -> win1252 chr='¦'. {'\u2562' , "\u00c2\u00a6" }, //165. UTF-8 chr='╢' -> win1252 chr='¦'. {'\u2563' , "\u00c2\u00a6" }, //166. UTF-8 chr='╣' -> win1252 chr='¦'. {'\u2580' , "\u00c2\u00af" }, //167. UTF-8 chr='▀' -> win1252 chr='¯'. {'\u2588' , "\u00c2\u00a6" }, //168. 
UTF-8 chr='█' -> win1252 chr='¦'. {'\u258c' , "\u00c2\u00a6" }, //169. UTF-8 chr='▌' -> win1252 chr='¦'. {'\u2590' , "\u00c2\u00a6" }, //170. UTF-8 chr='▐' -> win1252 chr='¦'. {'\u2591' , "\u00c2\u00a6" }, //171. UTF-8 chr='░' -> win1252 chr='¦'. {'\u2592' , "\u00c2\u00a6" }, //172. UTF-8 chr='▒' -> win1252 chr='¦'. {'\u2593' , "\u00c2\u00a6" }, //173. UTF-8 chr='▓' -> win1252 chr='¦'. {'\u25a0' , "\u00c2\u00a6" }, //174. UTF-8 chr='■' -> win1252 chr='¦'. {'\u263c' , "\u00c2\u00a4" }, //175. UTF-8 chr='☼' -> win1252 chr='¤'. {'\u300a' , "\u00c2\u00ab" }, //176. UTF-8 chr='《' -> win1252 chr='«'. {'\u300b' , "\u00c2\u00bb" }, //177. UTF-8 chr='》' -> win1252 chr='»'. {'\u30fb' , "\u00c2\u00b7" }, //178. UTF-8 chr='・' -> win1252 chr='·'.