How to count table columns and rows using HTML Agility Pack.
The code is reproduced below in case the JIT service above fails.
using System;
using HtmlAgilityPack;
using System.Xml;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// Console demo that parses a sample HTML table with HTML Agility Pack and
/// reports the number of columns in every row, the total number of rows,
/// and the widest row (the table's column count).
/// </summary>
public class Program
{
    // Inline styles shared by many elements of the sample markup. They are kept
    // as constants so the sample is readable; the assembled string is the same
    // markup the table was originally pasted as.
    private const string SectionStyle = "box-sizing: border-box";
    private const string RowStyle = "box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)";
    private const string HeaderCellStyle =
        "box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; "
        + "vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)";
    private const string CellStyle =
        "box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; "
        + "border-bottom: 0.05) solid rgba(0; padding: 12px";
    private const string StrongStyle = "box-sizing: border-box; font-weight: bold";
    private const string LinkStyle =
        "box-sizing: border-box; color: rgb(63, 136, 197); text-decoration: none; "
        + "transition: all 0.2s ease-in-out; background:";
    private const string TableStyle =
        "box-sizing: border-box; border-collapse: collapse; border-spacing: 0px; max-width: 100%; "
        + "position: relative; table-layout: fixed; width: 2034px; z-index: 1; color: rgb(0, 0, 0); "
        + "font-family: Lato, Roboto, Arial, Tahoma, sans-serif; font-size: 10px; font-style: normal; "
        + "font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; "
        + "orphans: 2; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; "
        + "widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration-style: initial; "
        + "text-decoration-color: initial; border-top: 0px none; margin-left: 0px; margin-right: 0px; "
        + "margin-top: 0px; margin-bottom: 1.5em";

    /// <summary>
    /// Loads the sample table, prints every cell of every row together with the
    /// row's column count, then prints the total row count and the maximum
    /// column count found.
    /// </summary>
    public static void Main()
    {
        string html = BuildSampleHtml();

        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        // Select the first table in the document.
        HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode("//table[1]");
        if (table == null)
        {
            Console.WriteLine("No table found.");
            return;
        }

        // The leading "." keeps the query relative to the selected table. A bare
        // "//tr" is an absolute path and would return the rows of every table in
        // the document.
        HtmlNodeCollection rows = table.SelectNodes(".//tr");

        int columnUpperBound = 0;
        int rowNumber = 0;

        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (rows != null)
        {
            foreach (HtmlNode row in rows)
            {
                rowNumber++;
                int colCnt = 0;
                int cellNumber = 0;

                foreach (HtmlNode cell in row.ChildNodes)
                {
                    if (!IsCell(cell))
                    {
                        continue; // whitespace text nodes, comments, etc.
                    }

                    cellNumber++;
                    colCnt += GetColumnSpan(cell);
                    Console.WriteLine(" " + cellNumber + ". " + cell.OuterHtml);
                }

                Console.WriteLine("row " + rowNumber + ". Column count=" + colCnt);
                columnUpperBound = Math.Max(colCnt, columnUpperBound);
            }
        }

        Console.WriteLine("Final row Count=" + rowNumber);
        Console.WriteLine("Final column Count=" + columnUpperBound);
    }

    // True for table cells: both data cells (td) and header cells (th).
    private static bool IsCell(HtmlNode node)
    {
        return node.Name == "td" || node.Name == "th";
    }

    // Number of columns a cell occupies: its colspan attribute, or 1 when the
    // attribute is missing, unparsable, or less than 1.
    // NOTE: cells carried into a row by a rowspan on an earlier row are not
    // counted for that row; only cells physically present in the row are.
    private static int GetColumnSpan(HtmlNode cell)
    {
        return Math.Max(1, cell.GetAttributeValue("colspan", 1));
    }

    // Assembles the sample table markup (caption, thead, tfoot, tbody).
    private static string BuildSampleHtml()
    {
        return "<table style='" + TableStyle + "'> "
            + "<caption style='box-sizing: border-box'>Most Popular Content Management Systems</caption> "
            + Section("thead",
                Tr(Th("CMS"), Th("Usage *"), Th("Change Since Jan 1"), Th("Market Share *"), Th("Change Since Jan 1")),
                Tr(Th("Type"), Th("Percentage"), Th("Date"), Th("Percentage"), Th("Change Since Jan 1")))
            + Section("tfoot",
                Tr(Td("Totals"), Td("33.3%"), Td(""), Td("76%"), Td("")),
                Tr(Td("colspan='5'",
                    "* " + Strong("Usage") + " is percentage of surveyed websites that use the associated CMS. "
                    + Strong("Market Share") + " is the percentage of surveyed websites powered by a CMS that use the associated CMS. "
                    + "For example, 25.8% of all surveyed websites use WordPress, and WordPress commands 59.1% market share of the total CMS market.")),
                Tr(Td("colspan='5'",
                    "The data in this table is provided courtesy of "
                    + Link("http://w3techs.com/", "W3Techs")
                    + " and was captured in February 2016. To learn more about this topic visit the "
                    + Link("http://w3techs.com/technologies/overview/content_management/all", "overview of content management systems")
                    + " from W3Techs.")))
            + Section("tbody",
                Tr(Td("WordPress"), Td("25.8%"), Td("+0.2%"), Td("59.1%"), Td("+0.3%")),
                Tr(Td("Joomla"), Td("2.8%"), Td("No Change"), Td("6.4%"), Td("rowspan='4'", "No Change")),
                Tr(Td("Drupal"), Td("2.2%"), Td("+0.1%"), Td("4.9%")),
                Tr(Td("Magento"), Td("1.3%"), Td("+0.1%"), Td("2.9%")),
                Tr(Td("Blogger"), Td("1.2%"), Td("No Change"), Td("2.7%")))
            + "</table>";
    }

    // Wraps rows in a thead/tfoot/tbody element, followed by a separating space.
    private static string Section(string tag, params string[] rows)
    {
        return "<" + tag + " style='" + SectionStyle + "'> " + string.Concat(rows) + "</" + tag + "> ";
    }

    // Wraps cells in a styled table row, followed by a separating space.
    private static string Tr(params string[] cells)
    {
        return "<tr style='" + RowStyle + "'> " + string.Concat(cells) + "</tr> ";
    }

    // Styled header cell, followed by a separating space.
    private static string Th(string text)
    {
        return "<th style='" + HeaderCellStyle + "'> " + text + "</th> ";
    }

    // Styled data cell, followed by a separating space.
    private static string Td(string text)
    {
        return Td(null, text);
    }

    // Styled data cell with one extra attribute (e.g. colspan='5') placed before style.
    private static string Td(string extraAttribute, string text)
    {
        string opening = string.IsNullOrEmpty(extraAttribute) ? "<td " : "<td " + extraAttribute + " ";
        return opening + "style='" + CellStyle + "'> " + text + "</td> ";
    }

    // Styled bold inline text.
    private static string Strong(string text)
    {
        return "<strong style='" + StrongStyle + "'>" + text + "</strong>";
    }

    // Styled hyperlink that opens in a new tab.
    private static string Link(string href, string text)
    {
        return "<a target='_blank' style='" + LinkStyle + "' href='" + href + "'>" + text + "</a>";
    }
}
No comments:
Post a Comment