Thursday, February 1, 2018

C# .NET HTML Agility Pack - Get HTML Table Column Counts




How to count table columns and rows using HTML Agility Pack. 




The code is reproduced below in case the JIT (online runner) service above fails.


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
using System;
using HtmlAgilityPack;
using System.Xml;
using System.Collections.Generic;
using System.Linq;
     
/// <summary>
/// Demo program: parses a hard-coded HTML table with HTML Agility Pack, prints the
/// cells of every row, and reports the column count per row and the largest column
/// count across all rows.
/// </summary>
public class Program
{
    /// <summary>
    /// Loads the sample table markup, walks every <c>tr</c> inside the first table,
    /// prints each cell's outer HTML and the row's column count, and finally prints
    /// the maximum column count seen.
    /// </summary>
    public static void Main()
    {
        // Verbatim literal (@"..."): the markup spans several source lines, which a
        // regular "..." literal does not allow. The markup only uses single quotes,
        // so no quote escaping is needed. The embedded line breaks fall inside
        // attribute values / text and do not affect the table structure.
        string html = @"<table style='box-sizing: border-box; border-collapse: collapse; border-spacing: 0px; max-width: 100%; position: relative; table-layout: fixed; width: 2034px; z-index: 1; color: rgb(0, 0, 0); font-family: Lato, Roboto, Arial, Tahoma, sans-serif; font-size: 10px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration-style: initial; text-decoration-color: initial; border-top: 0px none; margin-left: 0px; margin-right: 0px; margin-top: 0px; margin-bottom: 1.5em'> <caption style='box-sizing: border-box'>Most Popular Content Management Systems</caption> <thead style='box-sizing: border-box'> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> CMS</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Usage *</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Change Since Jan 1</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Market Share *</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; 
border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Change Since Jan 1</th> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Type</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Percentage</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Date</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Percentage</th> <th style='box-sizing: border-box; font-weight: bold; text-align: left; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px; background: rgba(0 0 0.05)'> Change Since Jan 1</th> </tr> </thead> <tfoot style='box-sizing: border-box'> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> Totals</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 33.3%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'>&nbsp;</td> <td style='box-sizing: 
border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 76%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'>&nbsp;</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td colspan='5' style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> *&nbsp;<strong style='box-sizing: border-box; font-weight: bold'>Usage</strong>&nbsp;is percentage of surveyed websites that use the associated CMS.&nbsp;<strong style='box-sizing: border-box; font-weight: bold'>Market Share</strong>&nbsp;is the percentage of surveyed websites powered by a CMS that use the associated CMS. For example, 25.8% of all surveyed websites use WordPress, and WordPress commands 59.1% market share of the total CMS market.</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td colspan='5' style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> The data in this table is provided courtesy of&nbsp;<a target='_blank' style='box-sizing: border-box; color: rgb(63, 136, 197); text-decoration: none; transition: all 0.2s ease-in-out; background:' href='http://w3techs.com/'>W3Techs</a>&nbsp;and was captured in February 2016. 
To learn more about this topic visit the&nbsp;<a target='_blank' style='box-sizing: border-box; color: rgb(63, 136, 197); text-decoration: none; transition: all 0.2s ease-in-out; background:' href='http://w3techs.com/technologies/overview/content_management/all'>overview of content management systems</a>&nbsp;from W3Techs.</td> </tr> </tfoot> <tbody style='box-sizing: border-box'> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> WordPress</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 25.8%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> +0.2%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 59.1%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> +0.3%</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> Joomla</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 2.8%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> No Change</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; 
padding: 12px'> 6.4%</td> <td rowspan='4' style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> No Change</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> Drupal</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 2.2%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> +0.1%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 4.9%</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> Magento</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 1.3%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> +0.1%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 2.9%</td> </tr> <tr style='box-sizing: border-box; border-bottom: 1px solid rgb(240, 240, 240)'> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> Blogger</td> <td style='box-sizing: border-box; font-family: inherit; 
font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 1.2%</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> No Change</td> <td style='box-sizing: border-box; font-family: inherit; font-size: inherit; vertical-align: middle; border-bottom: 0.05) solid rgba(0; padding: 12px'> 2.7%</td> </tr> </tbody> </table>";

        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        // SelectSingleNode returns the first match in document order, i.e. the first table.
        HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode("//table[1]");
        if (table == null)
        {
            Console.WriteLine("No table found.");
            return;
        }

        // The leading "." makes the query relative to the selected table. A bare
        // "//tr" is an absolute path and would return every row in the whole
        // document, regardless of which node SelectNodes is called on.
        HtmlNodeCollection rows = table.SelectNodes(".//tr");

        int columnUpperBound = 0;
        int rowNumber = 1;

        // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when
        // nothing matches, so guard before enumerating.
        if (rows != null)
        {
            foreach (HtmlNode row in rows)
            {
                int colCnt = PrintCellsAndCountColumns(row);

                Console.WriteLine("row " + rowNumber + ". Column count=" + colCnt);
                rowNumber++;

                columnUpperBound = Math.Max(colCnt, columnUpperBound);
            }
        }

        Console.WriteLine("Final column Count=" + columnUpperBound);
    }

    /// <summary>
    /// Prints every cell of <paramref name="row"/> (numbered from 1) and returns the
    /// number of columns the row occupies.
    /// </summary>
    /// <param name="row">A <c>tr</c> node whose direct children are inspected.</param>
    /// <returns>
    /// The sum of the <c>colspan</c> values (default 1) of the row's direct
    /// <c>td</c> and <c>th</c> children.
    /// </returns>
    /// <remarks>
    /// Cells pushed into this row by a <c>rowspan</c> on an earlier row are not
    /// added; only the cells physically present in the row are counted.
    /// </remarks>
    private static int PrintCellsAndCountColumns(HtmlNode row)
    {
        int columns = 0;
        int cellNumber = 1;

        foreach (HtmlNode cell in row.ChildNodes)
        {
            // Header cells (th) occupy columns just like data cells (td); whitespace
            // text nodes and any other children are skipped.
            bool isCell = string.Equals(cell.Name, "td", StringComparison.OrdinalIgnoreCase)
                || string.Equals(cell.Name, "th", StringComparison.OrdinalIgnoreCase);
            if (!isCell)
            {
                continue;
            }

            // A cell with colspan='N' covers N columns; missing, unparsable or
            // non-positive values count as a single column.
            int span = cell.GetAttributeValue("colspan", 1);
            columns += Math.Max(span, 1);

            Console.WriteLine("  " + cellNumber + ". " + cell.OuterHtml);
            cellNumber++;
        }

        return columns;
    }
}

No comments:

Post a Comment