2012年2月7日 星期二

HtmlAgilityPack 抓取HTML上的資訊 asp.net c#

HtmlAgilityPack 是一個 .NET 函式庫,可以簡單地抓取 HTML 頁面上特定位置的資訊。

先在 Project > Add Reference 加入 HtmlAgilityPack.dll,
再配合 Firebug 取得目標元素的 XPath,即可用下面的程式碼讀取該元素的內容。


// Example: scrape the Apple Store (HK) iPhone 4S page.
string str = "http://store.apple.com/hk/browse/home/shop_iphone/family/iphone/iphone4s";

// Direct cast instead of "as": a wrong request type fails here with a clear
// InvalidCastException rather than later with a NullReferenceException.
HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(str);
try
{
    string s;

    // "using" guarantees the response, stream and reader are released even
    // when reading the body throws part-way through.
    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream stream = httpWebResponse.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
    {
        s = reader.ReadToEnd();
    }

    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(s);

    // XPaths of the shipping-time elements for the 16GB, 32GB and 64GB models.
    HtmlNode anchors = htmlDoc.DocumentNode.SelectSingleNode(
        "/html/body/div[2]/div[3]/div/div[2]/div[2]/div[3]/ul/li/label/span/span[3]/span");
    HtmlNode anchors32 = htmlDoc.DocumentNode.SelectSingleNode(
        "/html/body/div[2]/div[3]/div/div[2]/div[2]/div[3]/ul/li[2]/label/span/span[3]/span");
    HtmlNode anchors64 = htmlDoc.DocumentNode.SelectSingleNode(
        "/html/body/div[2]/div[3]/div/div[2]/div[2]/div[3]/ul/li[3]/label/span/span[3]/span");

    // SelectSingleNode yields null when the XPath matches nothing (for example
    // after a page redesign), so fall back to an empty string instead of
    // dereferencing a null node.
    string text16 = anchors != null ? anchors.InnerText : string.Empty;
    string text32 = anchors32 != null ? anchors32.InnerText : string.Empty;
    string text64 = anchors64 != null ? anchors64.InnerText : string.Empty;

    // Output: append the three values, space-separated, to the status text.
    txtStatus.Text += text16 + " " + text32 + " " + text64 + " ";
}
catch (WebException web)
{
    // Report the network failure instead of silently swallowing it.
    txtStatus.Text += web.Message + " ";
}

沒有留言:

張貼留言