// Minimal HtmlAgilityPack walkthrough: parse an in-memory page, then look nodes up by id and by XPath.
string HTML = @"<html><head><title>简单解析测试</title></head><body>
<div id='div1' title='div1'>
<table>
<tr>
<td>1</td>
<td title='cn'>cn</td>
</tr>
</table>
</div>
</body></html>";
var doc = new HtmlDocument();
doc.LoadHtml(HTML);

// Print the page title. SelectSingleNode yields null when nothing matches, so guard before reading InnerText.
HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("/html/head/title");
Console.WriteLine("页面title:" + (titleNode == null ? "" : titleNode.InnerText));

// Fetch the div1 element, approach 1: lookup by id.
HtmlNode divNode1 = doc.GetElementbyId("div1");
// Fetch the div1 element, approach 2: lookup by XPath.
HtmlNode divNode2 = doc.DocumentNode.SelectSingleNode("//div[@id='div1']");
// Report whether the two lookups produced equal nodes.
Console.WriteLine("判断节点1和节点2是否相同:" + (divNode1 == divNode2));

// Collect every table on the page. SelectNodes yields null (not an empty collection) when nothing matches.
HtmlNodeCollection tableCollection = doc.DocumentNode.SelectNodes("//table");
Console.WriteLine("页面table数量:" + (tableCollection == null ? 0 : tableCollection.Count));

// Walk every td under the first table and print its text and its title attribute (empty when absent).
HtmlNodeCollection tdCollection = (tableCollection != null && tableCollection.Count > 0)
    ? tableCollection[0].SelectNodes("tr/td")
    : null;
if (tdCollection != null)
{
    foreach (var td in tdCollection)
    {
        HtmlAttribute atr = td.Attributes["title"];
        Console.WriteLine("td InnerText:" + td.InnerText + " | td title属性值:" + (atr == null ? "" : atr.Value));
    }
}

// Wait for input before the console program continues.
Console.Read();
/// <summary>
/// Proxy usage example: requests <paramref name="Url"/> through a hard-coded, authenticated
/// proxy and returns the response body as text.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <param name="type">Name of the character encoding used to decode the response body (passed to <c>Encoding.GetEncoding</c>).</param>
/// <returns>The decoded response body, or an empty string when anything in the request or decoding fails.</returns>
public static string GetUrltoHtml(string Url, string type)
{
    try
    {
        // Resolve the encoding first so an invalid name fails before a connection is opened.
        Encoding encoding = Encoding.GetEncoding(type);

        System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);
        // NOTE(review): proxy address and credentials are hard-coded sample values; move them to configuration before real use.
        WebProxy myProxy = new WebProxy("192.168.15.11", 8015);
        // A user name and password are only needed when the proxy requires authentication.
        myProxy.Credentials = new NetworkCredential("admin", "123456");
        // Route the request through the proxy.
        wReq.Proxy = myProxy;

        // Dispose response, stream and reader deterministically, including on failure paths.
        using (System.Net.WebResponse wResp = wReq.GetResponse())
        using (System.IO.Stream respStream = wResp.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
    catch (System.Exception ex)
    {
        // Best-effort contract is kept (callers get ""), but the failure is no longer silent.
        System.Diagnostics.Trace.TraceError("GetUrltoHtml failed for {0}: {1}", Url, ex);
    }
    return "";
}
/// <summary>
/// Loads the listing page and reads the total page count from its pagination links.
/// </summary>
/// <param name="IPURL">Address of the listing page; forwarded to <c>GetHTML</c>.</param>
/// <param name="ProxyIp">Proxy value forwarded to <c>GetHTML</c>.</param>
/// <returns>
/// The integer parsed from the second-to-last pagination link, or 1 when there are
/// two or fewer links or that link's text is not an integer.
/// </returns>
private static int GetTotalPage(string IPURL, string ProxyIp)
{
    const int fallbackPageCount = 1;

    HtmlDocument pagerDoc = new HtmlDocument();
    pagerDoc.LoadHtml(GetHTML(IPURL, ProxyIp));

    HtmlNodeCollection pagerLinks = pagerDoc.DocumentNode.SelectNodes(@"//div[@class='pagination']/a");

    // No pager, or too few links to have a second-to-last entry worth reading.
    if (pagerLinks == null || pagerLinks.Count <= 2)
    {
        return fallbackPageCount;
    }

    int lastPage;
    string label = pagerLinks[pagerLinks.Count - 2].InnerText;
    return int.TryParse(label, out lastPage) ? lastPage : fallbackPageCount;
}
/// <summary>
/// Parses a range of listing pages and appends one <c>IPProxy</c> per accepted table row
/// to a caller-supplied list.
/// </summary>
/// <param name="param">
/// A <c>Hashtable</c> with the keys "start" and "end" (first and last page number, inclusive),
/// "list" (the <c>List&lt;IPProxy&gt;</c> that receives the results) and "param" (a <c>ProxyParam</c>).
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="param"/> is not a <c>Hashtable</c>, or its "list" / "param" entries are missing or of the wrong type.
/// </exception>
private static void DoWork(object param)
{
    // Unpack the arguments and validate them up front, so a bad call fails immediately
    // instead of with a NullReferenceException after pages have already been fetched.
    Hashtable table = param as Hashtable;
    if (table == null)
    {
        throw new ArgumentException("param must be a Hashtable.", "param");
    }
    int start = Convert.ToInt32(table["start"]);
    int end = Convert.ToInt32(table["end"]);
    List<IPProxy> list = table["list"] as List<IPProxy>;
    ProxyParam Param = table["param"] as ProxyParam;
    if (list == null || Param == null)
    {
        throw new ArgumentException("param must contain a List<IPProxy> under \"list\" and a ProxyParam under \"param\".", "param");
    }

    for (int i = start; i <= end; i++)
    {
        LogHelper.WriteLog(string.Format("开始解析,页码{0}~{1},当前页码{2}", start, end, i));

        // Page address: base URL followed by the page number.
        string url = string.Format("{0}/{1}", Param.IPUrl, i);
        var doc = new HtmlDocument();
        doc.LoadHtml(GetHTML(url, Param.ProxyIp));

        // All rows of the ip_list table. Row 0 is skipped below (presumably the header row — confirm against the page).
        var trs = doc.DocumentNode.SelectNodes(@"//table[@id='ip_list']/tr");
        if (trs != null && trs.Count > 1)
        {
            LogHelper.WriteLog(string.Format("当前页码{0},请求地址{1},共{2}条数据", i, url, trs.Count));
            for (int j = 1; j < trs.Count; j++)
            {
                HtmlNodeCollection nodes = trs[j].SelectNodes("td");
                // A usable row needs cells 0..9; anything shorter is ignored.
                if (nodes == null || nodes.Count <= 9)
                {
                    continue;
                }

                string ip = nodes[2].InnerText.Trim();
                // Optionally drop addresses that fail the Ping check; only rows that pass are added.
                if (Param.IsPingIp && !Ping(ip))
                {
                    continue;
                }

                IPProxy item = new IPProxy();

                // Country comes from the alt attribute of the first child of cell 1; left unset when absent.
                string country = GetTrimmedAttribute(nodes[1].FirstChild, "alt");
                if (country != null)
                {
                    item.Country = country;
                }

                item.IP = ip;
                item.Port = nodes[3].InnerText.Trim();
                item.ProxyIp = GetIP(item.IP, item.Port);
                item.Position = nodes[4].InnerText.Trim();
                item.Anonymity = nodes[5].InnerText.Trim();
                item.Type = nodes[6].InnerText.Trim();

                // Speed and connect time are carried in the title attribute of a div.bar inside cells 7 and 8.
                string speed = GetTrimmedAttribute(nodes[7].SelectSingleNode("div[@class='bar']"), "title");
                if (speed != null)
                {
                    item.Speed = speed;
                }
                string connectTime = GetTrimmedAttribute(nodes[8].SelectSingleNode("div[@class='bar']"), "title");
                if (connectTime != null)
                {
                    item.ConnectTime = connectTime;
                }

                item.VerifyTime = nodes[9].InnerText.Trim();
                list.Add(item);
            }
            LogHelper.WriteLog(string.Format("当前页码{0},共{1}条数据", i, trs.Count));
        }
        LogHelper.WriteLog(string.Format("结束解析,页码{0}~{1},当前页码{2}", start, end, i));
    }
}

/// <summary>
/// Returns the trimmed value of the named attribute, or null when the node or the attribute is missing.
/// </summary>
/// <param name="node">Node to read from; may be null.</param>
/// <param name="attributeName">Name of the attribute to look up.</param>
/// <returns>The trimmed attribute value, or null when there is nothing to read.</returns>
private static string GetTrimmedAttribute(HtmlNode node, string attributeName)
{
    if (node == null)
    {
        return null;
    }
    HtmlAttribute attribute = node.Attributes[attributeName];
    return attribute == null ? null : attribute.Value.Trim();
}
/// <summary>
/// Click handler for button2: passes the text of textBox1 to <c>RefreshIESettings</c> and to a
/// new <c>IEProxy</c>, whose <c>RefreshIESettings</c> is then called.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    string proxyAddress = this.textBox1.Text;

    // NOTE(review): the same address goes through two refresh calls — the local
    // RefreshIESettings and IEProxy.RefreshIESettings. Confirm both are required.
    RefreshIESettings(proxyAddress);
    new IEProxy(proxyAddress).RefreshIESettings();
}
#endregion
#region 取消代理IP
/// <summary>
/// Click handler for button3: creates an <c>IEProxy</c> with a null argument and calls its <c>DisableIEProxy</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // No proxy value is passed to the constructor here; null is given instead.
    new IEProxy(null).DisableIEProxy();
}
#endregion
#region 打开网页
/// <summary>
/// Click handler for button1: navigates webBrowser1 to the trimmed text of txt_url,
/// or shows a message box when that text is empty.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string target = txt_url.Text.Trim();

    if (target.Length == 0)
    {
        // Nothing to open: show the prompt and leave the browser untouched.
        MessageBox.Show("请输入要打开的网址");
    }
    else
    {
        this.webBrowser1.Navigate(target, null, null, null);
    }
}
#endregion
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有