/// <summary>
/// Offers a download task to every worker slot that is currently idle.
/// Does nothing once the stop flag has been raised.
/// </summary>
private void DispatchWork()
{
    // Only hand out work while the download has not been stopped.
    if (!_stop)
    {
        int slot = 0;
        while (slot < _reqCount)
        {
            bool idle = !_reqsBusy[slot];
            if (idle)
            {
                // Let this idle worker slot request the next resource.
                RequestResource(slot);
            }
            slot++;
        }
    }
}
// One flag per worker slot; an element is true while that worker is busy with a request.
private bool[] _reqsBusy = null;
// Number of worker slots. This declaration had been swallowed by the comment on the
// previous declaration's line, leaving the field undeclared.
private int _reqCount = 4;
/// <summary>
/// Takes the next pending URL and starts an asynchronous HTTP request for it
/// on behalf of the given worker slot. If no URL is pending, the slot is
/// reported as finished instead.
/// </summary>
/// <param name="index">Number of the worker slot that issues the request.</param>
private void RequestResource(int index)
{
    int depth;
    string url = "";
    try
    {
        lock (_locker)
        {
            if (_urlsUnload.Count <= 0) // nothing left to download
            {
                _workingSignals.FinishWorking(index); // mark this worker as Finished
                return;
            }
            _reqsBusy[index] = true;
            _workingSignals.StartWorking(index); // mark this worker as Working

            // Fetch the first pending entry once instead of enumerating twice.
            var next = _urlsUnload.First();
            url = next.Key;
            depth = next.Value;
            _urlsLoaded.Add(url, depth); // record the URL as handled
            _urlsUnload.Remove(url);     // and drop it from the pending set
        }

        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
        req.Method = _method;       // request method
        req.Accept = _accept;       // accepted content types
        req.UserAgent = _userAgent; // user agent
        RequestState rs = new RequestState(req, url, depth, index); // state handed to the callbacks
        var result = req.BeginGetResponse(new AsyncCallback(ReceivedResource), rs);
        // Register the timeout handler on the pending request.
        ThreadPool.RegisterWaitForSingleObject(result.AsyncWaitHandle,
            TimeoutCallback, rs, _maxTime, true);
        return; // request is in flight; the callbacks release the slot later
    }
    catch (WebException we)
    {
        MessageBox.Show("RequestResource " + we.Message + url + we.Status);
    }
    catch (UriFormatException ufe)
    {
        // WebRequest.Create rejects malformed URLs with this exception.
        MessageBox.Show("RequestResource " + ufe.Message + url);
    }

    // Only reached when the request could not be started. Without this the slot
    // would stay busy (and marked Working) forever, so the crawl would never finish.
    _reqsBusy[index] = false;
    DispatchWork();
}
/// <summary>
/// Per-request state object that is passed to the asynchronous callbacks:
/// the request itself, its URL, depth and worker number, plus the receive
/// buffer and the text accumulated so far.
/// </summary>
class RequestState
{
    // Size in bytes of the receive buffer.
    private const int ChunkBytes = 131072;

    // Buffer that each read fills with received bytes.
    private readonly byte[] _buffer = new byte[ChunkBytes];

    // Accumulates all characters received so far.
    private readonly StringBuilder _text = new StringBuilder();

    /// <summary>Creates the state for one request.</summary>
    /// <param name="req">The HTTP request.</param>
    /// <param name="url">The requested URL.</param>
    /// <param name="depth">Relative depth of this request.</param>
    /// <param name="index">Number of the worker slot that issued the request.</param>
    public RequestState(HttpWebRequest req, string url, int depth, int index)
    {
        Index = index;
        Depth = depth;
        Url = url;
        Req = req;
    }

    /// <summary>The HTTP request.</summary>
    public HttpWebRequest Req { get; private set; }

    /// <summary>The requested URL.</summary>
    public string Url { get; private set; }

    /// <summary>Relative depth of this request.</summary>
    public int Depth { get; private set; }

    /// <summary>Number of the worker slot that issued the request.</summary>
    public int Index { get; private set; }

    /// <summary>Stream the response data is read from.</summary>
    public Stream ResStream { get; set; }

    /// <summary>Builder holding the text received so far.</summary>
    public StringBuilder Html
    {
        get { return _text; }
    }

    /// <summary>The receive buffer.</summary>
    public byte[] Data
    {
        get { return _buffer; }
    }

    /// <summary>Size of the receive buffer in bytes.</summary>
    public int BufferSize
    {
        get { return ChunkBytes; }
    }
}
/// <summary>
/// Wait callback registered for each pending request. When the wait timed out,
/// the request is aborted, its worker slot is released and new work is dispatched.
/// </summary>
/// <param name="state">The <c>RequestState</c> of the request being watched.</param>
/// <param name="timedOut">True when the wait ended because of a timeout.</param>
private void TimeoutCallback(object state, bool timedOut)
{
    if (!timedOut)
    {
        return; // the request completed in time
    }

    RequestState rs = state as RequestState;
    if (rs == null)
    {
        // No usable state: there is no request to abort and no slot to release.
        // Previously rs.Index was dereferenced outside the null check.
        return;
    }

    rs.Req.Abort();                // cancel the request
    _reqsBusy[rs.Index] = false;   // release the worker slot
    DispatchWork();                // hand out new work
}
/// <summary>
/// Callback for <c>BeginGetResponse</c>. On a 200 OK response it starts reading
/// the body asynchronously; otherwise it releases the worker slot and dispatches
/// new work.
/// </summary>
/// <param name="ar">Async result whose state is the request's <c>RequestState</c>.</param>
private void ReceivedResource(IAsyncResult ar)
{
    RequestState rs = (RequestState)ar.AsyncState; // state passed when the request was started
    HttpWebRequest req = rs.Req;
    string url = rs.Url;
    try
    {
        HttpWebResponse res = (HttpWebResponse)req.EndGetResponse(ar);
        if (_stop) // download was stopped
        {
            res.Close();
            req.Abort();
            return;
        }
        if (res.StatusCode == HttpStatusCode.OK)
        {
            Stream resStream = res.GetResponseStream();
            rs.ResStream = resStream;
            // Start reading the body asynchronously.
            resStream.BeginRead(rs.Data, 0, rs.BufferSize,
                new AsyncCallback(ReceivedData), rs);
        }
        else // response received but not usable
        {
            res.Close();
            req.Abort();
            _reqsBusy[rs.Index] = false; // release the worker slot
            DispatchWork();              // hand out new work
        }
    }
    catch (WebException we)
    {
        MessageBox.Show("ReceivedResource " + we.Message + url + we.Status);

        // Error responses (e.g. 404) arrive as exceptions; close the attached
        // response so its connection is released.
        if (we.Response != null)
        {
            we.Response.Close();
        }

        // RequestCanceled is what an aborted request reports. The timeout
        // callback aborts the request and releases the slot itself, so only
        // release it here for genuine failures. Without this, every failed
        // request left its worker slot busy forever.
        if (we.Status != WebExceptionStatus.RequestCanceled)
        {
            _reqsBusy[rs.Index] = false;
            DispatchWork();
        }
    }
}
/// <summary>
/// Callback for <c>BeginRead</c> on the response stream. Appends each received
/// chunk to the request's text; when the stream is exhausted it saves the page,
/// queues the links found in it, releases the worker slot and dispatches new work.
/// </summary>
/// <param name="ar">Async result whose state is the request's <c>RequestState</c>.</param>
private void ReceivedData(IAsyncResult ar)
{
    RequestState rs = (RequestState)ar.AsyncState;
    HttpWebRequest req = rs.Req;
    Stream resStream = rs.ResStream;
    string url = rs.Url;
    int depth = rs.Depth;
    int index = rs.Index;
    try
    {
        int read = resStream.EndRead(ar); // number of bytes received by this read
        if (_stop) // download was stopped
        {
            resStream.Close();
            req.Abort();
            return;
        }
        if (read > 0)
        {
            // Decode this chunk and append it to what was received before.
            // NOTE(review): decoding chunk by chunk can split a multi-byte
            // character at a chunk boundary; fixing that needs per-request
            // decoder state in RequestState.
            using (MemoryStream ms = new MemoryStream(rs.Data, 0, read))
            using (StreamReader reader = new StreamReader(ms, _encoding))
            {
                rs.Html.Append(reader.ReadToEnd());
            }
            // Ask for the next chunk.
            resStream.BeginRead(rs.Data, 0, rs.BufferSize,
                new AsyncCallback(ReceivedData), rs);
            return;
        }

        // End of stream: close it so the underlying connection is released.
        resStream.Close();

        string html = rs.Html.ToString();
        SaveContents(html, url);          // save to disk
        string[] links = GetLinks(html);  // extract the links in the page
        AddUrls(links, depth + 1);        // filter them and queue the new ones
    }
    catch (WebException we)
    {
        MessageBox.Show("ReceivedData Web " + we.Message + url + we.Status);
        req.Abort(); // drop the failed request and its connection
    }
    catch (IOException ioe)
    {
        // Stream reads report transport failures as IOException.
        MessageBox.Show("ReceivedData IO " + ioe.Message + url);
        req.Abort();
    }

    // Reached after a completed page and after a failed read: in both cases the
    // worker slot must be released, otherwise it stays busy forever.
    _reqsBusy[index] = false;
    DispatchWork();
}
/// <summary>
/// Starts the periodic completion check and hands out the first batch of work.
/// </summary>
private void StartDownload()
{
    const int firstCheckDelayMs = 0;   // run the first check immediately
    const int checkIntervalMs = 300;   // then check again every 300 ms

    TimerCallback onTick = new TimerCallback(CheckFinish);
    _checkTimer = new Timer(onTick, null, firstCheckDelayMs, checkIntervalMs);

    DispatchWork();
}
/// <summary>
/// Timer callback: once every worker reports Finished, stops the timer and
/// raises <c>DownloadFinish</c> through the UI dispatcher.
/// </summary>
/// <param name="param">Timer state object; not used.</param>
private void CheckFinish(object param)
{
    // Still working: wait for the next tick.
    if (!_workingSignals.IsFinished())
    {
        return;
    }

    // All workers are finished: stop the timer.
    _checkTimer.Dispose();
    _checkTimer = null;

    // Nothing to notify unless a handler is registered and a UI object is set.
    if (DownloadFinish == null || _ui == null)
    {
        return;
    }

    _ui.Dispatcher.Invoke(DownloadFinish, _index);
}
/// <summary>
/// Signature of the handlers for the <c>DownloadFinish</c> event.
/// </summary>
/// <param name="count">Value of the saved-file counter at the time the event is raised.</param>
public delegate void DownloadFinishHandler(int count);

/// <summary>
/// Raised after all links have been downloaded and analysed.
/// </summary>
// This declaration had been swallowed by the comment on the delegate's line.
public event DownloadFinishHandler DownloadFinish = null;
/// <summary>
/// Writes a downloaded page to a numbered .txt file under the output folder and,
/// when the write succeeded, raises <c>ContentsSaved</c> through the UI dispatcher.
/// </summary>
/// <param name="html">Page text to save; null or empty text is ignored.</param>
/// <param name="url">URL the page was downloaded from.</param>
private void SaveContents(string html, string url)
{
    if (string.IsNullOrEmpty(html)) // nothing worth saving
    {
        return;
    }

    string path = string.Format("{0}\\{1}.txt", _path, _index++); // build the file name
    try
    {
        using (StreamWriter writer = new StreamWriter(path))
        {
            writer.Write(html);
        }
    }
    catch (IOException ioe)
    {
        MessageBox.Show("SaveContents IO" + ioe.Message + " path=" + path);
        // The file was not saved, so do not announce it as saved.
        return;
    }

    // Same guard as the finish notification: a handler and a UI object are both required.
    if (ContentsSaved != null && _ui != null)
    {
        _ui.Dispatcher.Invoke(ContentsSaved, path, url);
    }
}
/// <summary>
/// Signature of the handlers for the <c>ContentsSaved</c> event.
/// </summary>
/// <param name="path">Path of the file that was written.</param>
/// <param name="url">URL the saved page was downloaded from.</param>
public delegate void ContentsSavedHandler(string path, string url);

/// <summary>
/// Raised after a page has been saved to a local file.
/// </summary>
// This declaration had been swallowed by the comment on the delegate's line.
public event ContentsSavedHandler ContentsSaved = null;
/// <summary>
/// Extracts every http:// link found in the given page text.
/// </summary>
/// <param name="html">Page text to scan.</param>
/// <returns>The matched links in order of appearance; empty when none match.</returns>
private string[] GetLinks(string html)
{
    const string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";

    // Case-insensitive scan of the whole text.
    MatchCollection found = Regex.Matches(html, pattern, RegexOptions.IgnoreCase);

    // Project each match onto its matched text.
    return found.Cast<Match>()
                .Select(hit => hit.Value)
                .ToArray();
}
/// <summary>
/// Tells whether a link is already known, i.e. it is in the pending set
/// or in the set of URLs that were already taken for download.
/// </summary>
/// <param name="url">Link to look up.</param>
/// <returns>True when either set contains the link.</returns>
private bool UrlExists(string url)
{
    if (_urlsUnload.ContainsKey(url))
    {
        return true;
    }
    return _urlsLoaded.ContainsKey(url);
}
// File extensions of resources that are not worth crawling (images, styles, scripts).
private static readonly string[] _skippedExtensions = { ".jpg", ".gif", ".png", ".css", ".js" };

/// <summary>
/// Decides whether a link should be queued: it must not be known already and
/// must not point at an image, stylesheet or script.
/// </summary>
/// <param name="url">Link to check.</param>
/// <returns>True when the link may be queued for download.</returns>
private bool UrlAvailable(string url)
{
    if (UrlExists(url))
    {
        return false; // already queued or downloaded
    }
    if (IsSkippedResource(url))
    {
        return false; // drop images and similar resources
    }
    return true;
}

/// <summary>
/// Tells whether the path part of a URL ends with one of the skipped extensions.
/// Only the path is inspected, so host names such as "www.jsfiddle.net" and page
/// types such as ".jsp" no longer match ".js", and the check ignores case.
/// </summary>
private static bool IsSkippedResource(string url)
{
    string path;
    Uri parsed;
    if (Uri.TryCreate(url, UriKind.Absolute, out parsed))
    {
        path = parsed.AbsolutePath; // excludes host, query and fragment
    }
    else
    {
        // Not parseable as an absolute URI: at least cut off query and fragment.
        int cut = url.IndexOfAny(new[] { '?', '#' });
        path = cut >= 0 ? url.Substring(0, cut) : url;
    }

    foreach (string extension in _skippedExtensions)
    {
        if (path.EndsWith(extension, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Normalises the given links and adds the new, site-internal ones to the
/// pending set at the given depth. Links at or beyond the maximum depth are ignored.
/// </summary>
/// <param name="urls">Links extracted from a page.</param>
/// <param name="depth">Depth to record for the links that are added.</param>
private void AddUrls(string[] urls, int depth)
{
    if (urls == null || depth >= _maxDepth)
    {
        return; // nothing to add, or too deep
    }

    // The URL sets are also changed under this lock when a worker takes its next
    // URL, and this method runs on response callbacks, so the check-and-add
    // sequence must hold the same lock.
    lock (_locker)
    {
        foreach (string url in urls)
        {
            string cleanUrl = url.Trim();       // strip surrounding whitespace
            cleanUrl = cleanUrl.TrimEnd('/');   // normalise away trailing '/'
            if (UrlAvailable(cleanUrl))
            {
                if (cleanUrl.Contains(_baseUrl))
                {
                    _urlsUnload.Add(cleanUrl, depth); // internal link: queue it
                }
                else
                {
                    // External link: not handled.
                }
            }
        }
    }
}
/// <summary>
/// Root URL of the download. Setting it adds "http://" when the value has no
/// http/https scheme, and derives the base URL (no scheme, no leading "www.",
/// no trailing '/') that is used to recognise site-internal links.
/// </summary>
/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
public string RootUrl
{
    get
    {
        return _rootUrl;
    }
    set
    {
        if (value == null)
        {
            throw new ArgumentNullException("value");
        }

        // The scheme must be at the start. A plain Contains("http://") was fooled
        // by "https://", by upper-case schemes and by "http://" inside a query.
        bool hasScheme =
            value.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            value.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        _rootUrl = hasScheme ? value : "http://" + value;

        // Drop the scheme.
        int schemeEnd = _rootUrl.IndexOf("://", StringComparison.Ordinal);
        string baseUrl = _rootUrl.Substring(schemeEnd + 3);

        // Drop only a leading "www." so the whole site matches; removing every
        // "www." occurrence would corrupt other parts of the URL.
        if (baseUrl.StartsWith("www.", StringComparison.OrdinalIgnoreCase))
        {
            baseUrl = baseUrl.Substring(4);
        }

        _baseUrl = baseUrl.TrimEnd('/'); // drop trailing '/'
    }
}
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有