<div class="post_item"> <div class="digg"> <div class="diggit" onclick="DiggIt(3439076,120879,1)"> <span class="diggnum" id="digg_count_3439076">4</span> </div> <div class="clear"></div> <div id="digg_tip_3439076" class="digg_tip"></div> </div> <div class="post_item_body"> <h3><a class="titlelnk" href="http://www.cnblogs.com/swq6413/p/3439076.html" target="_blank">分享完整的项目工程目录结构</a></h3> <p class="post_item_summary"> <a href="http://www.cnblogs.com/swq6413/" target="_blank"><img width="48" height="48" class="pfs" src="http://pic.cnitblog.com/face/142964/20131116170946.png" alt=""/></a> 在项目开发过程中,如何有序的保存项目中的各类数据文件,建立一个分类清晰、方便管理的目录结构是非常重要的。 综合以前的项目和一些朋友的项目结构,我整理了一份我觉得还不错的项目目录结构。 在这里分享给大家,欢迎各位提出你宝贵的意见和建议。如果喜欢请“推荐”则个,感激万分!! 整个目录设置到4级子目录,实... </p> <div class="post_item_foot"> <a href="http://www.cnblogs.com/swq6413/" class="lightblue">七少爷</a> 发布于 2013-11-23 15:48 <span class="article_comment"><a href="http://www.cnblogs.com/swq6413/p/3439076.html#commentform" title="2013-11-23 16:40" class="gray"> 评论(4)</a></span><span class="article_view"><a href="http://www.cnblogs.com/swq6413/p/3439076.html" class="gray">阅读(206)</a></span></div> </div> <div class="clear"></div> </div>
/// <summary>
/// Console entry point: downloads the cnblogs.com home page and runs the
/// article extraction over the returned HTML.
/// </summary>
class Program
{
    /// <summary>
    /// Fetches the page HTML and hands it straight to the article parser.
    /// The parsed list is not used any further here.
    /// </summary>
    /// <param name="args">Command-line arguments (not read).</param>
    static void Main(string[] args)
    {
        HttpUtility.GetArticles(HttpUtility.HttpGetHtml());
    }
}
/// <summary>
/// Helpers for downloading the cnblogs.com home page and extracting the
/// article entries from its HTML with regular expressions.
/// </summary>
internal class HttpUtility
{
    // All patterns are constructed once and reused for every article instead of
    // being rebuilt on each loop iteration. RegexOptions.Singleline makes '.'
    // match line breaks as well, so a pattern can span several lines of markup.

    // One article entry: from the opening post_item div up to (not including)
    // the closing "clear" div that is directly followed by the entry's end tag.
    private static readonly Regex PostItemRegex = new Regex(
        "<div class=\"post_item\">(?<content>.*?)(?=<div class=\"clear\"></div>\\s*</div>)",
        RegexOptions.Singleline);

    // Text of the span inside the "digg" div (recommendation count).
    private static readonly Regex DiggRegex = new Regex(
        "<div class=\"digg\">.*<span.*>(?<digNum>.*)</span>.*<div class=\"post_item_body\">",
        RegexOptions.Singleline);

    // Everything between the h3 tags (the title link markup).
    private static readonly Regex TitleBlockRegex = new Regex(
        "<h3>(?<a>.*)</h3>",
        RegexOptions.Singleline);

    // href and link text of the title link.
    private static readonly Regex TitleLinkRegex = new Regex(
        "<a\\s.*href=\"(?<href>.*?)\".*>(?<summary>.*)</a>",
        RegexOptions.Singleline);

    // The img tag that is wrapped in a link (author avatar).
    private static readonly Regex AuthorImageRegex = new Regex(
        "<a.*>(?<img><img[^>].*>)</a>",
        RegexOptions.Singleline);

    // A link whose href attribute comes first and is followed by a target
    // attribute. The "target" group is captured by the pattern but not used.
    private static readonly Regex AuthorLinkRegex = new Regex(
        "<a\\s*?href=\"(?<href>.*)\"\\s*?target=\"(?<target>.*?)\">.*</a>",
        RegexOptions.Singleline);

    // Full inner content of the post_item_summary paragraph.
    private static readonly Regex SummaryRegex = new Regex(
        "<p class=\"post_item_summary\">(?<summary>.*)</p>",
        RegexOptions.Singleline);

    // Everything that follows the first closing anchor tag of the summary.
    private static readonly Regex IntroRegex = new Regex(
        "(?<indroduct>(?<=</a>).*)",
        RegexOptions.Singleline);

    // Footer: link text (publisher name) and the text between that link and
    // the article_comment span (publish time text).
    private static readonly Regex FooterRegex = new Regex(
        "<div class=\"post_item_foot\">\\s*<a.*?>(?<publishName>.*)</a>(?<publishTime>.*)<span class=\"article_comment\">",
        RegexOptions.Singleline);

    // Link text inside the article_comment span.
    private static readonly Regex CommentRegex = new Regex(
        "<span class=\"article_comment\"><a.*>(?<comment>.*)</a></span><span class=\"article_view\">",
        RegexOptions.Singleline);

    // Link text inside the article_view span.
    private static readonly Regex ReadRegex = new Regex(
        "<span\\s*class=\"article_view\"><a.*>(?<readNum>.*)</a>",
        RegexOptions.Singleline);

    /// <summary>
    /// Downloads http://www.cnblogs.com/ (the first page, by default) with a
    /// GET request and returns the response body decoded as UTF-8.
    /// </summary>
    /// <returns>The complete response body as a string.</returns>
    public static string HttpGetHtml()
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.cnblogs.com/");
        request.Accept = "text/plain, */*; q=0.01";
        request.Method = "GET";
        request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
        // NOTE(review): a GET carries no body, so this assignment looks
        // unnecessary; kept as-is - confirm before removing.
        request.ContentLength = 0;
        request.Host = "www.cnblogs.com";
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Maxthon/4.1.3.5000 Chrome/26.0.1410.43 Safari/537.1";

        // Dispose the response, its stream and the reader even when reading
        // throws; previously none of them was ever closed.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Extracts every article entry found in the given page HTML.
    /// </summary>
    /// <param name="htmlString">HTML of the page to scan.</param>
    /// <returns>
    /// One <see cref="Article"/> per matched entry, in document order; an
    /// empty list when no entry matches.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="htmlString"/> is null.
    /// </exception>
    public static List<Article> GetArticles(string htmlString)
    {
        if (htmlString == null)
        {
            throw new ArgumentNullException("htmlString");
        }

        List<Article> articleList = new List<Article>();

        // Enumerating the matches directly avoids the extra IsMatch scan; an
        // input without entries simply yields no iterations.
        foreach (Match item in PostItemRegex.Matches(htmlString))
        {
            articleList.Add(ParseArticle(item.Value));
        }

        return articleList;
    }

    /// <summary>
    /// Replaces every double quote with a single quote and removes all
    /// line-feed, tab and carriage-return characters.
    /// </summary>
    /// <param name="htmlString">Text to clean.</param>
    /// <returns>The cleaned text.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="htmlString"/> is null.
    /// </exception>
    public static string ClearSpecialTag(string htmlString)
    {
        if (htmlString == null)
        {
            throw new ArgumentNullException("htmlString");
        }

        // Plain character replacement; no regular expression is needed for
        // these literal, case-less characters.
        return htmlString
            .Replace("\n", "")
            .Replace("\t", "")
            .Replace("\r", "")
            .Replace("\"", "'");
    }

    /// <summary>
    /// Builds one article from the HTML of a single matched entry. A field
    /// whose pattern does not match is set to an empty string.
    /// </summary>
    private static Article ParseArticle(string itemHtml)
    {
        Article article = new Article();

        // Recommendation count.
        article.DiggNum = DiggRegex.Match(itemHtml).Groups["digNum"].Value;

        // Title: isolate the h3 content first, then read the link inside it.
        string titleHtml = TitleBlockRegex.Match(itemHtml).Groups["a"].Value;
        Match titleLink = TitleLinkRegex.Match(titleHtml);
        article.AritcleUrl = titleLink.Groups["href"].Value;
        article.AritcleTitle = titleLink.Groups["summary"].Value;

        // Author avatar (the raw img tag) and author blog URL.
        article.AuthorImg = AuthorImageRegex.Match(itemHtml).Groups["img"].Value;
        article.AuthorUrl = AuthorLinkRegex.Match(itemHtml).Groups["href"].Value;

        // Introduction: take the summary paragraph, then keep what follows
        // its first closing anchor tag.
        string summaryHtml = SummaryRegex.Match(itemHtml).Groups["summary"].Value;
        article.AritcleInto = IntroRegex.Match(summaryHtml).Groups["indroduct"].Value;

        // Publisher name and publish time come from one footer match.
        Match footer = FooterRegex.Match(itemHtml);
        article.Author = footer.Groups["publishName"].Value;
        article.PublishTime = footer.Groups["publishTime"].Value.Trim();

        // Comment and read counters are stored as the raw link text.
        article.CommentNum = CommentRegex.Match(itemHtml).Groups["comment"].Value;
        article.ReadNum = ReadRegex.Match(itemHtml).Groups["readNum"].Value;

        return article;
    }
}
/// <summary>
/// Plain data holder for one article entry; every value is kept as a string.
/// </summary>
public class Article
{
    /// <summary>Article title.</summary>
    public string AritcleTitle { get; set; }

    /// <summary>Article link.</summary>
    public string AritcleUrl { get; set; }

    /// <summary>Article introduction.</summary>
    public string AritcleInto { get; set; }

    /// <summary>Author name.</summary>
    public string Author { get; set; }

    /// <summary>Author address.</summary>
    public string AuthorUrl { get; set; }

    /// <summary>Author image.</summary>
    public string AuthorImg { get; set; }

    /// <summary>Publish time.</summary>
    public string PublishTime { get; set; }

    /// <summary>Recommendation count.</summary>
    public string DiggNum { get; set; }

    /// <summary>Comment count.</summary>
    public string CommentNum { get; set; }

    /// <summary>Read count.</summary>
    public string ReadNum { get; set; }
}
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有