源码网商城,靠谱的源码在线交易网站 我的订单 购物车 帮助

源码网商城

Java获取网络文件并插入数据库的代码

  • 时间:2020-10-28 04:32 编辑: 来源: 阅读:
  • 扫一扫,手机访问
摘要:Java获取网络文件并插入数据库的代码
获取百度的歌曲名,歌手和链接!!
复制代码 代码如下:
package webTools;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dbTools.DBTools;

/**
 * Downloads an HTML page, extracts song name, artist and link from the
 * table cells of its "list" table with regular expressions, and stores
 * one row per song through {@link DBTools}.
 */
public class IOTOWeb {

    /** A table cell whose content starts with an anchor (optionally preceded by "("). */
    private static final Pattern LINK_CELL_PATTERN = Pattern.compile(
            "<td[^>]*>[\\(]*<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)[\\)]*[\\s]*</td>",
            Pattern.DOTALL);

    /**
     * An href attribute; group 1 = double-quoted value, group 2 =
     * single-quoted value, group 3 = unquoted value.
     */
    private static final Pattern HREF_PATTERN = Pattern.compile(
            "href=(?:\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))",
            Pattern.DOTALL);

    /** An anchor wrapped in parentheses: "(" + anchor + ... + ")". */
    private static final Pattern PERSON_PATTERN = Pattern.compile(
            "\\(<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)\\)",
            Pattern.DOTALL);

    /** A complete anchor element that is followed by a whitespace character. */
    private static final Pattern SONG_NAME_PATTERN = Pattern.compile(
            "<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)</a>\\s",
            Pattern.DOTALL);

    /** The table with class "list", matched by its exact attribute string. */
    private static final Pattern MAIN_TABLE_PATTERN = Pattern.compile(
            "<table width=\"100%\" align=\"center\" cellpadding=\"0\" cellspacing=\"0\" class=\"list\">(.*?)</table>",
            Pattern.DOTALL);

    DBTools dbTools = new DBTools();

    /**
     * Downloads the page at {@code htmlURL}, decoding it as gb2312.
     *
     * <p>Lines are concatenated without any separator. I/O problems are
     * printed to standard error and whatever was read up to that point
     * (possibly the empty string) is returned; this method never throws
     * for I/O failures.
     *
     * @param htmlURL absolute URL of the page to download
     * @return the page content with line terminators removed
     */
    public String getHtmlContent(String htmlURL) {
        StringBuilder htmlContent = new StringBuilder();
        BufferedReader in = null;
        try {
            URL url = new URL(htmlURL);
            in = new BufferedReader(new InputStreamReader(url.openStream(), "gb2312"));
            String rowContent;
            while ((rowContent = in.readLine()) != null) {
                htmlContent.append(rowContent);
            }
        } catch (IOException e) {
            // MalformedURLException and UnsupportedEncodingException are
            // IOException subclasses, so one handler covers all three cases.
            e.printStackTrace();
        } finally {
            // Close the reader even when reading failed half-way.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return htmlContent.toString();
    }

    /**
     * Collects every table cell that starts with an anchor.
     *
     * @param htmlContent HTML fragment to scan
     * @return the full text of each matching cell, in document order
     */
    public List<String> getLink(String htmlContent) {
        List<String> cells = new ArrayList<String>();
        Matcher matcher = LINK_CELL_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            cells.add(matcher.group());
        }
        return cells;
    }

    /**
     * Extracts the values of all href attributes.
     *
     * <p>Double-quoted, single-quoted and unquoted attribute values are
     * supported; the surrounding quotes are not part of the result.
     *
     * @param htmlContent HTML fragment to scan
     * @return the href values, in document order
     */
    public List<String> getHref(String htmlContent) {
        List<String> hrefs = new ArrayList<String>();
        Matcher matcher = HREF_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            // Exactly one of the three groups participates in a match.
            String value = matcher.group(1);
            if (value == null) {
                value = matcher.group(2);
            }
            if (value == null) {
                value = matcher.group(3);
            }
            hrefs.add(value);
        }
        return hrefs;
    }

    /**
     * Finds anchors wrapped in parentheses.
     *
     * <p>Each result is the matched text with the first {@code href="}
     * and every double quote removed; tags and parentheses are still
     * present.
     *
     * @param htmlContent HTML fragment to scan
     * @return the processed matches, in document order
     */
    public List<String> getPerson(String htmlContent) {
        List<String> persons = new ArrayList<String>();
        Matcher matcher = PERSON_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            persons.add(matcher.group().replaceFirst("href=\"", "").replace("\"", ""));
        }
        return persons;
    }

    /**
     * Finds complete anchor elements that are followed by whitespace.
     *
     * @param htmlContent HTML fragment to scan
     * @return the matched anchors (including the trailing whitespace
     *         character), in document order
     */
    public List<String> getSongName(String htmlContent) {
        List<String> anchors = new ArrayList<String>();
        Matcher matcher = SONG_NAME_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            anchors.add(matcher.group());
        }
        return anchors;
    }

    /**
     * Extracts every table with class "list" and concatenates them.
     *
     * @param htmlContent full page HTML
     * @return the concatenated table markup, or the empty string when
     *         no such table is present
     */
    public String getMainContent(String htmlContent) {
        StringBuilder mainContent = new StringBuilder();
        Matcher matcher = MAIN_TABLE_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            mainContent.append(matcher.group());
        }
        return mainContent.toString();
    }

    /**
     * Removes everything that looks like a tag ({@code <...>}).
     *
     * @param s text that may contain markup
     * @return {@code s} without tags
     */
    public String outTag(final String s) {
        return s.replaceAll("<.*?>", "");
    }

    /**
     * Downloads the page at {@code htmlURL} and inserts one row into the
     * {@code song} table for each table cell that yields a song anchor.
     *
     * <p>Cells without a usable song anchor are skipped instead of
     * aborting the whole import. When a cell contains several
     * parenthesised anchors the last one is used as the artist; when it
     * contains none the artist is stored as "无".
     *
     * @param htmlURL absolute URL of the chart page
     * @throws Throwable whatever {@link DBTools#update} throws
     */
    public void getFromBaiduMap3(String htmlURL) throws Throwable {
        String htmlContent = getHtmlContent(htmlURL);
        String mainContent = getMainContent(htmlContent);
        List<String> cells = getLink(mainContent);

        String sql = "insert into song(songName,songPerson,songHref) values(?,?,?)";

        for (int j = 0; j < cells.size(); j++) {
            String tdTag = cells.get(j);

            List<String> songAnchors = getSongName(tdTag);
            if (songAnchors.isEmpty()) {
                // No anchor followed by whitespace in this cell: nothing to store.
                continue;
            }
            String songAnchor = songAnchors.get(0);

            List<String> hrefList = getHref(songAnchor);
            if (hrefList.isEmpty()) {
                continue;
            }
            String songName = outTag(songAnchor);
            String songHref = hrefList.get(0);

            List<String> personList = getPerson(tdTag);
            String songPerson;
            if (personList.isEmpty()) {
                songPerson = "无";
            } else {
                songPerson = outTag(personList.get(personList.size() - 1));
            }

            ArrayList<Object> list_values = new ArrayList<Object>();
            list_values.add(songName);
            list_values.add(songPerson);
            list_values.add(songHref);
            dbTools.update(sql, list_values);
        }
    }
}
DBTools数据库连接类:
复制代码 代码如下:
package dbTools;

import java.util.ArrayList;
import java.sql.*;

/**
 * Minimal JDBC helper that opens one MySQL connection in its constructor
 * and runs parameterised queries and updates on it.
 *
 * <p>Statements and result sets are created per call and always closed
 * before the call returns. The connection itself is never closed by
 * this class.
 */
public class DBTools {

    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are printed to standard error and do not propagate;
     * in that case {@link #query} and {@link #update} throw an
     * {@link SQLException} when they are called.
     */
    public DBTools() {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            // NOTE(review): URL and credentials are hard-coded; consider
            // moving them to external configuration.
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/TestURL", "root", "zhuyi");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised SELECT and returns all rows as strings.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order
     * @return one {@code String[]} per row, holding every column read
     *         with {@link ResultSet#getString(int)}
     * @throws Throwable in practice an {@link SQLException}; the broad
     *                   declaration is kept so existing callers compile
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values)
            throws Throwable {
        ArrayList<String[]> listRows = new ArrayList<String[]>();
        PreparedStatement statement = prepare(sql, list_values);
        try {
            ResultSet rows = statement.executeQuery();
            try {
                // The column count is fixed for the whole result set.
                int columnCount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columnCount];
                    for (int i = 0; i < columnCount; i++) {
                        rowinfo[i] = rows.getString(i + 1);
                    }
                    listRows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
        return listRows;
    }

    /**
     * Runs a parameterised INSERT, UPDATE or DELETE.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order
     * @throws Throwable in practice an {@link SQLException}; the broad
     *                   declaration is kept so existing callers compile
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = prepare(sql, list_values);
        try {
            statement.executeUpdate();
        } finally {
            statement.close();
        }
    }

    /**
     * Creates a statement for {@code sql} and binds {@code values} to
     * its placeholders; the statement is closed again if binding fails.
     */
    private PreparedStatement prepare(String sql, ArrayList<?> values)
            throws SQLException {
        if (connection == null) {
            throw new SQLException(
                    "No database connection is available; opening it failed in the constructor");
        }
        PreparedStatement statement = connection.prepareStatement(sql);
        boolean bound = false;
        try {
            for (int i = 0; i < values.size(); i++) {
                statement.setObject(i + 1, values.get(i));
            }
            bound = true;
            return statement;
        } finally {
            if (!bound) {
                statement.close();
            }
        }
    }
}
Servlet调用:
复制代码 代码如下:
package controller;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import webTools.IOTOWeb;

/**
 * Servlet that triggers the song-chart import on GET and returns a small
 * static HTML page on POST.
 */
public class TestURL extends HttpServlet {

    /**
     * Constructor of the object.
     */
    public TestURL() {
        super();
    }

    /**
     * Destruction of the servlet. <br>
     */
    public void destroy() {
        super.destroy(); // Just puts "destroy" string in log
    }

    /**
     * Runs the song-chart import for a fixed chart URL.
     *
     * <p>On success the response is left empty. On failure the problem
     * is written to the servlet log and the client receives HTTP 500
     * instead of an empty success response.
     *
     * @param request
     *            the request send by the client to the server
     * @param response
     *            the response send by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        try {
            IOTOWeb iotoWeb = new IOTOWeb();
            iotoWeb.getFromBaiduMap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");
        } catch (Throwable e) {
            // getFromBaiduMap3 declares Throwable, so that is what must be caught.
            log("Importing the song chart failed", e);
            response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                    "Importing the song chart failed");
        }
    }

    /**
     * Writes a static HTML page naming this servlet class.
     *
     * @param request
     *            the request send by the client to the server
     * @param response
     *            the response send by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.setContentType("text/html");
        PrintWriter out = response.getWriter();
        out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
        out.println("<HTML>");
        out.println(" <HEAD><TITLE>A Servlet</TITLE></HEAD>");
        out.println(" <BODY>");
        out.print(" This is ");
        out.print(this.getClass());
        out.println(", using the POST method");
        out.println(" </BODY>");
        out.println("</HTML>");
        out.flush();
        out.close();
    }

    /**
     * Initialization of the servlet. <br>
     *
     * @throws ServletException
     *             if an error occurs
     */
    public void init() throws ServletException {
        // Nothing to initialise.
    }
}
获取金书网的图书名:
复制代码 代码如下:
package webTools;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dbTools.DBTools;

/**
 * Downloads a book detail page, extracts the book title from its
 * {@code <span class="style15">} element and stores it through
 * {@link DBTools}.
 */
public class GetBook {

    /** The title span; its content must not contain a '>' character. */
    private static final Pattern BOOK_NAME_PATTERN = Pattern.compile(
            "<span class=\"style15\">[^>]*</span>", Pattern.DOTALL);

    DBTools dbtools = new DBTools();

    /**
     * Downloads the page at {@code htmlURL}, decoding it as gb2312.
     *
     * <p>Lines are concatenated without any separator. The reader is
     * closed even when reading fails.
     *
     * @param htmlURL absolute URL of the page to download
     * @return the page content with line terminators removed
     * @throws Throwable if the URL is malformed or the page cannot be read
     */
    public String getHtmlContent(String htmlURL) throws Throwable {
        StringBuilder htmlContent = new StringBuilder();
        URL url = new URL(htmlURL);
        BufferedReader in = new BufferedReader(
                new InputStreamReader(url.openStream(), "gb2312"));
        try {
            String rowContent;
            while ((rowContent = in.readLine()) != null) {
                htmlContent.append(rowContent);
            }
        } finally {
            in.close();
        }
        return htmlContent.toString();
    }

    /**
     * Finds the first title span in the page.
     *
     * @param htmlContent full page HTML
     * @return the matched span including its tags, or the empty string
     *         when the page has no such span
     */
    public String getBookName(String htmlContent) {
        Matcher matcher = BOOK_NAME_PATTERN.matcher(htmlContent);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Removes everything that looks like a tag ({@code <...>}).
     *
     * @param s text that may contain markup
     * @return {@code s} without tags
     */
    public String outTag(final String s) {
        return s.replaceAll("<.*?>", "");
    }

    /**
     * Downloads the page at {@code htmlURL} and, when it contains a
     * non-empty title, prints the title and inserts it into the
     * {@code bookinfo} table.
     *
     * @param htmlURL absolute URL of the book detail page
     * @throws Throwable if the download or the database insert fails
     */
    public void getFromJINSHU(String htmlURL) throws Throwable {
        String htmlContent = getHtmlContent(htmlURL);
        String bookName = outTag(getBookName(htmlContent));
        if (bookName.length() == 0) {
            return;
        }
        System.out.println(bookName);
        String sql = "insert into bookinfo(bookName) values(?)";
        ArrayList<Object> list_values = new ArrayList<Object>();
        list_values.add(bookName);
        dbtools.update(sql, list_values);
    }
}
调用Servlet:
复制代码 代码如下:
package controller;

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import webTools.GetBook;

/**
 * Servlet that walks a fixed range of book detail pages and stores each
 * book title found. GET and POST behave identically.
 */
public class TestBook extends HttpServlet {

    /** First page id that is requested. */
    private static final int FIRST_BOOK_ID = 1;

    /** Exclusive upper bound of the page ids that are requested. */
    private static final int BOOK_ID_LIMIT = 10000;

    /**
     * Constructor of the object.
     */
    public TestBook() {
        super();
    }

    /**
     * Destruction of the servlet. <br>
     */
    public void destroy() {
        super.destroy(); // Just puts "destroy" string in log
    }

    /**
     * Imports the book titles; see {@link #importBooks()}.
     *
     * @param request
     *            the request send by the client to the server
     * @param response
     *            the response send by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        importBooks();
    }

    /**
     * Imports the book titles; see {@link #importBooks()}.
     *
     * @param request
     *            the request send by the client to the server
     * @param response
     *            the response send by the server to the client
     * @throws ServletException
     *             if an error occurred
     * @throws IOException
     *             if an error occurred
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        importBooks();
    }

    /**
     * Requests every page id in the configured range once and imports
     * the title of each page.
     *
     * <p>A page that fails is logged and skipped, and the loop simply
     * moves on to the next id. The loop counter is local to the call,
     * so every request walks the whole range and concurrent requests do
     * not share progress state.
     */
    private void importBooks() {
        GetBook bookinfo = new GetBook();
        for (int bookId = FIRST_BOOK_ID; bookId < BOOK_ID_LIMIT; bookId++) {
            String bookURL = "http://www.golden-book.com/booksinfo/12/" + bookId + ".html";
            try {
                bookinfo.getFromJINSHU(bookURL);
            } catch (Throwable e) {
                if (e instanceof Error) {
                    // JVM-level problems are not per-page failures; let them surface.
                    throw (Error) e;
                }
                // Message only: thousands of missing pages would flood the log with traces.
                log("Skipping " + bookURL + ": " + e);
            }
        }
    }

    /**
     * Initialization of the servlet. <br>
     *
     * @throws ServletException
     *             if an error occurs
     */
    public void init() throws ServletException {
        // Nothing to initialise.
    }
}
每种功能的实现方法有很多,希望各位可以交流不同的思想和方法。可以加QQ412546724。呵呵
  • 全部评论(0)
联系客服
客服电话:
400-000-3129
微信版

扫一扫进微信版
返回顶部