import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/*
* 获取京东评论url列表
*/
public class MyThreading {
private static String p_id = null;
private static Url urls = null;
public MyThreading(String p_id){
this.p_id = p_id ; // 京东商品的id
urls = new Url(p_id);
}
public List<String> getUriList(){
ExecutorService executor = Executors.newCachedThreadPool();
for (int i = 0 ; i < 600 ; i ++){
executor.execute(new AddUrl(i)); // 添加任务到线程池
}
executor.shutdown();
while (!executor.isTerminated()){}
return urls.getList();
}
public static class AddUrl implements Runnable{
int page;
public AddUrl(int page){
this.page = page;
}
public void run(){
urls.addList(page); // 启动多线程任务
}
}
public static class Url {
private static Lock lock = new ReentrantLock(); // 开启显式家锁
private static List<String> urlList = new ArrayList();
private String p_id;
public Url(String p_id ){
this.p_id = p_id ;
}
public List<String> getList(){
return urlList;
}
public void addList(int page){
lock.lock();
try{
String url = "http://club.jd.com/productpage/p-" + p_id + "-s-0-t-0-p-" + String.valueOf(page) + ".html";
// Thread.sleep(5);
urlList.add(url); //添加url到url列表
}catch(Exception ex ){
}
finally {
lock.unlock(); // 解锁
}
}
}
public static void main(String[] args) {
String p_id = "2441288";
MyThreading myThreading = new MyThreading(p_id);
List <String> urlList = myThreading.getUriList();
for(String url : urlList){
System.out.println(url);
}
System.out.println(urlList.size());
}
}
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ThreadingCrawel {
private static Content content = null;
private static List<String> urlList = null;
public ThreadingCrawel(List<String> urlList){
this.urlList = urlList;
content = new Content();
}
public List<String> getContent(){
ExecutorService executor = Executors.newCachedThreadPool();
for (String url : urlList){
executor.execute(new AddContent(url));
}
executor.shutdown();
while(!executor.isTerminated()){}
return content.getContent();
}
public static class AddContent implements Runnable{
String url;
public AddContent(String url){
this.url = url;
}
public void run(){
content.addContent(url);
}
}
public static class Content {
private static Lock lock = new ReentrantLock();
private static List<String> contentList = new ArrayList();
public void addContent(String url){
String content = "";
BufferedReader in = null;
try{
URL realUrl = new URL(url);
URLConnection connection = realUrl.openConnection();
in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "gbk"));
String line;
while( (line = in.readLine()) != null){
content += line +"n";
}
}catch(Exception e){
e.printStackTrace();
}
finally{
try{
if (in != null){
in.close();
}
}catch(Exception e2){
e2.printStackTrace();
}
}
Pattern p = Pattern.compile("content":".*?"");
Matcher match = p.matcher(content);
String tmp;
lock.lock();
while(match.find()){
tmp = match.group();
tmp = tmp.replaceAll(""", "");
tmp = tmp.replace("content:", "");
tmp = tmp.replaceAll("<.*?>", "");
contentList.add(tmp);
try {
Thread.sleep(1);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
lock.unlock();
}
public List getContent(){
return contentList;
}
}
public static void main(String[] args){
long start = System.currentTimeMillis();
String p_id = "2441288";
MyThreading myThreading = new MyThreading(p_id);
List <String> urlList = myThreading.getUriList();
ThreadingCrawel threadingCrawel = new ThreadingCrawel(urlList);
List <String> contentList = threadingCrawel.getContent();
for(String content : contentList){
System.out.println(content);
}
long end = System.currentTimeMillis();
System.out.println(end - start);
}
}
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有