// Fetch https://www.baidu.com with Jsoup and keep the string form of the parsed result.
String htmlPage = Jsoup.connect("https://www.baidu.com").get().toString();
String url = "https://item.jd.com/11476104681.html"; 完全可以替换成:String url = "https://item.jd.com/" + skuId + ".html";
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
/**
 * HTTP request utility class.
 *
 * <p>Every call creates its own {@link DefaultHttpClient} and shuts down its
 * connection manager before returning, whether or not the request succeeded.
 *
 * @author LuoLong
 * @since 20150513
 */
public class HttpClientUtils {

    /**
     * Sends a POST request whose body is the given parameters, form-encoded.
     *
     * @param url    request URL
     * @param params form parameters; {@code null} is treated as an empty form
     * @return the response body as a string, or {@code null} if the request
     *         failed or the response carried no entity
     */
    public static String post(String url, Map<String, String> params) {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost post = postForm(url, params);
            return invoke(httpclient, post);
        } finally {
            // Always release the connections, even if building or running the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Sends a GET request.
     *
     * @param url request URL
     * @return the response body as a string, or {@code null} if the request
     *         failed or the response carried no entity
     */
    public static String get(String url) {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            HttpGet get = new HttpGet(url);
            return invoke(httpclient, get);
        } finally {
            // Always release the connections, even if building or running the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Executes the request and converts the response to a string.
     *
     * @param httpclient client used to execute the request
     * @param request    the request to execute
     * @return the response body, or {@code null} on failure
     */
    private static String invoke(DefaultHttpClient httpclient,
            HttpUriRequest request) {
        HttpResponse response = sendRequest(httpclient, request);
        return parseResponse(response);
    }

    /**
     * Reads the response entity into a string.
     *
     * @param response the response to read; may be {@code null} when the request failed
     * @return the entity content, or {@code null} if there is no response, no
     *         entity, or reading the entity failed
     */
    private static String parseResponse(HttpResponse response) {
        // sendRequest() yields null after an I/O or protocol error; report "no body"
        // instead of failing with a NullPointerException.
        if (response == null) {
            return null;
        }
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return null;
        }
        try {
            return EntityUtils.toString(entity);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Executes the request, reporting (not propagating) protocol and I/O errors.
     *
     * @param httpclient client used to execute the request
     * @param request    the request to execute
     * @return the response, or {@code null} if execution threw
     */
    private static HttpResponse sendRequest(DefaultHttpClient httpclient,
            HttpUriRequest request) {
        HttpResponse response = null;
        try {
            response = httpclient.execute(request);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return response;
    }

    /**
     * Builds a POST request carrying the parameters as a UTF-8 form entity.
     *
     * @param url    request URL
     * @param params form parameters; {@code null} produces an empty form
     * @return the prepared POST request
     */
    @SuppressWarnings("deprecation")
    private static HttpPost postForm(String url, Map<String, String> params) {
        HttpPost httpost = new HttpPost(url);
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        if (params != null) {
            // entrySet() avoids a second map lookup per key.
            for (Map.Entry<String, String> entry : params.entrySet()) {
                nvps.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
            }
        }
        try {
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return httpost;
    }
}
String content = HttpClientUtils.get(url); 或者,你也可以先把页面下载到本地,再解析该 HTML 文档来获取内容:File input = new File(FilePath); Document doc = Jsoup.parse(input, "UTF-8", url);
</form>
<div id="m"></div>
</div>
</div>
<div id="u">
<a class="toindex" href="/" rel="external nofollow" >百度首页</a>
<a href="javascript:;" rel="external nofollow" name="tj_settingicon" class="pf">设置<i class="c-icon c-icon-triangle-down"></i></a>
<a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" rel="external nofollow" name="tj_login" class="lb" onclick="return false;">登录</a>
</div>
<div id="u1">
<a href="http://news.baidu.com" rel="external nofollow" name="tj_trnews" class="mnav">新闻</a>
<a href="http://www.hao123.com" rel="external nofollow" name="tj_trhao123" class="mnav">hao123</a>
<a href="http://map.baidu.com" rel="external nofollow" name="tj_trmap" class="mnav">地图</a>
<a href="http://v.baidu.com" rel="external nofollow" name="tj_trvideo" class="mnav">视频</a>
<a href="http://tieba.baidu.com" rel="external nofollow" name="tj_trtieba" class="mnav">贴吧</a>
<a href="http://xueshu.baidu.com" rel="external nofollow" name="tj_trxueshu" class="mnav">学术</a>
<a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" rel="external nofollow" name="tj_login" class="lb" onclick="return false;">登录</a>
<a href="http://www.baidu.com/gaoji/preferences.html" rel="external nofollow" name="tj_settingicon" class="pf">设置</a>
<a href="http://www.baidu.com/more/" rel="external nofollow" name="tj_briicon" class="bri" style="display: block;">更多产品</a>
</div>
</div>
</div>
<div class="s_tab" id="s_tab">
<b>网页</b>
<a href="http://news.baidu.com/ns?cl=2&rn=20&tn=news&word=" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'news'})">新闻</a>
<a href="http://tieba.baidu.com/f?kw=&fr=wwwt" rel="external nofollow" wdfield="kw" onmousedown="return c({'fm':'tab','tab':'tieba'})">贴吧</a>
<a href="http://zhidao.baidu.com/q?ct=17&pn=0&tn=ikaslist&rn=10&word=&fr=wwwt" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'zhidao'})">知道</a>
<a href="http://music.baidu.com/search?fr=ps&ie=utf-8&key=" rel="external nofollow" wdfield="key" onmousedown="return c({'fm':'tab','tab':'music'})">音乐</a>
<a href="http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'pic'})">图片</a>
<a href="http://v.baidu.com/v?ct=301989888&rn=20&pn=0&db=0&s=25&ie=utf-8&word=" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'video'})">视频</a>
<a href="http://map.baidu.com/m?word=&fr=ps01000" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'map'})">地图</a>
<a href="http://wenku.baidu.com/search?word=&lm=0&od=0&ie=utf-8" rel="external nofollow" wdfield="word" onmousedown="return c({'fm':'tab','tab':'wenku'})">文库</a>
<a href="//www.baidu.com/more/" rel="external nofollow" onmousedown="return c({'fm':'tab','tab':'more'})">更多»</a>
</div>
<div class="qrcodeCon">
<div id="qrcode">
<div class="qrcode-item qrcode-item-1">
<div class="qrcode-img"></div>
<div class="qrcode-text">
<p><b>手机百度</b></p>
</div>
</div>
</div>
</div>
<div id="ftCon">
<div class="ftCon-Wrapper">
<div id="ftConw">
<p id="lh"><a id="setf" href="//www.baidu.com/cache/sethelp/help.html" rel="external nofollow" onmousedown="return ns_c({'fm':'behs','tab':'favorites','pos':0})" target="_blank">把百度设为主页</a><a onmousedown="return ns_c({'fm':'behs','tab':'tj_about'})" href="http://home.baidu.com" rel="external nofollow" >关于百度</a><a onmousedown="return ns_c({'fm':'behs','tab':'tj_about_en'})" href="http://ir.baidu.com" rel="external nofollow" >About Baidu</a><a onmousedown="return ns_c({'fm':'behs','tab':'tj_tuiguang'})" href="http://e.baidu.com/?refer=888" rel="external nofollow" >百度推广</a></p>
<p id="cp">©2017 Baidu <a href="http://www.baidu.com/duty/" rel="external nofollow" onmousedown="return ns_c({'fm':'behs','tab':'tj_duty'})">使用百度前必读</a> <a href="http://jianyi.baidu.com/" rel="external nofollow" class="cp-feedback" onmousedown="return ns_c({'fm':'behs','tab':'tj_homefb'})">意见反馈</a> 京ICP证030173号 <i class="c-icon-icrlogo"></i> <a id="jgwab" target="_blank" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=11000002000001" rel="external nofollow" >京公网安备11000002000001号</a> <i class="c-icon-jgwablogo"></i></p>
</div>
</div>
</div>
<div id="wrapper_wrapper">
</div>
</div>
<div class="c-tips-container" id="c-tips-container"></div>
method | description
getElementsByClass():通过 class 属性来定位元素,获取的是所有带有该 class 属性的元素集合
getElementsByTag():通过标签名来定位元素,获取的是所有带有该标签名的元素集合
getElementById():通过标签的 ID 来定位元素,这是精准定位,因为页面中的 ID 基本不会重复
getElementsByAttributeValue():通过属性名和属性值来定位元素,获取的也是满足条件的元素集合
getElementsByAttributeValueMatching():通过正则表达式匹配属性值来定位元素
// Request the page, sending a Chrome-on-Windows User-Agent string, and parse it.
String homeUrl = "https://www.baidu.com";
String browserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36";
Document page = Jsoup.connect(homeUrl).userAgent(browserAgent).get();
// Locate the parent container: the element whose id is "u".
Element container = page.getElementById("u");
// Locate the target element: the first <a> tag inside that container.
Element firstLink = container.getElementsByTag("a").get(0);
// Extract the wanted data (the link text) and print it.
System.out.println(firstLink.text());
// Request the page, sending a Chrome-on-Windows User-Agent string, and parse it.
String homeUrl = "https://www.baidu.com";
String browserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36";
Document page = Jsoup.connect(homeUrl).userAgent(browserAgent).get();
// Start from the element whose id is "qrcode".
Element qrcode = page.getElementById("qrcode");
// Step through getAllElements() twice: entry 0 of the first list, then entry 1 of the next.
Element outer = qrcode.getAllElements().get(0);
Element inner = outer.getAllElements().get(1);
// Print the text of the <b> tag(s) found under that element.
System.out.println(inner.getElementsByTag("b").text());
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有