多线程异步返回值
发布日期:2022-03-18 05:04:14 浏览次数:3 分类:技术文章

本文共 13739 字,大约阅读时间需要 45 分钟。

package com.zg.controller;import com.zg.jsoup.JsoupList;import org.apache.http.HttpEntity;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClients;import org.apache.tomcat.util.http.fileupload.IOUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.List;import java.util.UUID;import java.util.concurrent.Callable;import java.util.concurrent.FutureTask;public class ZgJsoupDemo {
public static void main1(String[] args){
try {
for (int i = 0; i < 107; i++) {
String url = "http://college.gaokao.com/schlist/p" + i + "/"; Document document = Jsoup.connect(url).get(); Elements dl1 = document.select("dl>dt>a");//.select("dl").select("dt"); File file = new File("D:/jsoupimg/uuidName.txt"); for (Element element2 : dl1) {
String href = element2.select("a[href]").attr("href"); Document doc = Jsoup.connect(href).get(); Elements college_msg = doc.getElementsByClass("college_msg"); for (Element element3 : college_msg) {
String imgs = element3.getElementsByClass("left").select("img[src]").attr("src"); CloseableHttpClient httpClient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet(imgs); CloseableHttpResponse httpResponse = httpClient.execute(httpGet); HttpEntity entity = httpResponse.getEntity(); InputStream is = entity.getContent(); String imgName = "schoolImg"+UUID.randomUUID().toString().replaceAll("-", "") + imgs.substring(imgs.lastIndexOf(".")); //String uuid = UUID.randomUUID().toString().replaceAll("-", "")+imgs.substring(imgs.lastIndexOf(".")); System.out.println(imgName); FileOutputStream out1 = new FileOutputStream("D:/jsoupimg/jsoupSchoolName/" + imgName); FileOutputStream out = new FileOutputStream(file); out.write(imgName.getBytes()); //IOUtils.copy(is, out); IOUtils.copy(is, out1); is.close(); out1.close(); } } } } catch (IOException e) {
e.printStackTrace(); } } public static void main2(String[] args) {
List
jsoupLists = new ArrayList<>(); String http = "https://www.amazon.com/s?k=cigarette&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&ref=nb_sb_noss_1"; try {
Document document = Jsoup.connect(http).get(); Elements elementsByClass = document.getElementsByClass("s-main-slot"); File file = new File("E:/jsoupimg/uuidName.txt"); for (Element byClass : elementsByClass) {
Elements elementsByClass1 = byClass.getElementsByClass("sg-col-4-of-12"); for (Element element : elementsByClass1) {
Elements elementsByClass2 = element.getElementsByClass("a-spacing-medium"); //新建对象存储爬取的数据 //第一层获取image for (Element element1 : elementsByClass2) {
Elements elementsByClass3 = element1.getElementsByClass("s-image-square-aspect"); for (Element element2 : elementsByClass3) {
// Elements img = element2.getElementsByTag("img");// String src = img.attr("src").trim();// //System.out.println(img); //拿到封面图// CloseableHttpClient httpClient = HttpClients.createDefault();// HttpGet httpGet = new HttpGet(src);// CloseableHttpResponse httpResponse = httpClient.execute(httpGet);// HttpEntity entity = httpResponse.getEntity();// InputStream is = entity.getContent();// String imgName = ""+UUID.randomUUID().toString().replaceAll("-", "") + src.substring(src.lastIndexOf("."));// try {
// Thread.sleep(1000);// } catch (InterruptedException e) {
// e.printStackTrace();// }// FileOutputStream out1 = new FileOutputStream("E:/jsoupimg/jsoupName/" + imgName);// FileOutputStream out = new FileOutputStream(file);//// out.write(imgName.getBytes());// //IOUtils.copy(is, out);// IOUtils.copy(is, out1);// is.close();// out1.close(); } } //第二层第一子层,描述层 for (Element element1 : elementsByClass2) {
JsoupList jsoupList = new JsoupList(); String img = element1.getElementsByTag("img").attr("src").trim(); //System.out.println(img); jsoupList.setImage(img); Elements elementsByClass3 = element1.getElementsByClass("a-size-base-plus"); //System.out.println(elementsByClass3.text()); //拿到描述 jsoupList.setTitle(elementsByClass3.text()); Elements elementsByClass4 = element1.getElementsByClass("a-color-information"); //System.out.println(elementsByClass4.text()); //这个是部分有 Pack of Elements elementsByClass5 = element1.getElementsByClass("a-icon-alt"); //System.out.println(elementsByClass5.text()); //部分无评分 拿到评分 Elements elementsByClass6 = element1.getElementsByClass("a-offscreen"); //System.out.println(elementsByClass6.text()); //拿到商品的价格 price 美元 jsoupList.setPrice(elementsByClass6.text()); Elements elementsByClass7 = element1.getElementsByClass("s-align-children-center"); //Element span = element1.getElementsByTag("span").last();// for (Element element2 : elementsByClass7) {
// String span = element2.getElementsByTag("span").text();// System.out.println(span); //发往中国// } System.out.println("----------------------------------------------------------------------------"); jsoupLists.add(jsoupList); } } } } catch (IOException e) {
e.printStackTrace(); }// //多线程爬虫1// Callable
> infocallable1 = new Callable
>() {
// public List
call() throws Exception { //// List
element = new ArrayList<>();//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// return element;// }// };//// //多线程爬虫1// Callable
> infocallable2 = new Callable
>() { // public List
call() throws Exception { //// List
element = new ArrayList<>();//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// return element;// }// };//// //多线程爬虫1// Callable
> infocallable3 = new Callable
>() { // public List
call() throws Exception { //// List
element = new ArrayList<>();//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }//// //1,学校1// try { //// String s = "http://www.51meishu.com/artexam/news/";// Document document = Jsoup.connect(s).get();// Elements listright_ = document.getElementsByClass("listright");// for (Element element1 : listright_) { // Elements select = element1.getElementsByTag("ul").select("li");// element.add(select);// }//// } catch (IOException e) { // e.printStackTrace();// }////// return element;// }// };// FutureTask
> info1 = new FutureTask<>(infocallable1);// FutureTask
> info2 = new FutureTask<>(infocallable2);// FutureTask
> info3 = new FutureTask<>(infocallable3);//// new Thread(info1).start();// new Thread(info2).start();// new Thread(info3).start();// try { // List
integer1 = info1.get();// List
integer2 = info2.get();// List
integer3 = info2.get();//// System.out.println(integer1);// System.out.println(integer2);// System.out.println(integer3);//// } catch (Exception e) { // e.printStackTrace();// } } public static void main8(String[] args) { String lt = "https://search.rakuten.co.jp/search/mall/Ploom+tech/?p=1"; try { Document document = Jsoup.connect(lt).get(); //Elements searchresults = document.getElementsByClass("searchresultitems"); System.out.println(document); System.out.println("----------------------------------"); } catch (Exception e) { e.printStackTrace(); } } public static void main(String[] args) { String lt = "https://t.me/s/BandwagonHostNews"; try { Document document = Jsoup.connect(lt).get(); //Elements searchresults = document.getElementsByClass("searchresultitems"); System.out.println(document); System.out.println("----------------------------------"); } catch (Exception e) { e.printStackTrace(); }// 54.254.161.51:63894 }}

转载地址:https://blog.csdn.net/qq_37430469/article/details/122829293 如侵犯了您的版权,请留言并附上原文章的地址,我们会为您删除此文章。给您带来不便,敬请谅解!

上一篇:Docker搭建logstash同步mysql数据到elasticsearch_EELK大数据可视化分析
下一篇:Java多线程下载文件

发表评论

最新留言

路过,博主的博客真漂亮。。
[***.116.15.85]2024年03月21日 03时22分05秒