爬车系图片
parent
1a71fa0dac
commit
304112ccf1
@ -1,13 +1,159 @@
|
||||
package cj.reptile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
* Hello world!
|
||||
*
|
||||
*/
|
||||
public class App
|
||||
{
|
||||
public static void main( String[] args )
|
||||
{
|
||||
System.out.println( "Hello World!" );
|
||||
}
|
||||
public class App {
|
||||
|
||||
public static String pp(String input) {
|
||||
// 定义正则表达式
|
||||
String regex = "\\d+";
|
||||
|
||||
// 创建Pattern对象
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
|
||||
// 创建Matcher对象
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
|
||||
// 查找匹配的内容
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group();
|
||||
return match;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public static String urlName(String filename) {
|
||||
filename = filename.replaceAll(":", "");
|
||||
filename = filename.replaceAll("\\*", "");
|
||||
filename = filename.replaceAll("\\?", "");
|
||||
filename = filename.replaceAll("<", "");
|
||||
filename = filename.replaceAll(">", "");
|
||||
filename = filename.replaceAll("|", "");
|
||||
return filename;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException, URISyntaxException {
|
||||
/*
|
||||
* String folderPath = "D:\\db"; File folder = new File(folderPath);
|
||||
*
|
||||
* if (!folder.exists()) { if (folder.mkdirs()) {
|
||||
* System.out.println("文件夹创建成功!"); } else { System.out.println("文件夹创建失败!"); } }
|
||||
* else { System.out.println("文件夹已存在!"); }
|
||||
*/
|
||||
|
||||
/*
|
||||
* if(src != null) { InputStream inputStream = getFileInputStream("https:"+src);
|
||||
* ByteArrayOutputStream bytestream = new ByteArrayOutputStream(); int ch;while
|
||||
* ((ch = inputStream.read()) != -1) { bytestream.write(ch); } String fileName =
|
||||
* BaseInfoS.getFileName(bytestream.toByteArray(),
|
||||
* src.substring(src.lastIndexOf("/") + 1)); // bytestream.toByteArray()
|
||||
* inputStream.close(); bytestream.close();
|
||||
* System.out.println("fileName:"+fileName); brand.put("brand_logo", fileName);
|
||||
* }
|
||||
*/
|
||||
|
||||
String letter = "";
|
||||
for (int a = 0; a < 26; a++) {
|
||||
char chr = (char) ((int) 'A' + a);
|
||||
letter = chr + "";
|
||||
Connection connect = Jsoup.connect("https://www.autohome.com.cn/grade/carhtml/" + letter + ".html");
|
||||
Document document = connect.get();
|
||||
String sp = "";
|
||||
Elements els = document.select("dl");
|
||||
for (int i = 0; i < els.size(); i++) {
|
||||
Element e = els.get(i);
|
||||
String pai = e.select("dt div a").text();
|
||||
String src = e.select("dt img").attr("src");
|
||||
|
||||
// Elements nodes = e.select("dd ul a");
|
||||
Elements nodes = e.select("dd h4 a");
|
||||
for (int j = 0; j < nodes.size(); j++) {
|
||||
Element s = nodes.get(j);
|
||||
// s.attr("href")
|
||||
// href="//www.autohome.com.cn/5998/#levelsource=000000000_0&pvareaid=101594"
|
||||
String f = s.text();
|
||||
|
||||
pai = urlName(pai);
|
||||
f = urlName(f);
|
||||
String path = "D:\\db\\" + pai + "\\" + f;
|
||||
|
||||
|
||||
// path = path.replaceAll(":", "");
|
||||
|
||||
|
||||
File folder = new File(path);
|
||||
|
||||
|
||||
// folder.mkdirs();
|
||||
// atk_5998
|
||||
|
||||
Connection href = Jsoup.connect("https:" + s.attr("href"));
|
||||
Document documentt = href.get();
|
||||
Elements ee = documentt.select(".pic-main a");
|
||||
String imgsrc = ee.select("img").attr("src");
|
||||
// Element ee = e.getElementById("atk_"+pp(s.attr("href")));
|
||||
// path += "\\";
|
||||
if (imgsrc != null && imgsrc.length() > 0) {
|
||||
String url="https:" +imgsrc;
|
||||
|
||||
String savePath=path;
|
||||
String filename=pai+f;
|
||||
folder.mkdirs();
|
||||
download(url,savePath,filename.hashCode()+".jpg");
|
||||
|
||||
}
|
||||
//System.out.println(imgsrc);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void download(String urlString, String savePath, String filename) throws IOException {
|
||||
System.out.println(urlString);
|
||||
System.out.println(savePath+filename);
|
||||
// 构造URL
|
||||
URL url = new URL(urlString);
|
||||
// 打开连接
|
||||
URLConnection con = url.openConnection();
|
||||
// 设置请求超时为20s
|
||||
con.setConnectTimeout(20 * 1000);
|
||||
// 文件路径不存在 则创建
|
||||
File sf = new File(savePath);
|
||||
if (!sf.exists()) {
|
||||
sf.mkdirs();
|
||||
}
|
||||
InputStream in = con.getInputStream();
|
||||
OutputStream out = new FileOutputStream(sf.getPath() + "\\" + filename);
|
||||
// 创建缓冲区
|
||||
byte[] buff = new byte[1024];
|
||||
int n;
|
||||
// 开始读取
|
||||
while ((n = in.read(buff)) >= 0) {
|
||||
out.write(buff, 0, n);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
Binary file not shown.
Loading…
Reference in New Issue