爬车系图片

main
anna1795 2 years ago
parent 1a71fa0dac
commit 304112ccf1

@ -15,6 +15,13 @@
</properties>
<dependencies>
<!-- html 解析 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

@ -1,13 +1,159 @@
package cj.reptile;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
* Hello world!
*
*/
public class App
{
public static void main( String[] args )
{
System.out.println( "Hello World!" );
}
public class App {
/**
 * Extracts the first run of consecutive decimal digits from a piece of text.
 *
 * @param input text to scan; must not be {@code null}
 * @return the first digit sequence found, or an empty string when the text
 *         contains no digit
 */
public static String pp(String input) {
    // Only the left-most match matters, so a single find() is enough.
    Matcher digitRun = Pattern.compile("\\d+").matcher(input);
    return digitRun.find() ? digitRun.group() : "";
}
/**
 * Removes the characters {@code : * ? < > |} from a name so that it can be
 * used as a file or directory name.
 *
 * @param filename raw name; must not be {@code null}
 * @return the name with every occurrence of the characters above deleted
 */
public static String urlName(String filename) {
    // '|' has to sit inside a character class: written as a bare regex it is an
    // alternation of two empty patterns, matches only the empty string, and
    // therefore strips nothing. Inside [...] all six symbols are literals.
    return filename.replaceAll("[:*?<>|]", "");
}
/**
 * Crawls the autohome.com.cn index pages for the letters A to Z and saves one
 * picture per link found on them.
 *
 * <p>For every letter the page
 * {@code https://www.autohome.com.cn/grade/carhtml/<letter>.html} is fetched.
 * Each {@code dl} element on it supplies a first-level folder name (the text
 * of {@code dt div a}, presumably the brand) and a list of {@code dd h4 a}
 * links (presumably the car series). Each link is followed, the {@code src}
 * of the image under {@code .pic-main a} is read, and the image is stored as
 * {@code D:\db\<brand>\<series>\<hash>.jpg}, where {@code <hash>} is the
 * {@code hashCode()} of the concatenated, sanitised brand and series names.
 * Links whose page has no such image are skipped.
 *
 * @param args not used
 * @throws IOException        if any page fetch or image download fails; the
 *                            crawl stops at the first failure
 * @throws URISyntaxException declared for compatibility with existing
 *                            callers; nothing in this method throws it
 */
public static void main(String[] args) throws IOException, URISyntaxException {
    for (char letter = 'A'; letter <= 'Z'; letter++) {
        Document indexPage = Jsoup
                .connect("https://www.autohome.com.cn/grade/carhtml/" + letter + ".html")
                .get();

        for (Element brandBlock : indexPage.select("dl")) {
            // Sanitise once per brand; the value is the same for every series below it.
            String pai = urlName(brandBlock.select("dt div a").text());

            for (Element seriesLink : brandBlock.select("dd h4 a")) {
                String f = urlName(seriesLink.text());

                // The href is protocol-relative, e.g.
                // "//www.autohome.com.cn/5998/#levelsource=000000000_0&pvareaid=101594".
                Document seriesPage = Jsoup.connect("https:" + seriesLink.attr("href")).get();

                // attr() yields "" (never null) when no matching element carries the attribute.
                String imgsrc = seriesPage.select(".pic-main a").select("img").attr("src");
                if (imgsrc.length() == 0) {
                    continue;
                }

                // download() creates the target directory when it is missing.
                String savePath = "D:\\db\\" + pai + "\\" + f;
                String filename = pai + f;
                download("https:" + imgsrc, savePath, filename.hashCode() + ".jpg");
            }
        }
    }
}
/**
 * Downloads the resource at {@code urlString} and stores it as
 * {@code filename} inside the directory {@code savePath}.
 *
 * <p>The source URL and the resolved target path are printed to standard
 * output before the transfer starts. The directory, including missing
 * parents, is created when it does not exist yet.
 *
 * @param urlString absolute URL of the resource to fetch
 * @param savePath  directory the file is written into
 * @param filename  name of the file to create inside {@code savePath}
 * @throws IOException if the URL is malformed, the directory cannot be
 *                     created, the connection fails or times out, or the
 *                     file cannot be written
 */
public static void download(String urlString, String savePath, String filename) throws IOException {
    File sf = new File(savePath);
    // Resolve through File so the platform's separator is used and the logged
    // path is exactly the path that gets written.
    File target = new File(sf, filename);

    System.out.println(urlString);
    System.out.println(target.getPath());

    URLConnection con = new URL(urlString).openConnection();
    // 20 s to establish the connection and 20 s per read, so a stalled server
    // cannot block the caller forever.
    con.setConnectTimeout(20 * 1000);
    con.setReadTimeout(20 * 1000);

    // mkdirs() also returns false when the directory already exists, hence
    // the second isDirectory() check before reporting a failure.
    if (!sf.isDirectory() && !sf.mkdirs() && !sf.isDirectory()) {
        throw new IOException("Cannot create directory: " + sf.getPath());
    }

    // try/finally guarantees both streams are released even when the
    // transfer fails half-way.
    InputStream in = con.getInputStream();
    try {
        OutputStream out = new FileOutputStream(target);
        try {
            byte[] buff = new byte[8192];
            int n;
            while ((n = in.read(buff)) >= 0) {
                out.write(buff, 0, n);
            }
        } finally {
            out.close();
        }
    } finally {
        in.close();
    }
}
}

@ -1,5 +1,5 @@
#Generated by Maven Integration for Eclipse
#Wed Nov 29 11:52:31 CST 2023
#Wed Nov 29 14:44:11 CST 2023
m2e.projectLocation=D\:\\code\\reptile
m2e.projectName=cj-reptile
groupId=cj

@ -15,6 +15,13 @@
</properties>
<dependencies>
<!-- html 解析 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

Loading…
Cancel
Save