diff --git a/pom.xml b/pom.xml index d3a1d5b..dffb9c9 100644 --- a/pom.xml +++ b/pom.xml @@ -15,6 +15,13 @@ + + + org.jsoup + jsoup + 1.10.2 + + junit junit diff --git a/src/main/java/cj/reptile/App.java b/src/main/java/cj/reptile/App.java index f9469c1..7f85aa5 100644 --- a/src/main/java/cj/reptile/App.java +++ b/src/main/java/cj/reptile/App.java @@ -1,13 +1,159 @@ package cj.reptile; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + /** * Hello world! * */ -public class App -{ - public static void main( String[] args ) - { - System.out.println( "Hello World!" ); - } +public class App { + + public static String pp(String input) { + // 定义正则表达式 + String regex = "\\d+"; + + // 创建Pattern对象 + Pattern pattern = Pattern.compile(regex); + + // 创建Matcher对象 + Matcher matcher = pattern.matcher(input); + + // 查找匹配的内容 + while (matcher.find()) { + String match = matcher.group(); + return match; + } + return ""; + } + + public static String urlName(String filename) { + filename = filename.replaceAll(":", ""); + filename = filename.replaceAll("\\*", ""); + filename = filename.replaceAll("\\?", ""); + filename = filename.replaceAll("<", ""); + filename = filename.replaceAll(">", ""); + filename = filename.replaceAll("|", ""); + return filename; + } + + public static void main(String[] args) throws IOException, URISyntaxException { + /* + * String folderPath = "D:\\db"; File folder = new File(folderPath); + * + * if (!folder.exists()) { if (folder.mkdirs()) { + * System.out.println("文件夹创建成功!"); } else { System.out.println("文件夹创建失败!"); } } + * else { System.out.println("文件夹已存在!"); } + */ + + /* + * if(src != null) { InputStream inputStream = getFileInputStream("https:"+src); + * ByteArrayOutputStream bytestream = new ByteArrayOutputStream(); int ch;while + * ((ch = inputStream.read()) != -1) { bytestream.write(ch); } String fileName = + * BaseInfoS.getFileName(bytestream.toByteArray(), + * src.substring(src.lastIndexOf("/") + 1)); // bytestream.toByteArray() + * inputStream.close(); bytestream.close(); + * System.out.println("fileName:"+fileName); brand.put("brand_logo", fileName); + * } + */ + + String letter = ""; + for (int a = 0; a < 26; a++) { + char chr = (char) ((int) 'A' + a); + letter = chr + ""; + Connection connect = Jsoup.connect("https://www.autohome.com.cn/grade/carhtml/" + letter + ".html"); + Document document = connect.get(); + String sp = ""; + Elements els = document.select("dl"); + for (int i = 0; i < els.size(); i++) { + Element e = els.get(i); + String pai = e.select("dt div a").text(); + String src = e.select("dt img").attr("src"); + + // Elements nodes = e.select("dd ul a"); + Elements nodes = e.select("dd h4 a"); + for (int j = 0; j < nodes.size(); j++) { + Element s = nodes.get(j); + // s.attr("href") + // href="//www.autohome.com.cn/5998/#levelsource=000000000_0&pvareaid=101594" + String f = s.text(); + + pai = urlName(pai); + f = urlName(f); + String path = "D:\\db\\" + pai + "\\" + f; + + + // path = path.replaceAll(":", ""); + + + File folder = new File(path); + + + // folder.mkdirs(); + // atk_5998 + + Connection href = Jsoup.connect("https:" + s.attr("href")); + Document documentt = href.get(); + Elements ee = documentt.select(".pic-main a"); + String imgsrc = ee.select("img").attr("src"); +// Element ee = e.getElementById("atk_"+pp(s.attr("href"))); +// path += "\\"; + if (imgsrc != null && imgsrc.length() > 0) { + String url="https:" +imgsrc; + + String savePath=path; + String filename=pai+f; + folder.mkdirs(); + download(url,savePath,filename.hashCode()+".jpg"); + + } + //System.out.println(imgsrc); + + + } + } + } + } + + public static void download(String urlString, String savePath, String filename) throws IOException { + System.out.println(urlString); + System.out.println(savePath+filename); + // 构造URL + URL url = new URL(urlString); + // 打开连接 + URLConnection con = url.openConnection(); + // 设置请求超时为20s + con.setConnectTimeout(20 * 1000); + // 文件路径不存在 则创建 + File sf = new File(savePath); + if (!sf.exists()) { + sf.mkdirs(); + } + InputStream in = con.getInputStream(); + OutputStream out = new FileOutputStream(sf.getPath() + "\\" + filename); + // 创建缓冲区 + byte[] buff = new byte[1024]; + int n; + // 开始读取 + while ((n = in.read(buff)) >= 0) { + out.write(buff, 0, n); + } + + } + } diff --git a/target/classes/META-INF/maven/cj/cj-reptile/pom.properties b/target/classes/META-INF/maven/cj/cj-reptile/pom.properties index 71bb959..fced898 100644 --- a/target/classes/META-INF/maven/cj/cj-reptile/pom.properties +++ b/target/classes/META-INF/maven/cj/cj-reptile/pom.properties @@ -1,5 +1,5 @@ #Generated by Maven Integration for Eclipse -#Wed Nov 29 11:52:31 CST 2023 +#Wed Nov 29 14:44:11 CST 2023 m2e.projectLocation=D\:\\code\\reptile m2e.projectName=cj-reptile groupId=cj diff --git a/target/classes/META-INF/maven/cj/cj-reptile/pom.xml b/target/classes/META-INF/maven/cj/cj-reptile/pom.xml index d3a1d5b..dffb9c9 100644 --- a/target/classes/META-INF/maven/cj/cj-reptile/pom.xml +++ b/target/classes/META-INF/maven/cj/cj-reptile/pom.xml @@ -15,6 +15,13 @@ + + + org.jsoup + jsoup + 1.10.2 + + junit junit diff --git a/target/classes/cj/reptile/App.class b/target/classes/cj/reptile/App.class index 15b3bb3..adf6b8c 100644 Binary files a/target/classes/cj/reptile/App.class and b/target/classes/cj/reptile/App.class differ