如何解决使用 openCSV
我在使用 JSoup 时遇到问题,因为它给我一个格式错误的 URL 错误。如果我将 URL 硬编码到程序中,它可以正常工作,但是如果我将 csv 文件读入 List
堆栈跟踪是
Exception in thread "restartedMain" java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.springframework.boot.devtools.restart.RestartLauncher.run(RestartLauncher.java:49)
Caused by: java.lang.IllegalArgumentException: Malformed URL: http://www.clubmark.org.uk/
at org.jsoup.helper.httpconnection.url(httpconnection.java:131)
at org.jsoup.helper.httpconnection.connect(httpconnection.java:70)
at org.jsoup.Jsoup.connect(Jsoup.java:73)
at com.domainModel.DownloadImages.findImages(DownloadImages.java:43)
at com.workingprojects.WebScraperApplication.main(WebScraperApplication.java:40)
我的主要课程是
@SpringBootApplication
@EntityScan({"com.bootstrap","com.domainModel"})
@ComponentScan({"com.bootstrap","com.domainModel"})
public class WebScraperApplication {
public static void main(String[] args) throws IOException,Csvexception {
SpringApplication.run(WebScraperApplication.class,args);
DownloadImages downloadImages = new DownloadImages();
ReadCSV readCSV = new ReadCSV();
ArrayList<String[]> urls = (ArrayList<String[]>) readCSV.csvReader("C:\\link1.csv");
for (int i = 0; i < 1; i++) {
String[] thisURLObject = urls.get(0);
String thisURL =thisURLObject[0];
String status = downloadImages.findImages(thisURL,"C:\\Users\\xxx\\images");
System.out.println(thisURL + status);
}
;
System.out.println("finished");
}
}
我获取图像和发现问题的类是
package com.domainModel;
import org.jsoup.Jsoup;
public class DownloadImages {
//The url of the website.
@Getter @Setter
private String webSiteURL;
//The path of the folder that you want to save the images to
@Getter @Setter
private String folderPath;
public String findImages(String webSiteURL,String folderPath ) {
try {
//Connect to the website and get the html
Document doc = Jsoup.connect(webSiteURL).get();
//Get all elements with img tag,Elements img = doc.getElementsByTag("img");
System.out.println("Images is" + img.size());
String folderNameWk2 = webSiteURL.replace(".html","");
String folderNameWk3 = folderNameWk2.replace("http://","");
Path path = Paths.get(folderPath + folderNameWk3);
Files.createDirectories(path);
String path1 = path.toString();
System.out.println("The path is " + path1);
int counter = 0;
for (Element el : img) {
String docName = String.valueOf(counter)+".jpeg";
//for each element get the srs url
String src = el.absUrl("src");
System.out.println("Image Found!");
System.out.println("src attribute is : "+src);
getimages(src,path1,docName);
counter = counter+1;
}
} catch (IOException ex) {
System.err.println("There was an error");
System.out.println(ex);
// Logger.getLogger(DownloadImages.class.getName()).log(Level.SEVERE,null,ex);
}
return "complete";
}
private void getimages(String src,String folderPath,String docName) throws IOException {
// String folder = null;
//Exctract the name of the image from the src attribute
int indexname = src.lastIndexOf("/");
if (indexname == src.length()) {
src = src.substring(1,indexname);
}
indexname = src.lastIndexOf("/");
String name = src.substring(indexname,src.length());
System.out.println(name);
//Open a URL Stream
URL url = new URL(src);
InputStream in = url.openStream();
OutputStream out = new bufferedoutputstream(new FileOutputStream(folderPath+"/" + docName));
for (int b; (b = in.read()) != -1;) {
out.write(b);
}
out.close();
in.close();
}
/**
* @param webSiteURL
* @param folderPath
*/
public DownloadImages(String webSiteURL,String folderPath) {
super();
this.webSiteURL = webSiteURL;
this.folderPath = folderPath;
}
/**
*
*/
public DownloadImages() {
super();
}
}
And the class which gets the CSV file is
package com.domainModel;
public class ReadCSV {
public List<String[]> csvReader(String fileName) throws IOException,Csvexception{
try (CSVReader reader = new CSVReader(new FileReader(fileName))) {
List<String[]> r = reader.readAll();
return r;
}
}
}
读入 CSV 文件的班级
public class ReadCSV {
public List<String[]> csvReader(String fileName) throws IOException,Csvexception{
try (CSVReader reader = new CSVReader(new FileReader(fileName))) {
List<String[]> r = reader.readAll();
return r;
}
}
}
我有理由确定问题在于我从列表中传递的内容的格式,但是当我查看这些值时,它们肯定是字符串
csv 文件的前两行
http://www.clubmark.org.uk/,http://www.designit-uk.com/,
记事本前两行数据的图片
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。