/* Copyright (c) 2014 BREDEX GmbH. All rights reserved. This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html */ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.jsoup.*; import org.jsoup.nodes.*; import org.jsoup.select.Elements; public class HtmlImageLister { public static List<File> getFilesRecursive(String path) { List<File> fileList = new ArrayList<File>(); File[] fileArray = new File(path).listFiles(); for(File f : fileArray) { if(f.isDirectory()) { fileList.addAll(getFilesRecursive(f.getPath())); } else { fileList.add(f); } } return fileList; } // Searches for images in html files and outputs the paths to these files in stdout public static void main(String[] args) { try { if(args.length == 0) { System.err.println("Please provide a path!\n Usage: java -jar HtmlImageLister.jar <path>"); System.exit(-1); } List<File> fileList = HtmlImageLister.getFilesRecursive(args[0]); Set<String> imageSet = new HashSet<>(); for(File f : fileList) { Document doc = Jsoup.parse(f, "UTF-8"); Elements els = doc.getElementsByTag("img"); for(Element el : els) { imageSet.add((f.getParentFile() + "/").concat(el.attr("src"))); } } for(String s : imageSet) { System.out.println(s); } } catch (IOException e) { e.printStackTrace(); } } }