/*
 * Copyright (C) 2012 Sebastian Straub <sebastian-straub@gmx.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.nx42.wotcrawler.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.nx42.wotcrawler.xml.Crawler;

/**
 * Downloads all relevant pages from the wot wiki and makes them available
 * offline, so that a parsing error does not force you to download everything
 * again.
 * Working from the local copies is highly recommended, as it may save a lot
 * of bandwidth.
 *
 * @author Sebastian Straub <sebastian-straub@gmx.net>
 */
public class Download {

    private static final Logger log = LoggerFactory.getLogger(Download.class);

    /** the crawler, used to retrieve and generate the download urls */
    protected static final Crawler cr = new Crawler();

    /** name of the subfolder that receives the tank overview pages */
    public static final String folderLists = "lists";
    /** name of the subfolder that receives the module overview pages */
    public static final String folderModules = "modules";
    /** name of the subfolder that receives the single tank detail pages */
    public static final String folderTanks = "tanks";

    /** maximum number of bytes requested per transferFrom call (16 MiB) */
    private static final long TRANSFER_CHUNK_SIZE = 1L << 24;

    /**
     * Downloads all relevant wiki pages (tank overview, modules and tanks)
     * into the subfolders {@link #folderLists}, {@link #folderModules} and
     * {@link #folderTanks} of the specified folder. Progress is printed to
     * standard output.
     * @param downloadFolder the folder where the html pages are stored
     */
    public static void downloadAll(String downloadFolder) {
        System.out.println("Downloading tank overview pages... ");
        downloadTankLists(downloadFolder + "/" + folderLists);

        System.out.println("\nDownloading module overview pages... ");
        downloadModules(downloadFolder + "/" + folderModules);

        System.out.println("\nDownloading single tank detail pages...");
        downloadTanks(downloadFolder + "/" + folderTanks);
    }

    /**
     * Downloads the tank overview pages to the specified folder
     * @param downloadFolder the folder where the html pages are stored
     */
    public static void downloadTankLists(String downloadFolder) {
        downloadPages(downloadFolder, cr.getTankOverviewURLs());
    }

    /**
     * Downloads the module overview pages to the specified folder
     * @param downloadFolder the folder where the html pages are stored
     */
    public static void downloadModules(String downloadFolder) {
        downloadPages(downloadFolder, cr.getModuleOverviewURLs());
    }

    /**
     * Downloads the single tank detail pages to the specified folder
     * @param downloadFolder the folder where the html pages are stored
     */
    public static void downloadTanks(String downloadFolder) {
        downloadPages(downloadFolder, cr.getTankURLs());
    }

    /**
     * Downloads the files / pages from the specified URLs to the specified
     * folder, creating the folder if it does not exist yet.
     * The local file name is derived from the URL path (without its leading
     * slash) via {@link Crawler#siteToFileName}, which is expected to replace
     * characters that are illegal in file names, such as / or \.
     * An I/O failure for a single page is logged and does not abort the
     * remaining downloads.
     * @param downloadFolder the folder where the files / pages are stored
     * @param pages the files / pages, as a list of URLs
     */
    public static void downloadPages(String downloadFolder, List<URL> pages) {
        File targetDir = new File(downloadFolder);

        for (URL page : pages) {
            // The path is empty for URLs without a path component, so only
            // strip the leading slash when there actually is one; a blind
            // substring(1) would throw and abort the whole batch.
            String path = page.getPath();
            String site = path.startsWith("/") ? path.substring(1) : path;
            String fsName = Crawler.siteToFileName(site);

            try {
                System.out.println(String.format("Downloading page '%s' to file '%s'", page.getPath(), fsName));
                if (!targetDir.exists()) {
                    targetDir.mkdirs();
                }
                downloadFile(page, downloadFolder, fsName);
            } catch (MalformedURLException ex) {
                log.error("Downloading of page failed: invalid URL", ex);
            } catch (FileNotFoundException ex) {
                log.error("Downloading of page failed: local File not found", ex);
            } catch (IOException ex) {
                log.error("Downloading of page failed: Access error", ex);
            }
        }
    }

    /**
     * Downloads a single file to the specified folder with the specified
     * filename. An existing file of the same name is overwritten. Both the
     * remote stream and the local file are closed even if the transfer fails.
     * @param url the URL from where the file is downloaded
     * @param folder the folder where the file is stored in
     * @param filename the name the file shall have
     * @throws FileNotFoundException thrown if the local file cannot be opened
     * for writing (e.g. the root folder does not exist)
     * @throws IOException thrown if the remote resource cannot be read or the
     * destination file cannot be written (e.g. because it is locked)
     */
    protected static void downloadFile(URL url, String folder, String filename)
            throws FileNotFoundException, IOException {
        // Open the remote side first: if the URL cannot be opened, no local
        // file is created.
        ReadableByteChannel source = Channels.newChannel(url.openStream());
        try {
            FileOutputStream target = new FileOutputStream(new File(folder, filename));
            try {
                // A single transferFrom call is limited to the requested count
                // and may transfer less, so keep going until the source
                // reports no more bytes.
                long position = 0;
                long transferred;
                while ((transferred = target.getChannel()
                        .transferFrom(source, position, TRANSFER_CHUNK_SIZE)) > 0) {
                    position += transferred;
                }
                target.flush();
            } finally {
                target.close();
            }
        } finally {
            source.close();
        }
    }

}