/** * * APDPlat - Application Product Development Platform * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.search.paper; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apdplat.search.Tools; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * * @author 杨尚川 */ public abstract class AbstractPaperCollector implements PaperCollector{ protected final Logger LOG = LoggerFactory.getLogger(getClass()); @Override public List<File> collect() { return collect(new Date()); } /** * 根据下载链接提取文件夹名称 * @param href 下载链接 * @return 文件夹名称 */ protected abstract String getPath(String href); /** * 根据下载链接提取文件名称 * @param href 下载链接 * @return 文件名称 */ protected abstract String getFile(String href); protected List<File> downloadPaper(List<String> hrefs){ final List<File> files = new ArrayList<>(); List<Thread> ts = new ArrayList<>(); LOG.info("报纸有"+hrefs.size()+"个版面需要下载:"); for(final String href : hrefs){ Thread t = new Thread(new Runnable(){ @Override public void run() { File file = downloadPaper(href); if(file != null){ files.add(file); } } }); t.start(); ts.add(t); } for(Thread t : ts){ try { t.join(); } catch (InterruptedException ex) { LOG.error("下载报纸出错:",ex); } } return files; } protected File downloadPaper(String href){ try{ LOG.info("下载报纸:"+href); String path = getPath(href); LOG.debug("报纸保存目录:"+path); String file = getFile(href); LOG.debug("报纸保存文件:"+file); File dir = new File(path); if(!dir.exists()){ LOG.debug("创建目录:"+dir.getAbsolutePath()); dir.mkdirs(); } File absoluteFile = new File(path, file); LOG.debug("报纸保存绝对路径:"+absoluteFile.getAbsolutePath()); Tools.copyFile(new URL(href).openStream(), absoluteFile); LOG.info("报纸下载成功:"+href); LOG.info("报纸成功保存到:"+absoluteFile.getAbsolutePath()); return absoluteFile; }catch(IOException e){ LOG.error("报纸下载失败:"+e); } return null; } protected void run() { //今天 List<File> files = collect(); int i = 1; for(File file : files){ LOG.info((i++)+" : " + file.getAbsolutePath()); } //昨天 Date date = new Date(); date.setTime(System.currentTimeMillis()-24*3600*1000); files = collect(date); i = 1; for(File file : files){ LOG.info((i++)+" : " + file.getAbsolutePath()); } //前天 date = new Date(); date.setTime(System.currentTimeMillis()-2*24*3600*1000); files = collect(date); i = 1; for(File file : files){ LOG.info((i++)+" : " + file.getAbsolutePath()); } } }