package us.codecraft.webmagic.pipeline;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.FilePersistentBase;

import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Map;

/**
 * Pipeline that persists extracted results to files.
 * <p>
 * Each processed page is written to its own file named after the MD5 hex digest of the
 * request URL (with an {@code .html} suffix), inside a sub-directory named after the
 * task's UUID.
 *
 * @author code4crafter@gmail.com <br>
 *         Date: 13-4-21
 *         Time: 6:28 PM
 */
public class FilePipeline extends FilePersistentBase implements Pipeline {

    /** Charset used for the output files, fixed so results do not depend on the platform default. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    // Bound to the runtime class so subclasses log under their own name.
    private final Logger logger = Logger.getLogger(getClass());

    /**
     * Creates a FilePipeline that uses the default save path "/data/webmagic/".
     */
    public FilePipeline() {
        setPath("/data/webmagic/");
    }

    /**
     * Creates a FilePipeline.
     *
     * @param path directory under which result files are saved
     */
    public FilePipeline(String path) {
        setPath(path);
    }

    /**
     * Writes the request URL and every extracted field of {@code resultItems} to a file.
     * <p>
     * The first line is {@code url:<TAB><url>}. Each field follows as
     * {@code key:<TAB>value}; a field whose value is an {@link Iterable} is written as
     * {@code key:} followed by one element per line.
     * <p>
     * I/O failures are logged at WARN level and not propagated to the caller.
     *
     * @param resultItems extracted results together with the request they came from
     * @param task        the owning task; its UUID names the output sub-directory
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
        String url = resultItems.getRequest().getUrl();
        PrintWriter printWriter = null;
        try {
            // getFile is inherited from FilePersistentBase; presumably it prepares the
            // parent directories for the given path -- confirm against that class.
            printWriter = new PrintWriter(new OutputStreamWriter(
                    new FileOutputStream(getFile(path + DigestUtils.md5Hex(url) + ".html")),
                    OUTPUT_CHARSET));
            printWriter.println("url:\t" + url);
            for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
                if (entry.getValue() instanceof Iterable) {
                    Iterable<?> value = (Iterable<?>) entry.getValue();
                    printWriter.println(entry.getKey() + ":");
                    for (Object o : value) {
                        printWriter.println(o);
                    }
                } else {
                    printWriter.println(entry.getKey() + ":\t" + entry.getValue());
                }
            }
            // PrintWriter never throws on write failures; checkError() flushes and
            // reports whether any of the writes above failed.
            if (printWriter.checkError()) {
                logger.warn("write file error, url: " + url);
            }
        } catch (IOException e) {
            logger.warn("write file error", e);
        } finally {
            // Close in finally so the file handle is released even when an unchecked
            // exception escapes while the results are being written.
            if (printWriter != null) {
                printWriter.close();
            }
        }
    }
}