package us.codecraft.webmagic.pipeline;

import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.HasKey;
import us.codecraft.webmagic.model.PageModelPipeline;
import us.codecraft.webmagic.utils.FilePersistentBase;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Pipeline that persists page model objects to files in JSON format.<br>
 * If the names of the persisted files appear garbled, add LANG=zh_CN.UTF-8
 * to the environment variables of the running process.<br>
 * File contents are always encoded as UTF-8, independent of the platform
 * default charset.
 *
 * @author code4crafter@gmail.com <br>
 * Date: 13-4-21
 * Time: 6:28 PM
 */
public class JsonFilePageModelPipeline extends FilePersistentBase implements PageModelPipeline {

    private final Logger logger = Logger.getLogger(getClass());

    /**
     * Creates a JsonFilePageModelPipeline that uses the default save path "/data/webmagic/".
     */
    public JsonFilePageModelPipeline() {
        setPath("/data/webmagic/");
    }

    /**
     * Creates a JsonFilePageModelPipeline.
     *
     * @param path the path under which files are saved
     */
    public JsonFilePageModelPipeline(String path) {
        setPath(path);
    }

    /**
     * Serializes {@code o} to JSON and writes it to
     * {@code <path>/<task UUID>/<name>.json}.
     * <p>
     * The file name is {@link HasKey#key()} when the object implements
     * {@link HasKey}; otherwise it is the MD5 hex digest of the object's
     * reflective string representation.
     * <p>
     * I/O failures are logged as warnings and are not propagated to the caller.
     *
     * @param o    the page model object to persist
     * @param task the task whose UUID names the sub-directory to write into
     */
    @Override
    public void process(Object o, Task task) {
        String path = this.path + "/" + task.getUUID() + "/";
        try {
            String filename;
            if (o instanceof HasKey) {
                filename = path + ((HasKey) o).key() + ".json";
            } else {
                filename = path + DigestUtils.md5Hex(ToStringBuilder.reflectionToString(o)) + ".json";
            }
            // Explicit UTF-8: the platform default charset would make the stored
            // JSON depend on the machine the crawler happens to run on.
            PrintWriter printWriter = new PrintWriter(getFile(filename), "UTF-8");
            try {
                printWriter.write(JSON.toJSONString(o));
            } finally {
                // Always release the file handle, even if serialization fails.
                printWriter.close();
            }
            // PrintWriter never throws on write; it only records that an error
            // happened, so the flag must be checked explicitly.
            if (printWriter.checkError()) {
                logger.warn("write file error: " + filename);
            }
        } catch (IOException e) {
            logger.warn("write file error", e);
        }
    }
}