package xyz.anduo.crawler; import java.util.Map.Entry; import com.sleepycat.bind.EntryBinding; import com.sleepycat.bind.serial.SerialBinding; import com.sleepycat.collections.StoredMap; @SuppressWarnings({"rawtypes", "unchecked"}) public class BDBFrontier extends AbstractFrontier implements Frontier { private StoredMap pendingUrisDB = null; public BDBFrontier(String homeDirectory) { super(homeDirectory); EntryBinding keyBinding = new SerialBinding(javaCatalog, String.class); EntryBinding valueBinding = new SerialBinding(javaCatalog, CrawlUrl.class); pendingUrisDB = new StoredMap(database, keyBinding, valueBinding, true); } /** * 获取下一条记录 */ public CrawlUrl getNext() throws Exception { CrawlUrl result = null; if (!pendingUrisDB.isEmpty()) { Entry<String, CrawlUrl> entry = (Entry<String, CrawlUrl>) pendingUrisDB.entrySet().iterator().next(); result = entry.getValue(); delete(entry.getKey()); } return result; } /** * 存入url * * @param url * @return * @throws Exception */ public boolean putUrl(CrawlUrl url) throws Exception { put(url.getOriUrl(), url); return true; } /** * 存入数据库的方法 * * @param key * @param value */ @Override protected void put(Object key, Object value) { pendingUrisDB.put(key, value); } /** * 取出 * * @param key * @return */ @Override protected Object get(Object key) { return pendingUrisDB.get(key); } @Override protected Object delete(Object key) { return pendingUrisDB.remove(key); } /** * 根据url计算键值 ,可以使用各种压缩算法,包括MD5等压缩算法 * * @param url * @return */ @SuppressWarnings("unused") private String caculateUrl(String url) { MD5Utils md5Utils = new MD5Utils(); return md5Utils.getMD5ofStr(url); } }