package focusedCrawler.link.backlink;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import focusedCrawler.target.model.Page;
import focusedCrawler.util.parser.BackLinkNeighborhood;

/**
 * {@link BacklinkApi} implementation that queries the link endpoint at
 * {@code http://lsapi.seomoz.com/linkscape/links/} and converts the JSON
 * response into {@link BackLinkNeighborhood} entries.
 *
 * <p>Every request is preceded by a fixed pause of {@value #SLEEP_TIME_MS} ms.
 */
public class MozBacklinkApi implements BacklinkApi {

    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    private static final String BASE_URL = "http://lsapi.seomoz.com/linkscape/links/";

    private static final String QUERY_STR =
            "?Filter=external&Scope=page_to_page&Limit=50&Sort=page_authority&SourceCols=5&TargetCols=4&";

    /** Pause applied before every request, in milliseconds. */
    private static final long SLEEP_TIME_MS = 5000;

    /** Connection timeout, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 30000;

    /** Read timeout, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    private final MozAuthenticator authenticator;

    /**
     * Creates a client that authenticates with the given credentials.
     *
     * @param mozAccessId access id passed to {@link MozAuthenticator}
     * @param mozKey      key passed to {@link MozAuthenticator}
     */
    public MozBacklinkApi(String mozAccessId, String mozKey) {
        this.authenticator = new MozAuthenticator(mozAccessId, mozKey, 300);
    }

    /**
     * Downloads the backlinks reported for the given host.
     *
     * @param host value appended verbatim to the endpoint path
     * @return one entry per element of the JSON array returned by the service
     *         (possibly empty, never {@code null})
     * @throws IOException if the request fails or the response is not the
     *                     expected JSON array of link objects
     */
    public BackLinkNeighborhood[] downloadBacklinks(String host) throws IOException {
        // The authentication string is requested per call instead of being
        // cached at construction time. NOTE(review): the 300 handed to
        // MozAuthenticator looks like a validity window in seconds - if so, a
        // cached string would go stale in a long-running process. Confirm
        // against MozAuthenticator.
        String authStr = authenticator.getAuthenticationStr();
        String backlink = BASE_URL + host + QUERY_STR + authStr;
        Page page = downloadPage(newURL(backlink));
        return parseResponse(page.getContentAsString());
    }

    /**
     * Converts the JSON response body into backlink entries. For each array
     * element the {@code "uu"} field becomes the link (prefixed with
     * {@code http://}) and the {@code "ut"} field becomes the title.
     *
     * @throws IOException if the content is not valid JSON, is not a JSON
     *                     array, or an element has no {@code "uu"} field
     */
    private BackLinkNeighborhood[] parseResponse(String content)
            throws IOException, JsonProcessingException {
        JsonNode root = JSON_MAPPER.readTree(content);
        // Anything other than an array (for example an error object) used to
        // surface as a NullPointerException further down; report it clearly.
        if (root == null || !root.isArray()) {
            throw new IOException("Unexpected response from Moz API: expected a JSON array of links");
        }

        BackLinkNeighborhood[] backlinks = new BackLinkNeighborhood[root.size()];
        Iterator<JsonNode> entries = root.elements();
        for (int i = 0; i < backlinks.length; i++) {
            JsonNode entry = entries.next();
            JsonNode linkNode = entry.get("uu");
            if (linkNode == null) {
                throw new IOException("Unexpected response from Moz API: entry " + i + " has no \"uu\" field");
            }
            BackLinkNeighborhood backlink = new BackLinkNeighborhood();
            backlink.setLink("http://" + linkNode.asText());
            // path() yields a "missing" node whose text is empty, so an absent
            // title does not abort the whole batch.
            backlink.setTitle(entry.path("ut").asText());
            backlinks[i] = backlink;
        }
        return backlinks;
    }

    /**
     * Waits {@value #SLEEP_TIME_MS} ms, then fetches the URL and returns its
     * body with the lines joined by single spaces.
     *
     * @throws IOException           if the connection or the read fails
     * @throws IllegalStateException if the thread is interrupted while waiting
     */
    private Page downloadPage(URL url) throws IOException {
        try {
            Thread.sleep(SLEEP_TIME_MS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting sleepTime in MozBacklinkApi", e);
        }

        URLConnection conn = url.openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        StringBuilder buffer = new StringBuilder();
        // try-with-resources closes the stream even when readLine() throws.
        // The body is parsed as JSON, so decode it as UTF-8 instead of relying
        // on the platform default charset.
        try (BufferedReader inCon = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = inCon.readLine()) != null) {
                buffer.append(inputLine).append(' ');
            }
        }
        return new Page(url, buffer.toString());
    }

    /**
     * Builds a {@link URL}, prefixing {@code http://} when the string does not
     * already start with an HTTP(S) scheme.
     *
     * @throws MalformedURLException if the resulting string is not a valid URL
     */
    private URL newURL(String url) throws MalformedURLException {
        // startsWith rather than indexOf: an "http://" occurring later in the
        // string (e.g. inside a query parameter) must not count as a scheme.
        if (url.startsWith("http://") || url.startsWith("https://")) {
            return new URL(url);
        }
        return new URL("http://" + url);
    }
}