/*
############################################################################
##
## Copyright (C) 2006-2009 University of Utah. All rights reserved.
##
## This file is part of DeepPeep.
##
## This file may be used under the terms of the GNU General Public
## License version 2.0 as published by the Free Software Foundation
## and appearing in the file LICENSE.GPL included in the packaging of
## this file. Please review the following to ensure GNU General Public
## Licensing requirements will be met:
## http://www.opensource.org/licenses/gpl-license.php
##
## If you are unsure which license is appropriate for your use (for
## instance, you are interested in developing a commercial derivative
## of DeepPeep), please contact us at deeppeep@sci.utah.edu.
##
## This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
## WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
##
############################################################################
*/
package focusedCrawler.link.backlink;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import focusedCrawler.link.LinkStorageConfig.BackSurferConfig;
import focusedCrawler.util.parser.BackLinkNeighborhood;
/**
 * Retrieves backlinks (pages that link to a given URL) through a
 * {@link BacklinkApi} implementation, throttling consecutive requests.
 *
 * <p>The Moz-based implementation is used when both a Moz access id and a Moz
 * key are present in the configuration; otherwise the Google-based
 * implementation is used.
 *
 * <p>NOTE(review): the throttling state ({@code lastVisit}) is not
 * synchronized; confirm that instances are not shared between threads.
 */
public class BacklinkSurfer {

    private static final Logger logger = LoggerFactory.getLogger(BacklinkSurfer.class);

    /** Minimum time, in milliseconds, enforced between two backlink requests. */
    private static final long MIN_REQUEST_INTERVAL_MS = 10000;

    private final BacklinkApi backlinkApi;

    /** Timestamp (ms since epoch) of the last request; 0 means no request was made yet. */
    private long lastVisit = 0;

    /**
     * Creates a surfer, selecting the backlink provider from the configuration.
     *
     * @param config configuration holding the (optional) Moz credentials
     */
    public BacklinkSurfer(BackSurferConfig config) {
        if (config.getMozAccessId() != null && config.getMozKey() != null) {
            logger.info("Using backlinks from Moz API.");
            this.backlinkApi = new MozBacklinkApi(config.getMozAccessId(), config.getMozKey());
        } else {
            logger.info("Using backlinks from Google.");
            this.backlinkApi = new GoogleBacklinkApi();
        }
    }

    /**
     * Downloads the backlinks of the given URL, waiting first if the previous
     * request was made less than {@link #MIN_REQUEST_INTERVAL_MS} ago.
     *
     * @param url the page whose backlinks should be fetched
     * @return whatever the underlying API returned; may be {@code null}
     * @throws MalformedURLException declared for backward compatibility with callers
     * @throws IOException if encoding the URL fails or the API call fails
     */
    public BackLinkNeighborhood[] getLNBacklinks(URL url) throws MalformedURLException, IOException {
        waitTimeLimitIfNecessary();
        String urlNoProtocol = URLEncoder.encode(stripProtocol(url), "UTF-8");
        BackLinkNeighborhood[] links = backlinkApi.downloadBacklinks(urlNoProtocol);
        if (links != null) {
            logger.info("Found {} backlinks.", links.length);
            for (BackLinkNeighborhood link : links) {
                logger.info(link.getLink());
            }
        }
        return links;
    }

    /**
     * Returns the textual form of the URL without its leading
     * {@code <protocol>://} prefix. Works for any scheme (http, https, ...)
     * instead of assuming the prefix is exactly 7 characters long.
     */
    private static String stripProtocol(URL url) {
        String text = url.toString();
        String prefix = url.getProtocol() + "://";
        if (text.startsWith(prefix)) {
            return text.substring(prefix.length());
        }
        // No "<protocol>://" prefix (e.g. "file:/..."); leave the text untouched.
        return text;
    }

    /**
     * Blocks until at least {@link #MIN_REQUEST_INTERVAL_MS} has passed since
     * the previous request, then records the current time as the last request.
     * The very first call never waits.
     */
    private void waitTimeLimitIfNecessary() {
        if (lastVisit != 0) {
            long elapsed = System.currentTimeMillis() - lastVisit;
            if (elapsed < MIN_REQUEST_INTERVAL_MS) {
                // Sleep only for the time still missing to complete the interval.
                // The min() guards against a wall clock that moved backwards
                // (negative elapsed), which would otherwise extend the wait.
                long remaining = Math.min(MIN_REQUEST_INTERVAL_MS, MIN_REQUEST_INTERVAL_MS - elapsed);
                try {
                    logger.info("Waiting time limit to download backlinks.");
                    Thread.sleep(remaining);
                } catch (InterruptedException e) {
                    logger.warn("Interrupted while waiting for time limit");
                    // Restore the interrupt flag so callers can react to it.
                    Thread.currentThread().interrupt();
                }
            }
        }
        lastVisit = System.currentTimeMillis();
    }
}