package guang.crawler.siteManager.commandlet;
import guang.crawler.centerConfig.CenterConfig;
import guang.crawler.commons.WebURL;
import guang.crawler.jsonServer.Commandlet;
import guang.crawler.jsonServer.DataPacket;
import guang.crawler.siteManager.SiteConfig;
import guang.crawler.siteManager.SiteManager;
import guang.crawler.siteManager.jobQueue.MapQueue;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import com.alibaba.fastjson.JSON;
/**
* 爬虫工作者向站点管理器请求URL,该Commandlet将负责处理该请求.
*
* @author sun
*
*/
public class URLsGetter implements Commandlet {
/**
* 属性的key,表示需要获取的数量
*/
private static final String KEY_COUNT = "COUNT";
/**
* 属性的key,表示URL列表
*/
private static final String KEY_URL_LIST = "URL_LIST";
@Override
public DataPacket doCommand(final DataPacket request) {
HashMap<String, String> data = request.getData();
if (SiteConfig.me()
.isBackupTime())// 如果当前正在进行相关文件的备份,那么就暂时不提供url了。
{
return this.noOps(data);
}
SiteManager siteManager = SiteManager.me();
MapQueue<WebURL> todoList = siteManager.getToDoTaskList();
MapQueue<WebURL> workingList = siteManager.getWorkingTaskList();
if ((todoList.getLength() == 0) && (workingList.getLength() == 0)) {
// 当前站点没有什么需要做的,那么就设置当前站点为完成状态
try {
SiteConfig.me()
.getSiteToHandle()
.setFinished(true, true);
SiteManager.me()
.stopGathering();
SiteManager.me()
.getBackuperDaemon()
.clearBackups();
CenterConfig.me()
.getSiteManagersConfigInfo()
.getOnlineSiteManagers()
.notifyChanged();
return null;
} catch (Exception e) {
return this.noOps(data);
}
}
// 如果一切正常,那么就正常的取值处理
String count = data.get(URLsGetter.KEY_COUNT);
// 这里暂时只获取一个
int num = 1;
if (count != null) {
try {
num = Integer.parseInt(count);
} catch (NumberFormatException e) {
num = 1;
}
}
List<WebURL> urls = todoList.get(num);
DataPacket result = new DataPacket();
result.setTitle("OK");
result.setData(data);
data.put(URLsGetter.KEY_COUNT, String.valueOf(urls.size()));
int i = 0;
long currentTime = new Date().getTime();
for (WebURL url : urls) {
url.startTime(currentTime)
.increaseTryTime();
String urlString = JSON.toJSONString(url);
data.put(URLsGetter.KEY_URL_LIST + i++, urlString);
siteManager.getWorkingTaskList()
.put(url);
}
return result;
}
private DataPacket noOps(final HashMap<String, String> data) {
DataPacket result = new DataPacket();
result.setTitle("ERROR");
result.setData(data);
data.put(URLsGetter.KEY_COUNT, "0");
return result;
}
}