package guang.crawler.crawlWorker.daemon; import guang.crawler.centerConfig.CenterConfig; import guang.crawler.centerConfig.siteManagers.SiteManagerInfo; import guang.crawler.commons.WebURL; import guang.crawler.connector.JSONServerConnector; import guang.crawler.jsonServer.DataPacket; import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map.Entry; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import com.alibaba.fastjson.JSON; /** * 由于站点管理器随时都有可能增加或者减少,因此应当及时的更新站点管理器的列表,从而有效的获取URL并处理.这里的方案是监听中央配置器中的相应节点, * 在这些节点发生变化时得到通知,从而做出相应的调整. * * @author sun * */ public class SiteManagerConnectorManager implements Watcher, Runnable { /** * 当前类的单例 */ private static SiteManagerConnectorManager connectorManager; /** * 获取单例 * * @return */ public static SiteManagerConnectorManager me() { if (SiteManagerConnectorManager.connectorManager == null) { SiteManagerConnectorManager.connectorManager = new SiteManagerConnectorManager(); } return SiteManagerConnectorManager.connectorManager; } /** * 缓存的有效的站点管理器的连接 */ private HashMap<String, JSONServerConnector> connectors; /** * 中央配置器 */ private CenterConfig centerConfig; /** * 缓存的有效站点管理器的连接的遍历器 */ private Iterator<Entry<String, JSONServerConnector>> connectorIterator; /** * 事件发生的时间 */ private Date eventTime = new Date(); /** * 管理线程 */ private Thread managerThread; /** * 当前线程是否需要被关闭了 */ private boolean shutdown = false; private SiteManagerConnectorManager() { this.centerConfig = CenterConfig.me(); this.connectors = new HashMap<String, JSONServerConnector>(); } /** * 结束线程 * * @throws IOException */ public void exit() throws IOException { this.shutdown = true; } /** * 获取当前缓存的连接的数量 * * @return */ public int getSiteManagerConnectorSize() { synchronized (this.connectors) { return this.connectors.size(); } } /** * 获取一个URL * * @return 返回null,表示获取失败,应当再次获取;返回非null表示成功获取了URL;如果当前没有可以使用的连接,那么直接阻塞。 * @throws InterruptedException */ public WebURL getURL() throws InterruptedException { // 轮询方式获取下一个可用的站点管理器的连接器 JSONServerConnector connector = null; synchronized (this.connectors) { while (this.connectors.size() == 0) { this.connectors.wait(); } if ((this.connectorIterator == null) || !this.connectorIterator.hasNext()) { this.connectorIterator = this.connectors.entrySet() .iterator(); } connector = this.connectorIterator.next() .getValue(); } if (connector == null) { return null; } // 向站点管理器发送请求,获取一个URL DataPacket data = new DataPacket("/url/get", null, null); HashMap<String, String> requestData = new HashMap<String, String>(); requestData.put("COUNT", "1"); data.setData(requestData); DataPacket result = null; boolean success = connector.open(); if (success) { try { connector.send(data); result = connector.read(); } catch (IOException e) { result = null; } finally { connector.shutdown(); } } // 解析获取的数据并返回 if (result != null) { int count = Integer.parseInt(result.getData() .get("COUNT")); if (count > 0) { String url = result.getData() .get("URL_LIST" + 0); return JSON.parseObject(url, WebURL.class); } } return null; } /** * 初始化当前类 * * @return */ public SiteManagerConnectorManager init() { this.managerThread = new Thread(this, "SiteManagerConnectorDaemon"); this.managerThread.setDaemon(true); return this; } /** * 处理监听的事件 */ @Override public void process(final WatchedEvent event) { // 首先继续注册事件监听器 try { if (event.getPath() .equals(CenterConfig.me() .getWorkersInfo() .getOnlineWorkers() .getPath()) && (event.getType() != EventType.NodeChildrenChanged)) { CenterConfig.me() .getWorkersInfo() .getOnlineWorkers() .watchNode(this); } else if (event.getPath() .equals(CenterConfig.me() .getSiteManagersConfigInfo() .getOnlineSiteManagers()) && (event.getType() == EventType.NodeChildrenChanged)) { CenterConfig.me() .getSiteManagersConfigInfo() .getOnlineSiteManagers() .watchChildren(this); } } catch (Exception e) { return; } synchronized (this.eventTime) { this.eventTime.setTime(System.currentTimeMillis()); this.eventTime.notifyAll(); } } /** * 向站点管理器发送获取的新的URL列表 * * @param parent * @param outGoings * @throws IOException */ public void putData(final WebURL parent, final List<WebURL> outGoings) throws IOException { // 准备要发送的数据 DataPacket request = new DataPacket(); request.setTitle("/url/put"); HashMap<String, String> requestData = new HashMap<String, String>(); requestData.put("PARENT", JSON.toJSONString(parent)); int size = 0; if (outGoings != null) { size = outGoings.size(); } int sendSize = 0; if (size > 0) { for (WebURL url : outGoings) { requestData.put("URL" + sendSize++, JSON.toJSONString(url)); } } requestData.put("COUNT", String.valueOf(sendSize)); request.setData(requestData); // 获取目的站点管理器 JSONServerConnector connector = null; synchronized (this.connectors) { connector = this.connectors.get(parent.getSiteManagerId()); } // 发送 boolean success = connector.open(); if (success) { try { connector.send(request); } finally { connector.shutdown(); } } } /** * 更新连接器,更新到最新的事件发生的状态 * * @throws InterruptedException * @throws KeeperException * @throws IOException */ public void refreshConnectors() throws InterruptedException, KeeperException, IOException { synchronized (this.connectors) { this.connectors.clear(); this.connectorIterator = null; List<SiteManagerInfo> dispatchedSiteManagers = this.centerConfig.getSiteManagersConfigInfo() .getOnlineSiteManagers() .getAllDispatchedSiteManagers(); if ((dispatchedSiteManagers != null) && (dispatchedSiteManagers.size() > 0)) { for (SiteManagerInfo siteManagerInfo : dispatchedSiteManagers) { String siteManagerAddress = siteManagerInfo.getManagerAddress(); if (siteManagerAddress != null) { String[] addInfo = siteManagerAddress.split(":"); JSONServerConnector connector; connector = new JSONServerConnector(addInfo[0], Integer.parseInt(addInfo[1])); this.connectors.put(siteManagerInfo.getSiteManagerId(), connector); } } } this.connectors.notifyAll(); } } @Override public void run() { // 注册监听器 try { // 查看workers的通知信息 CenterConfig.me() .getWorkersInfo() .getOnlineWorkers() .watchNode(this); CenterConfig.me() .getSiteManagersConfigInfo() .getOnlineSiteManagers() .watchChildren(this); } catch (Exception e) { return; } while (!this.shutdown) { Date now = new Date(); try { // 更新所有的连接 this.refreshConnectors(); } catch (InterruptedException e) { return; } catch (Exception e) { e.printStackTrace(); try { Thread.sleep(1000); continue; } catch (InterruptedException e1) { return; } } // 最后检查一下是否需要继续更新 synchronized (this.eventTime) { if (now.after(this.eventTime)) { try { this.eventTime.wait(); } catch (InterruptedException e) { return; } } } } } /** * 启动管理器线程 */ public void start() { if (this.managerThread != null) { this.managerThread.start(); } } }