/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.searcher;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RPC;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;

/**
 * A {@link SegmentBean} that delegates every request to one of several
 * segment servers reached through Hadoop RPC proxies.
 *
 * <p>A background task per server periodically asks that server for the
 * names of the segments it serves and records them in a segment-name to
 * server-index map. Requests are routed by looking up the
 * <code>"segment"</code> value of the {@link HitDetails} in that map. The map
 * is filled asynchronously, so it may still be empty right after
 * construction.</p>
 */
public class DistributedSegmentBean implements SegmentBean {

  /** Name of the hit-details field holding the segment name. */
  private static final String SEGMENT_FIELD = "segment";

  /** Seconds between two segment-name refreshes of the same server. */
  private static final long PING_INTERVAL_SECONDS = 30;

  /**
   * Runs the per-server summary requests of this instance. It is owned by
   * the instance (not shared statically) so that {@link #close()} on one bean
   * cannot disable the executor used by another bean.
   */
  private final ExecutorService executor = Executors.newCachedThreadPool();

  /** Runs the periodic {@link SegmentWorker}s. */
  private final ScheduledExecutorService pingService;

  /** Upper bound, in milliseconds, for a batch summary request. */
  private final long timeout;

  /** One proxy per configured segment server; the index is the server id. */
  private final SegmentBean[] beans;

  /** Maps a segment name to the index (in {@link #beans}) of its server. */
  private final ConcurrentMap<String, Integer> segmentMap;

  /**
   * Fetches the summaries for a fixed set of hits from a single server.
   * Instances are immutable and created per request, so concurrent
   * {@link DistributedSegmentBean#getSummary(HitDetails[], Query)} calls do
   * not share any mutable task state.
   */
  private static final class SummaryTask implements Callable<Summary[]> {
    private final SegmentBean bean;
    private final HitDetails[] details;
    private final Query query;

    SummaryTask(SegmentBean bean, HitDetails[] details, Query query) {
      this.bean = bean;
      this.details = details;
      this.query = query;
    }

    public Summary[] call() throws Exception {
      return bean.getSummary(details, query);
    }
  }

  /**
   * Periodic task that refreshes the segment names served by one server.
   * If the server cannot be queried, every segment currently mapped to it is
   * removed from the routing map.
   */
  private class SegmentWorker implements Runnable {
    private final int id;

    public SegmentWorker(int id) {
      this.id = id;
    }

    public void run() {
      try {
        String[] segments = beans[id].getSegmentNames();
        for (String segment : segments) {
          segmentMap.put(segment, id);
        }
      } catch (IOException e) {
        forgetSegments();
      } catch (RuntimeException e) {
        // An exception escaping run() would cancel all further executions of
        // this periodic task, so unchecked failures are handled like I/O
        // failures and the server is retried on the next tick.
        forgetSegments();
      }
    }

    /** Removes all segments this bean was serving. */
    private void forgetSegments() {
      Iterator<Map.Entry<String, Integer>> i = segmentMap.entrySet().iterator();
      while (i.hasNext()) {
        Map.Entry<String, Integer> entry = i.next();
        if (entry.getValue().intValue() == id) {
          i.remove();
        }
      }
    }
  }

  /**
   * Creates one RPC proxy per address read from <code>serversConfig</code>
   * and schedules a segment-name refresh for each of them, starting
   * immediately and repeating every 30 seconds.
   *
   * @param conf configuration; <code>ipc.client.timeout</code> (milliseconds,
   *        default 60000) bounds batch summary requests
   * @param serversConfig file passed to <code>NutchBean.readAddresses</code>
   *        to obtain the server addresses
   * @throws IOException if the addresses cannot be read or a proxy cannot be
   *         created
   */
  public DistributedSegmentBean(Configuration conf, Path serversConfig)
      throws IOException {
    this.timeout = conf.getLong("ipc.client.timeout", 60000);

    List<SegmentBean> beanList = new ArrayList<SegmentBean>();
    List<InetSocketAddress> segmentServers =
        NutchBean.readAddresses(serversConfig, conf);
    for (InetSocketAddress addr : segmentServers) {
      SegmentBean bean = (RPCSegmentBean) RPC.getProxy(RPCSegmentBean.class,
          FetchedSegments.VERSION, addr, conf);
      beanList.add(bean);
    }
    beans = beanList.toArray(new SegmentBean[beanList.size()]);

    segmentMap = new ConcurrentHashMap<String, Integer>();

    pingService = Executors.newScheduledThreadPool(beans.length);
    for (int i = 0; i < beans.length; i++) {
      pingService.scheduleAtFixedRate(new SegmentWorker(i), 0,
          PING_INTERVAL_SECONDS, TimeUnit.SECONDS);
    }
  }

  /**
   * Resolves the index of the server that serves the segment of a hit.
   *
   * @throws IOException if the hit names no segment, or no server is
   *         currently registered for that segment
   */
  private int lookupBeanId(HitDetails details) throws IOException {
    String segment = details.getValue(SEGMENT_FIELD);
    Integer id = null;
    if (segment != null) {
      // Read once into a local: a worker may remove the mapping concurrently.
      id = segmentMap.get(segment);
    }
    if (id == null) {
      throw new IOException(
          "No segment server is currently serving segment: " + segment);
    }
    return id.intValue();
  }

  /** Returns the proxy of the server that serves the segment of a hit. */
  private SegmentBean getBean(HitDetails details) throws IOException {
    return beans[lookupBeanId(details)];
  }

  /**
   * Returns the names of all segments currently mapped to a server.
   */
  public String[] getSegmentNames() {
    // A zero-length array lets toArray size the result itself; pre-sizing
    // from size() could leave trailing nulls if the map shrinks in between.
    return segmentMap.keySet().toArray(new String[0]);
  }

  /** Fetches the content of a hit from the server serving its segment. */
  public byte[] getContent(HitDetails details) throws IOException {
    return getBean(details).getContent(details);
  }

  /** Fetches the fetch date of a hit from the server serving its segment. */
  public long getFetchDate(HitDetails details) throws IOException {
    return getBean(details).getFetchDate(details);
  }

  /** Fetches the parse data of a hit from the server serving its segment. */
  public ParseData getParseData(HitDetails details) throws IOException {
    return getBean(details).getParseData(details);
  }

  /** Fetches the parse text of a hit from the server serving its segment. */
  public ParseText getParseText(HitDetails details) throws IOException {
    return getBean(details).getParseText(details);
  }

  /**
   * Stops the background executors and closes every server proxy. All
   * proxies are closed even if some of them fail.
   *
   * @throws IOException the first failure reported while closing a proxy
   */
  public void close() throws IOException {
    pingService.shutdown();
    executor.shutdown();

    IOException firstFailure = null;
    for (SegmentBean bean : beans) {
      try {
        bean.close();
      } catch (IOException e) {
        if (firstFailure == null) {
          firstFailure = e;
        }
      }
    }
    if (firstFailure != null) {
      throw firstFailure;
    }
  }

  /** Fetches the summary of one hit from the server serving its segment. */
  public Summary getSummary(HitDetails details, Query query)
      throws IOException {
    return getBean(details).getSummary(details, query);
  }

  /**
   * Fetches the summaries of several hits, querying each involved server
   * once and in parallel.
   *
   * @param detailsArr the hits to summarize
   * @param query the query the summaries are built for
   * @return one summary per hit, in the same order as <code>detailsArr</code>
   * @throws IOException if a hit's segment is not served by any known server,
   *         a server request fails with an <code>IOException</code>, or a
   *         server returns a different number of summaries than requested
   * @throws RuntimeException if the calling thread is interrupted, or a
   *         server request fails otherwise (including exceeding the timeout)
   */
  public Summary[] getSummary(HitDetails[] detailsArr, Query query)
      throws IOException {
    // positions.get(b) lists the indexes into detailsArr handled by server b.
    List<List<Integer>> positions = new ArrayList<List<Integer>>(beans.length);
    for (int i = 0; i < beans.length; i++) {
      positions.add(new ArrayList<Integer>());
    }
    for (int i = 0; i < detailsArr.length; i++) {
      positions.get(lookupBeanId(detailsArr[i])).add(i);
    }

    // One task per server that has at least one hit to summarize.
    List<Callable<Summary[]>> tasks = new ArrayList<Callable<Summary[]>>();
    List<List<Integer>> taskPositions = new ArrayList<List<Integer>>();
    for (int b = 0; b < beans.length; b++) {
      List<Integer> beanPositions = positions.get(b);
      if (beanPositions.isEmpty()) {
        continue;
      }
      HitDetails[] taskDetails = new HitDetails[beanPositions.size()];
      for (int j = 0; j < taskDetails.length; j++) {
        taskDetails[j] = detailsArr[beanPositions.get(j)];
      }
      tasks.add(new SummaryTask(beans[b], taskDetails, query));
      taskPositions.add(beanPositions);
    }

    Summary[] result = new Summary[detailsArr.length];
    if (tasks.isEmpty()) {
      return result;
    }

    List<Future<Summary[]>> futures;
    try {
      futures = executor.invokeAll(tasks, timeout, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
      // Restore the flag so callers further up can observe the interrupt.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }

    // invokeAll returns the futures in the order of the submitted tasks.
    for (int t = 0; t < futures.size(); t++) {
      Summary[] part;
      try {
        part = futures.get(t).get();
      } catch (Exception e) {
        if (e.getCause() instanceof IOException) {
          throw (IOException) e.getCause();
        }
        throw new RuntimeException(e);
      }

      List<Integer> beanPositions = taskPositions.get(t);
      if (part == null || part.length != beanPositions.size()) {
        throw new IOException("Segment server returned "
            + (part == null ? "no" : String.valueOf(part.length))
            + " summaries for " + beanPositions.size() + " hits");
      }
      // Put each summary back at the position of the hit it belongs to.
      for (int j = 0; j < part.length; j++) {
        result[beanPositions.get(j)] = part[j];
      }
    }
    return result;
  }
}