/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.smrf.retrieval.distributed;
import ivory.core.util.XMLTools;

import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import edu.umd.cloud9.io.FSProperty;
import edu.umd.cloud9.mapred.NullOutputFormat;
/**
* @author Tamer Elsayed
* @author Jimmy Lin
*/
public class RunDistributedRetrievalServers extends Configured implements Tool {
  private static final Logger sLogger = Logger.getLogger(RunDistributedRetrievalServers.class);

  // How long the driver waits between HDFS polls for server signal files.
  private static final long POLL_INTERVAL_MS = 10000;

  // Counter incremented periodically so the framework knows the long-running
  // server tasks are still alive and does not kill them for inactivity.
  static enum Heartbeat {
    COUNT
  }

  /**
   * Map-only task that hosts a single retrieval server. Each input line is
   * expected to be "{@code sid port}" (whitespace-separated). The task never
   * finishes on its own; it is meant to be killed when the servers are no
   * longer needed.
   */
  public static class ServerMapper extends MapReduceBase implements
      Mapper<LongWritable, Text, NullWritable, NullWritable> {
    // The sole job of this thread is to increment counters once in a while
    // to let the job tracker know we're still alive.
    private static class HeartbeatRunnable implements Runnable {
      private final Reporter mReporter;

      public HeartbeatRunnable(Reporter reporter) {
        mReporter = reporter;
      }

      public void run() {
        while (!Thread.currentThread().isInterrupted()) {
          mReporter.incrCounter(Heartbeat.COUNT, 1);
          try {
            Thread.sleep(60000);
          } catch (InterruptedException e) {
            // Restore the interrupt status so the loop condition sees it and
            // the heartbeat thread shuts down cleanly.
            Thread.currentThread().interrupt();
          }
        }
      }
    }

    private String mConfigPath; // HDFS directory used for coordination files
    private String mConfigFile; // retrieval configuration (XML) path
    private FileSystem mFS;

    public void configure(JobConf conf) {
      mConfigFile = conf.get("Ivory.ConfigFile");
      mConfigPath = conf.get("Ivory.ConfigPath");
      try {
        mFS = FileSystem.get(conf);
      } catch (IOException e) {
        // Fail fast: without a FileSystem handle this mapper cannot publish
        // its address or read the index, so an NPE later would only obscure
        // the real cause.
        throw new RuntimeException("Unable to obtain FileSystem handle!", e);
      }
    }

    /**
     * Launches the retrieval server described by the input line ("sid port"),
     * publishes its host address and a ready signal to HDFS, then blocks
     * forever to keep the task (and the server) alive.
     */
    public void map(LongWritable key, Text value,
        OutputCollector<NullWritable, NullWritable> output, Reporter reporter)
        throws IOException {
      Thread pulse = new Thread(new HeartbeatRunnable(reporter));
      pulse.setDaemon(true);
      pulse.start();

      String[] parameters = value.toString().trim().split("\\s+");
      String sid = parameters[0];
      int port = Integer.parseInt(parameters[1]);

      sLogger.info("Mapper launched!");
      sLogger.info(" - host name: " + InetAddress.getLocalHost().toString());
      sLogger.info(" - port: " + port);
      sLogger.info(" - server id: " + sid);
      sLogger.info(" - config path: " + mConfigPath);

      writeIPAddressToHDFS(sid, port);

      RetrievalServer server = new RetrievalServer();
      server.initialize(sid, mConfigFile, mFS);
      server.start(port);

      // Signal that the server is ready.
      FSProperty.writeInt(mFS, appendPath(mConfigPath, sid + ".ready"), 1);

      // Keep the task alive indefinitely. Block in sleep rather than
      // busy-spinning ("while (true);") so we don't peg a CPU core.
      while (true) {
        try {
          Thread.sleep(Long.MAX_VALUE);
        } catch (InterruptedException e) {
          // Task is being torn down; re-interrupt and let map() return.
          Thread.currentThread().interrupt();
          break;
        }
      }
    }

    /**
     * Writes "ip:port" for the current host to {@code <configPath>/<sid>.host}
     * on HDFS so that the broker can read it and contact the server.
     *
     * @throws IOException if writing to the file system fails
     */
    private void writeIPAddressToHDFS(String sid, int port) throws IOException {
      // getHostAddress() returns the bare textual IP directly, avoiding the
      // fragile parsing of InetAddress.toString() ("hostname/ip") that the
      // original code did (its backslash branch was dead code).
      String hostIP = InetAddress.getLocalHost().getHostAddress();
      String fname = appendPath(mConfigPath, sid + ".host");
      sLogger.info("Writing host address to " + fname);
      FSProperty.writeString(mFS, fname, hostIP + ":" + port);
    }
  }

  /**
   * Creates an instance of this tool.
   */
  private RunDistributedRetrievalServers() {
  }

  private static int printUsage() {
    System.out.println("usage: [config-file] [config-path]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  /**
   * Runs this tool: reads the list of servers from the XML config, writes a
   * one-line-per-server job input file, submits a map-only job that hosts one
   * server per mapper, and blocks until every server has published its host
   * address and ready signal to HDFS.
   */
  public int run(String[] args) throws Exception {
    if (args.length < 2) {
      printUsage();
      return -1;
    }
    String configFile = args[0];
    String configPath = args[1];
    FileSystem fs = FileSystem.get(getConf());

    // Parse the configuration XML; close the stream (leaked in the original).
    Document d;
    InputStream in = fs.open(new Path(configFile));
    try {
      d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
    } finally {
      in.close();
    }

    sLogger.info("Reading configuration to determine number of servers to launch:");
    List<String> sids = new ArrayList<String>();
    NodeList servers = d.getElementsByTagName("server");
    for (int i = 0; i < servers.getLength(); i++) {
      Node node = servers.item(i);
      // Every server element must carry a unique id attribute.
      String sid = XMLTools.getAttributeValue(node, "id", null);
      if (sid == null) {
        throw new Exception("Must specify an id attribute for every server!");
      }
      sLogger.info(" - sid: " + sid);
      sids.add(sid);
    }
    int numServers = sids.size();

    // Start from a clean coordination directory.
    if (fs.exists(new Path(configPath))) {
      fs.delete(new Path(configPath), true);
    }

    // Write the job input: one "sid port" line per server, ports 7001, 7002, ...
    String fname = appendPath(configPath, "config-" + numServers + ".txt");
    sLogger.info("Writing configuration to: " + fname);
    StringBuilder sb = new StringBuilder();
    int port = 7000;
    for (int n = 0; n < numServers; n++) {
      port++;
      sb.append(sids.get(n)).append(' ').append(port).append('\n');
    }
    FSDataOutputStream out = fs.create(new Path(fname), true);
    try {
      out.writeBytes(sb.toString());
    } finally {
      out.close();
    }

    JobConf conf = new JobConf(getConf(), RetrievalServer.class);
    conf.setJobName("RetrievalServers");
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);
    // NLineInputFormat gives each server its own one-line split, hence its
    // own mapper.
    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);
    FileInputFormat.setInputPaths(conf, new Path(fname));
    conf.set("Ivory.ConfigFile", configFile);
    conf.set("Ivory.ConfigPath", configPath);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // conf.set("mapred.job.queue.name", "search");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("Waiting for servers to start up...");
    // Poll HDFS for hostnames/ports, then for the index-ready signals.
    waitForSignalFiles(fs, configPath, sids, ".host");
    waitForSignalFiles(fs, configPath, sids, ".ready");
    sLogger.info("All servers ready!");

    sLogger.info("Host information:");
    for (int n = 0; n < numServers; n++) {
      String f = appendPath(configPath, sids.get(n) + ".host");
      sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f));
    }
    return 0;
  }

  /**
   * Polls HDFS until the signal file {@code <configPath>/<sid><suffix>} exists
   * for every server id. Returns immediately once all files are present
   * (the original slept a fixed interval even after everything was up).
   */
  private static void waitForSignalFiles(FileSystem fs, String configPath, List<String> sids,
      String suffix) throws IOException, InterruptedException {
    while (true) {
      boolean allPresent = true;
      for (String sid : sids) {
        if (!fs.exists(new Path(appendPath(configPath, sid + suffix)))) {
          allPresent = false;
          break;
        }
      }
      if (allPresent) {
        return;
      }
      Thread.sleep(POLL_INTERVAL_MS);
      sLogger.info(" ...");
    }
  }

  // Joins a base directory and a file name, inserting a slash only if needed.
  private static String appendPath(String base, String file) {
    return base + (base.endsWith("/") ? "" : "/") + file;
  }

  /**
   * Dispatches command-line arguments to the tool via the
   * <code>ToolRunner</code>.
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new RunDistributedRetrievalServers(), args);
    System.exit(res);
  }
}