/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.admin;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapred.JobTracker;
import org.apache.hadoop.mapred.JobConf;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.util.NutchConfiguration;
/**
 * Command line entry point of the Nutch administration GUI.
 *
 * <p>Given an instances root folder, it optionally starts a job tracker,
 * starts a {@link WebContainer} and deploys every plugin extension that
 * implements {@link GuiComponent}, once for the "general" pseudo instance and
 * once per instance folder found below the root.
 */
public class AdministrationApp extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(AdministrationApp.class.getName());

  /** Creates the application bound to the given configuration. */
  private AdministrationApp(Configuration conf) {
    setConf(conf);
  }

  /**
   * Starts a job tracker on a background thread unless
   * <code>mapred.job.tracker</code> is set to <code>local</code> (the default
   * assumed when the property is missing).
   *
   * @param defaultConf configuration the <code>mapred.job.tracker</code>
   *        property is read from; the tracker itself is created from
   *        {@link #getConf()}
   */
  private void startJobTracker(final Configuration defaultConf) {
    Runnable jobTrackerStarter = new Runnable() {
      public void run() {
        try {
          String jobtracker = defaultConf.get("mapred.job.tracker", "local");
          if (!"local".equals(jobtracker)) {
            JobConf jobconf = new JobConf(getConf());
            JobTracker.startTracker(jobconf);
            // NOTE(review): this pause only delays the end of this background
            // thread; kept as in the original, purpose not visible from here.
            Thread.sleep(3000);
          }
        } catch (IOException e) {
          // pass the throwable so the stack trace ends up in the log
          LOG.error("unable to start job tracker: " + e.toString(), e);
        } catch (InterruptedException e) {
          // restore the interrupt flag instead of silently consuming it
          Thread.currentThread().interrupt();
          LOG.error("interrupted while starting job tracker: " + e.toString(), e);
        }
      }
    };
    new Thread(jobTrackerStarter).start();
  }

  /**
   * Starts a web container and deploys all gui plugins.
   *
   * <p>The container is started first; afterwards the general components are
   * registered against a pseudo instance named "general" and the instance
   * components are registered once per discovered instance.
   *
   * @param initialInstance root folder that is scanned for instance folders
   * @param defaultConf configuration providing <code>admin.gui.port</code>
   *        (default 50060) and the plugin setup for general components
   * @return the started container
   * @throws Exception if the container cannot be started or the instances
   *         cannot be listed
   */
  public WebContainer startContainer(Path initialInstance, Configuration defaultConf)
      throws Exception {
    int port = defaultConf.getInt("admin.gui.port", 50060);
    WebContainer webContainer = new WebContainer(port, defaultConf);
    webContainer.startContainer();
    System.out.println("Nutch administration interface listening on *:" + port);

    NutchInstance[] nutchInstances = getInstances(defaultConf, initialInstance);

    // general components get to see all instances
    Extension[] generalGuiComponents =
        getComponentExtensions(defaultConf, GuiComponent.IS_GENERAL_COMPONENT);
    NutchInstance generalInstance =
        new NutchInstance("general", initialInstance, defaultConf);
    webContainer.addComponentExtensions(
        generalGuiComponents, generalInstance, nutchInstances);

    // instance components are resolved against each instance's own configuration
    for (NutchInstance instance : nutchInstances) {
      Extension[] extensions = getComponentExtensions(
          instance.getConfiguration(), GuiComponent.IS_INSTANCE_COMPONENT);
      webContainer.addComponentExtensions(extensions, instance, null);
    }
    return webContainer;
  }

  /**
   * Collects the gui component extensions whose given attribute is "true".
   *
   * @param conf configuration used to look up the plugin repository
   * @param attributeName
   *          attribute value must be set to "true" (any letter case) in
   *          plugin.xml
   * @return extensions implementing {@link GuiComponent}
   *         and matching the attribute filter
   * @throws RuntimeException if the {@link GuiComponent} extension point is
   *         not known to the plugin repository
   */
  public static Extension[] getComponentExtensions(Configuration conf,
      String attributeName) {
    ExtensionPoint extensionPoint =
        PluginRepository.get(conf).getExtensionPoint(GuiComponent.X_POINT_ID);
    if (extensionPoint == null) {
      throw new RuntimeException("x-point "
          + GuiComponent.X_POINT_ID
          + " not found, check your plugin folder");
    }
    ArrayList<Extension> list = new ArrayList<Extension>();
    for (Extension extension : extensionPoint.getExtensions()) {
      // null-safe and locale-independent comparison, single attribute lookup
      if ("true".equalsIgnoreCase(extension.getAttribute(attributeName))) {
        list.add(extension);
      }
    }
    return list.toArray(new Extension[list.size()]);
  }

  /**
   * Scans the root folder for instance folders.
   *
   * <p>Every directory directly below the root, except one named "conf", is
   * passed to {@link #loadNutchInstance(Configuration, Path)}; directories
   * that are not valid instance folders are logged and skipped.
   *
   * @param defaultConf configuration every instance configuration is based on
   * @param instancesRoot folder whose children are inspected
   * @return the instances that could be loaded, possibly empty
   * @throws IOException if the root folder cannot be listed
   */
  private NutchInstance[] getInstances(Configuration defaultConf, Path instancesRoot)
      throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    FileStatus[] filestatuses = fs.listStatus(instancesRoot);
    if (filestatuses == null) {
      // defensive: treat a missing listing as "no instances" instead of
      // failing with a NullPointerException below
      return new NutchInstance[0];
    }
    ArrayList<NutchInstance> instancesList = new ArrayList<NutchInstance>();
    for (FileStatus status : filestatuses) {
      Path folder = status.getPath();
      if (fs.isDirectory(folder) && !folder.getName().equals("conf")) {
        try {
          instancesList.add(loadNutchInstance(defaultConf, folder));
        } catch (IOException e) {
          LOG.warn("unable to load instance: " + e.toString());
        }
      }
    }
    return instancesList.toArray(new NutchInstance[instancesList.size()]);
  }

  /**
   * Creates an instance object from an instance folder.
   *
   * <p>A folder is accepted when it contains a <code>conf</code> directory
   * holding a <code>nutch-site.xml</code>; that file is added as a resource on
   * top of a copy of the default configuration.
   *
   * @param defaultConf configuration the instance configuration is copied from
   * @param folder candidate instance folder
   * @return an instance representation of this folder, named after the folder
   * @throws IOException
   *           in case the folder is not a valid instance folder
   */
  public static NutchInstance loadNutchInstance(Configuration defaultConf, Path folder)
      throws IOException {
    Path instanceConfFolder = new Path(folder, "conf");
    // a fresh Nutch configuration is used only to obtain the file system handle
    Configuration conf = NutchConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(instanceConfFolder) && fs.isDirectory(instanceConfFolder)) {
      Path instanceSiteConf = new Path(instanceConfFolder, "nutch-site.xml");
      if (fs.exists(instanceSiteConf)) {
        Configuration instanceConf = new Configuration(defaultConf);
        instanceConf.addResource(instanceSiteConf.makeQualified(fs));
        return new NutchInstance(folder.getName(), folder, instanceConf);
      }
    }
    throw new IOException("not a valid instance folder: "
        + folder);
  }

  /**
   * Creates the configuration for the instances root and for a first instance
   * named "default" below it.
   *
   * @param file instances root folder
   * @throws IOException if a configuration cannot be created
   */
  private void createFirstInstance(Path file) throws IOException {
    GuiConfigUtil.createConfiguration(file);
    Path defaultInstance = new Path(file, "default");
    GuiConfigUtil.createConfiguration(defaultInstance);
  }

  /**
   * Runs the application through {@link ToolRunner} and exits the JVM with
   * the returned status.
   *
   * @param args command line arguments, see {@link #run(String[])}
   * @throws Exception if the tool fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    int res = ToolRunner.run(conf, new AdministrationApp(conf), args);
    System.exit(res);
  }

  /**
   * Starts the nutch administration web interface.
   *
   * <p>Works on the configuration installed through {@link #setConf} (the one
   * handed to {@link ToolRunner} in {@link #main(String[])}) rather than on a
   * freshly created one, so settings applied to that configuration are
   * honoured. If the instances folder does not exist yet, a first instance is
   * created in it.
   *
   * @param args exactly one element: the instances folder
   * @return 127 on wrong usage, 1 if the interface failed with an exception,
   *         0 if the thread was interrupted while serving
   * @throws Exception if preparing the instances folder fails
   */
  public int run(String[] args) throws Exception {
    String usage = "Usage: <instancesFolder>";
    if (args.length != 1) {
      System.err.println(usage);
      return 127;
    }

    Configuration defaultConf = getConf();
    if (defaultConf == null) {
      // fall back to the stock configuration when none was installed
      defaultConf = NutchConfiguration.create();
      setConf(defaultConf);
    }

    FileSystem fs = FileSystem.get(defaultConf);
    Path file = new Path(args[0]);
    if (!fs.exists(file)) {
      createFirstInstance(file);
    }

    startJobTracker(defaultConf);

    int exitCode = 0;
    try {
      WebContainer container = startContainer(file, defaultConf);
      container.join();
      // NOTE(review): keep-alive loop retained from the original; presumably
      // join() may return while the interface is still serving - confirm
      // against WebContainer before removing.
      while (true) {
        Thread.sleep(250);
      }
    } catch (InterruptedException e) {
      // interruption is the only regular way out of the keep-alive loop
      Thread.currentThread().interrupt();
      LOG.info("Exiting normally...");
    } catch (Exception e) {
      // a failure must not be reported as a successful run
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      exitCode = 1;
    }
    return exitCode;
  }
}