package edu.umd.cloud9.webgraph;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.collection.trecweb.TrecWebDocumentInputFormat;
public class CollectionConfigurationManager {
public static final String[] supported = { "trecweb", "gov2", "wt10g" };
private boolean userSpecifiedInputFormat = false;
private boolean userSpecifiedDocnoMapping = false;
private int tgtConf = -1;
private Class<? extends InputFormat<?, ?>> userSpecifiedInputFormatClass;
private String userSpecifiedDocnoMappingClass;
public static boolean isSupported(String tgtCollection) {
return (getCollectionIndex(tgtCollection) >= 0);
}
private static int getCollectionIndex(String tgtCollection) {
tgtCollection = tgtCollection.toLowerCase();
for (int i = 0; i < supported.length; i++)
if (tgtCollection.startsWith(supported[i]))
return i;
return -1;
}
public boolean setConfByCollection(String collectionName) {
int index = getCollectionIndex(collectionName);
if (index == -1) {
return false;
}
tgtConf = index;
return true;
}
@SuppressWarnings("unchecked")
public boolean setUserSpecifiedInputFormat(String className) {
Class<? extends InputFormat<?, ?>> userClass;
try {
userClass = (Class<? extends InputFormat<?, ?>>) Class.forName(className);
} catch (ClassNotFoundException e) {
return false;
}
// It has to be sub class of FileInputFormat
if (!FileInputFormat.class.isAssignableFrom(userClass)) {
return false;
}
userSpecifiedInputFormat = true;
userSpecifiedInputFormatClass = userClass;
return true;
}
@SuppressWarnings("unchecked")
public boolean setUserSpecifiedDocnoMappingClass(String className) {
Class<? extends DocnoMapping> userClass;
try {
userClass = (Class<? extends DocnoMapping>) Class.forName(className);
} catch (ClassNotFoundException e) {
return false;
}
// It has to be sub class of DocnoMapping
if (!DocnoMapping.class.isAssignableFrom(userClass)) {
return false;
}
userSpecifiedDocnoMapping = true;
userSpecifiedDocnoMappingClass = className;
return true;
}
public void applyJobConfig(Job job) throws Exception {
if (userSpecifiedInputFormat) {
job.setInputFormatClass(userSpecifiedInputFormatClass);
} else {
switch (tgtConf) {
case 0:
case 1:
case 2:
job.setInputFormatClass(TrecWebDocumentInputFormat.class);
break;
default:
throw new Exception("InputFormat class not specified");
}
}
}
public void applyConfig(Configuration conf) throws Exception {
if (userSpecifiedDocnoMapping) {
conf.set("Cloud9.DocnoMappingClass", userSpecifiedDocnoMappingClass);
} else {
switch (tgtConf) {
case 1:
conf.set("Cloud9.DocnoMappingClass",
edu.umd.cloud9.collection.trecweb.Gov2DocnoMapping.class.getCanonicalName());
break;
case 2:
conf.set("Cloud9.DocnoMappingClass",
edu.umd.cloud9.collection.trecweb.Wt10gDocnoMapping.class.getCanonicalName());
break;
case 0:
default:
throw new Exception("DocnoMapping class not specified");
}
}
}
}