package edu.umd.cloud9.integration;
import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import tl.lin.data.pair.Pair;
import tl.lin.data.pair.PairOfStrings;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
/**
 * Helpers for integration tests: locating the built fatjar, loading the cluster's Hadoop
 * configuration, and shelling out to external commands while capturing their output.
 */
public class IntegrationUtils {
  /**
   * Finds the single "fatjar" artifact in a directory.
   *
   * @param path directory to search
   * @param prefix required file-name prefix of the jar
   * @return absolute path of the one file whose name starts with {@code prefix}, contains
   *         {@code "fatjar"}, and contains neither {@code "javadoc"} nor {@code "sources"}
   * @throws AssertionError if {@code path} cannot be listed or the number of matches is not
   *         exactly one
   */
  public static String getJar(String path, final String prefix) {
    File[] arr = new File(path).listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.startsWith(prefix) && !name.contains("javadoc") && !name.contains("sources")
            && name.contains("fatjar");
      }
    });
    // listFiles() returns null (not an empty array) when the path is not a listable directory;
    // fold that into the assertion so the failure is a readable message rather than an NPE.
    String found = (arr == null) ? "path is not a readable directory" : (arr.length + " found");
    assertTrue("Expected exactly one fatjar with prefix '" + prefix + "' in " + path + ": "
        + found, arr != null && arr.length == 1);
    return arr[0].getAbsolutePath();
  }

  /**
   * Creates a Hadoop {@link Configuration} with the core, HDFS, MapReduce and YARN site files
   * from {@code /etc/hadoop/conf} added as resources.
   *
   * @return the freshly loaded configuration
   */
  public static Configuration getBespinConfiguration() {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"));
    conf.addResource(new Path("/etc/hadoop/conf/yarn-site.xml"));
    conf.reloadConfiguration();
    return conf;
  }

  // How to properly shell out: http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html
  /**
   * Runs a command, echoing its output to {@code System.out}, and returns everything it wrote.
   *
   * @param cmd command line, passed to {@link Runtime#exec(String)}
   * @return pair of (stdout, stderr), each with lines joined by {@code "\n"}
   * @throws IOException if the process cannot be started
   * @throws InterruptedException if interrupted while waiting for the process or its readers
   */
  public static PairOfStrings exec(String cmd) throws IOException, InterruptedException {
    Process proc = launch(cmd);
    StreamGobbler errorGobbler = new StreamGobbler(proc.getErrorStream(), "STDERR");
    StreamGobbler outputGobbler = new StreamGobbler(proc.getInputStream(), "STDOUT");
    awaitCompletion(proc, errorGobbler, outputGobbler);

    Joiner joiner = Joiner.on("\n");
    return Pair.of(joiner.join(outputGobbler.getLines()), joiner.join(errorGobbler.getLines()));
  }

  /**
   * Runs a command and extracts the {@code DISAMBIGUATION=}, {@code ARTICLE=} and
   * {@code TOTAL=} counter values from its stderr.
   *
   * @param cmd command line, passed to {@link Runtime#exec(String)}
   * @return immutable list of (disambiguation count, article count, total count); a counter
   *         that never appears in the output is reported as 0
   * @throws IOException if the process cannot be started
   * @throws InterruptedException if interrupted while waiting for the process or its readers
   */
  public static List<Integer> execWiki(String cmd) throws IOException, InterruptedException {
    Process proc = launch(cmd);
    WikiGobbler errorGobbler = new WikiGobbler(proc.getErrorStream(), "STDERR");
    StreamGobbler outputGobbler = new StreamGobbler(proc.getInputStream(), "STDOUT");
    awaitCompletion(proc, errorGobbler, outputGobbler);

    return ImmutableList.of(errorGobbler.disambCount, errorGobbler.articleCount,
        errorGobbler.totalCount);
  }

  /**
   * Runs a command and extracts integer values of the form {@code key=value} from its stderr.
   *
   * @param cmd command line, passed to {@link Runtime#exec(String)}
   * @param keys keys to look for; each is matched literally
   * @return map from each key that was seen to the last integer value parsed for it
   * @throws IOException if the process cannot be started
   * @throws InterruptedException if interrupted while waiting for the process or its readers
   */
  public static Map<String, Integer> execKeyValueExtractor(String cmd, Set<String> keys)
      throws IOException, InterruptedException {
    Process proc = launch(cmd);
    KeyValuePairsGobbler errorGobbler =
        new KeyValuePairsGobbler(proc.getErrorStream(), "STDERR", keys);
    StreamGobbler outputGobbler = new StreamGobbler(proc.getInputStream(), "STDOUT");
    awaitCompletion(proc, errorGobbler, outputGobbler);

    return errorGobbler.map;
  }

  /** Logs the command and starts it as a child process. */
  private static Process launch(String cmd) throws IOException {
    System.out.println("Executing command: " + cmd);
    return Runtime.getRuntime().exec(cmd);
  }

  /**
   * Starts both stream readers, waits for the process to exit, then waits for the readers to
   * finish so that everything they collected is complete and visible to the calling thread.
   */
  private static void awaitCompletion(Process proc, StreamGobbler errorGobbler,
      StreamGobbler outputGobbler) throws InterruptedException {
    // Both streams must be drained concurrently, otherwise the child can block on a full pipe.
    errorGobbler.start();
    outputGobbler.start();

    int exitVal = proc.waitFor();
    System.out.println("ExitValue: " + exitVal);

    // Process exit does not imply the readers have consumed the buffered tail of the output;
    // join() also provides the happens-before edge needed to read their fields safely.
    errorGobbler.join();
    outputGobbler.join();
  }

  /**
   * Parses the integer that follows the first occurrence of {@code marker} in {@code line}.
   *
   * @return the parsed value, or {@code null} if the marker is absent or what follows it is not
   *         an integer (a warning is printed in the latter case)
   */
  private static Integer parseCounter(String line, String marker) {
    int start = line.indexOf(marker);
    if (start < 0) {
      return null;
    }
    String value = line.substring(start + marker.length()).trim();
    try {
      return Integer.valueOf(value);
    } catch (NumberFormatException e) {
      // Skip rather than rethrow: an exception here would kill the reader thread, the stream
      // would stop being drained, and the child process could hang.
      System.err.println("Ignoring unparseable counter line: " + line);
      return null;
    }
  }

  /** Thread that reads a stream line by line, echoing each line and recording it. */
  private static class StreamGobbler extends Thread {
    private final InputStream is;
    private final String type;
    private final List<String> lines = Lists.newArrayList();

    StreamGobbler(InputStream is, String type) {
      this.is = is;
      this.type = type;
    }

    @Override
    public void run() {
      BufferedReader br = new BufferedReader(new InputStreamReader(is));
      try {
        String line;
        while ((line = br.readLine()) != null) {
          System.out.println(type + ">" + line);
          process(line);
        }
      } catch (IOException ioe) {
        ioe.printStackTrace();
      } finally {
        try {
          br.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing the already-consumed stream fails.
        }
      }
    }

    /** Handles one line of input; the default implementation records it. */
    void process(String line) {
      lines.add(line);
    }

    /** Returns the recorded lines; only complete once this thread has been joined. */
    public List<String> getLines() {
      return lines;
    }
  }

  /** Reader that picks the disambiguation, article and total counters out of the stream. */
  private static class WikiGobbler extends StreamGobbler {
    int disambCount = 0;
    int articleCount = 0;
    int totalCount = 0;

    WikiGobbler(InputStream is, String type) {
      super(is, type);
    }

    // depends on PageType names and handling in BuildWikipediaDocnoMapping
    @Override
    void process(String line) {
      if (line.contains("DISAMBIGUATION=")) {
        Integer value = parseCounter(line, "DISAMBIGUATION=");
        if (value != null) {
          disambCount = value;
        }
      } else if (line.contains("ARTICLE=") && !line.contains("NON_ARTICLE=")) {
        Integer value = parseCounter(line, "ARTICLE=");
        if (value != null) {
          articleCount = value;
        }
      } else if (line.contains("TOTAL=")) {
        Integer value = parseCounter(line, "TOTAL=");
        if (value != null) {
          totalCount = value;
        }
      }
    }
  }

  /** Reader that records the integer following {@code key=} for each requested key. */
  private static class KeyValuePairsGobbler extends StreamGobbler {
    Map<String, Integer> map = Maps.newHashMap();
    private final Set<String> keys;

    KeyValuePairsGobbler(InputStream is, String type, Set<String> keys) {
      super(is, type);
      this.keys = keys;
    }

    @Override
    void process(String line) {
      for (String key : keys) {
        // Literal match: keys are not interpreted as regular expressions.
        Integer value = parseCounter(line, key + "=");
        if (value != null) {
          map.put(key, value);
        }
      }
    }
  }
}