package edu.nd.nina.io;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import edu.nd.nina.DirectedGraph;
import edu.nd.nina.graph.DefaultDirectedGraph;
import edu.nd.nina.graph.DefaultEdge;
import edu.nd.nina.graph.DirectedFeatureGraph;
import edu.nd.nina.types.Instance;
/**
 * Static loaders that populate a {@link DirectedFeatureGraph} of
 * {@link Instance} vertices from a text file or from HBase tables.
 *
 * Every loader reports I/O failures with {@code printStackTrace()} and returns
 * whatever it managed to load so far (possibly {@code null} for the root).
 */
public class FeatureGraph {

    /** ZooKeeper quorum host used by the HBase-backed loaders. */
    private static final String ZOOKEEPER_QUORUM = "dmserv3.cs.illinois.edu";

    /**
     * Loads a directed feature graph from a file. Blank lines and lines
     * starting with '#' are skipped. Every other line is either a vertex
     * definition or a directed edge, in the following form:
     *
     * Vertex Definitions - 0 a b c d
     *
     * where 0 is an Integer id for the vertex followed by a space-separated
     * sequence of "features" (probably words). The whole line is handed to the
     * {@link Instance} constructor as its data.
     *
     * Edge Definitions - 0 -> 1
     *
     * where 0 and 1 are previously defined vertices and '->' denotes the
     * directed edge. Reverse edges ('<-') are not allowed.
     *
     * The root is the first vertex whose first feature (the token directly
     * after the id) equals {@code rootName}. A vertex line with no features
     * has an empty first feature.
     *
     * @param featureGraphFile
     *            File containing the textual representation of the graph
     * @param graph
     *            Graph that receives the vertices and edges read from the file
     * @param rootName
     *            First-feature value that identifies the root vertex
     * @return The root vertex, or {@code null} when the file cannot be opened
     *         or no vertex matches {@code rootName}
     * @throws NumberFormatException
     *             if a vertex id or edge endpoint is not an integer
     */
    public static Instance loadFeatureGraphFromFile(File featureGraphFile,
            DirectedFeatureGraph<Instance, DefaultEdge> graph, String rootName) {
        System.out.println("Reading " + featureGraphFile);

        BufferedReader br;
        try {
            br = new BufferedReader(new FileReader(featureGraphFile));
        } catch (FileNotFoundException e) {
            // Nothing can be read; bail out instead of dereferencing a null reader.
            e.printStackTrace();
            return null;
        }

        Instance root = null;
        Map<Integer, Instance> verticesById = new HashMap<Integer, Instance>();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }

                if (line.contains("->")) {
                    String[] endpoints = line.split("->");
                    graph.addEdge(
                            verticesById.get(Integer.parseInt(endpoints[0].trim())),
                            verticesById.get(Integer.parseInt(endpoints[1].trim())));
                    continue;
                }

                // Vertex definition: "<id> <feature> <feature> ...".
                int idEnd = line.indexOf(' ');
                String idToken = (idEnd < 0) ? line : line.substring(0, idEnd);

                Instance ins = new Instance(line, false, null, null);
                verticesById.put(Integer.parseInt(idToken), ins);
                graph.addVertex(ins);

                if (root == null && firstFeature(line, idEnd).equals(rootName)) {
                    root = ins;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return root;
    }

    /**
     * Loads a feature graph from the HBase table "webpage".
     *
     * Rows are scanned from {@code domain + ":http/"} (inclusive) up to
     * {@code domain} with its last character incremented by one (exclusive).
     * The first pass creates one vertex per row from columns p:c (instance
     * data) and f:bas (instance name); rows whose f:bas value contains "/ "
     * are skipped. The second pass adds an edge from a row's f:bas vertex to
     * every vertex whose name equals a column qualifier in the row's "ol"
     * family, provided both vertices were created in the first pass.
     *
     * NOTE(review): the column names look like the Nutch web-page schema
     * (base URL, content, outlinks) - confirm against the table definition.
     *
     * @param domain
     *            Row-key prefix selecting the rows to load; must be non-empty
     * @param graph
     *            Graph that receives the vertices and edges
     * @return The vertex of the row whose key is exactly
     *         {@code domain + ":http/"}, or {@code null} when there is no such
     *         row or the table cannot be opened
     * @throws StringIndexOutOfBoundsException
     *             if {@code domain} is empty
     */
    public static Instance loadFeatureGraphFromNutchHBase(
            String domain,
            DirectedFeatureGraph<Instance, DefaultEdge> graph) {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM);

        // Compute the scan range before opening the table so that a bad
        // domain cannot leave an open table behind.
        String rootRowKey = domain + ":http/";
        String domainEnd = domain.substring(0, domain.length() - 1)
                + (char) (((int) domain.charAt(domain.length() - 1)) + 1);
        byte[] startRow = Bytes.toBytes(rootRowKey);
        byte[] stopRow = Bytes.toBytes(domainEnd);

        byte[] famF = Bytes.toBytes("f");
        byte[] qualBas = Bytes.toBytes("bas");
        byte[] famP = Bytes.toBytes("p");
        byte[] qualC = Bytes.toBytes("c");
        byte[] famOl = Bytes.toBytes("ol");

        HTable table = openTable(config, "webpage");
        if (table == null) {
            return null;
        }

        Map<String, Instance> verticesByName = new HashMap<String, Instance>();
        Instance root = null;
        try {
            // Pass 1: one vertex per row.
            Scan vertexScan = new Scan(startRow, stopRow);
            vertexScan.addColumn(famF, qualBas);
            vertexScan.addColumn(famP, qualC);
            ResultScanner vertexRows = null;
            try {
                vertexRows = table.getScanner(vertexScan);
                int index = 0;
                for (Result r = vertexRows.next(); r != null; r = vertexRows.next()) {
                    String content = firstValueAsString(r, famP, qualC);
                    String name = firstValueAsString(r, famF, qualBas);
                    if (name.contains("/ ")) {
                        continue;
                    }
                    Instance ins = new Instance(content, false, name, index);
                    verticesByName.put(name, ins);
                    System.out.println(name);
                    if (root == null && Bytes.toString(r.getRow()).equals(rootRowKey)) {
                        root = ins;
                    }
                    graph.addVertex(ins);
                    index++;
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (vertexRows != null) {
                    vertexRows.close();
                }
            }

            // Pass 2: edges. Without vertices no edge can qualify, so skip the scan.
            if (!verticesByName.isEmpty()) {
                Scan edgeScan = new Scan(startRow, stopRow);
                edgeScan.addFamily(famOl);
                edgeScan.addColumn(famF, qualBas);
                ResultScanner edgeRows = null;
                try {
                    edgeRows = table.getScanner(edgeScan);
                    for (Result r = edgeRows.next(); r != null; r = edgeRows.next()) {
                        String source = firstValueAsString(r, famF, qualBas);
                        Map<byte[], byte[]> outlinks = r.getFamilyMap(famOl);
                        if (outlinks == null || !verticesByName.containsKey(source)) {
                            continue;
                        }
                        // The edge target is named by the column qualifier.
                        for (byte[] qualifier : outlinks.keySet()) {
                            String target = Bytes.toString(qualifier);
                            if (verticesByName.containsKey(target)) {
                                graph.addEdge(verticesByName.get(source),
                                        verticesByName.get(target));
                            }
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    if (edgeRows != null) {
                        edgeRows.close();
                    }
                }
            }
        } finally {
            closeTable(table);
        }
        return root;
    }

    /**
     * Loads a feature graph from the HBase table "wikipedia".
     *
     * Rows are scanned from key "A" (inclusive) to "B" (exclusive). The first
     * pass creates one vertex per row from columns p:text (instance data) and
     * p:t (instance name). The second pass adds an edge from a row's p:t
     * vertex to every vertex whose name equals a cell value in the row's "ol"
     * family, provided both vertices were created in the first pass.
     *
     * @param domain
     *            Not used by this loader; the scan range is fixed
     * @param graph
     *            Graph that receives the vertices and edges
     * @return The first vertex whose p:t value is exactly "A", or {@code null}
     *         when there is none or the table cannot be opened
     */
    public static Instance loadFeatureGraphFromWikiHbase(
            String domain,
            DirectedFeatureGraph<Instance, DefaultEdge> graph) {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM);

        byte[] startRow = Bytes.toBytes("A");
        byte[] stopRow = Bytes.toBytes("B");

        byte[] famP = Bytes.toBytes("p");
        byte[] qualT = Bytes.toBytes("t");
        byte[] qualText = Bytes.toBytes("text");
        byte[] famOl = Bytes.toBytes("ol");

        HTable table = openTable(config, "wikipedia");
        if (table == null) {
            return null;
        }

        Map<String, Instance> verticesByName = new HashMap<String, Instance>();
        Instance root = null;
        try {
            // Pass 1: one vertex per row.
            Scan vertexScan = new Scan(startRow, stopRow);
            vertexScan.addColumn(famP, qualT);
            vertexScan.addColumn(famP, qualText);
            ResultScanner vertexRows = null;
            try {
                vertexRows = table.getScanner(vertexScan);
                int index = 0;
                for (Result r = vertexRows.next(); r != null; r = vertexRows.next()) {
                    String content = firstValueAsString(r, famP, qualText);
                    String name = firstValueAsString(r, famP, qualT);
                    Instance ins = new Instance(content, false, name, index);
                    verticesByName.put(name, ins);
                    System.out.println(name);
                    if (root == null && name.equals("A")) {
                        root = ins;
                    }
                    graph.addVertex(ins);
                    index++;
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (vertexRows != null) {
                    vertexRows.close();
                }
            }

            // Pass 2: edges. Without vertices no edge can qualify, so skip the scan.
            if (!verticesByName.isEmpty()) {
                Scan edgeScan = new Scan(startRow, stopRow);
                edgeScan.addFamily(famOl);
                edgeScan.addColumn(famP, qualT);
                ResultScanner edgeRows = null;
                try {
                    edgeRows = table.getScanner(edgeScan);
                    for (Result r = edgeRows.next(); r != null; r = edgeRows.next()) {
                        String source = firstValueAsString(r, famP, qualT);
                        Map<byte[], byte[]> outlinks = r.getFamilyMap(famOl);
                        if (outlinks == null || !verticesByName.containsKey(source)) {
                            continue;
                        }
                        // The edge target is named by the cell value.
                        for (byte[] value : outlinks.values()) {
                            String target = Bytes.toString(value);
                            if (verticesByName.containsKey(target)) {
                                graph.addEdge(verticesByName.get(source),
                                        verticesByName.get(target));
                            }
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    if (edgeRows != null) {
                        edgeRows.close();
                    }
                }
            }
        } finally {
            closeTable(table);
        }
        return root;
    }

    /**
     * Returns the first feature of a trimmed vertex line, i.e. the token that
     * follows the id, or the empty string when the line holds only an id.
     *
     * @param vertexLine
     *            Trimmed vertex definition line
     * @param idEnd
     *            Index of the first space in {@code vertexLine}, or a negative
     *            value when the line contains no space
     */
    private static String firstFeature(String vertexLine, int idEnd) {
        if (idEnd < 0) {
            return "";
        }
        // Trim so that repeated spaces after the id do not yield an empty token.
        String features = vertexLine.substring(idEnd + 1).trim();
        int end = features.indexOf(' ');
        return (end < 0) ? features : features.substring(0, end);
    }

    /**
     * Returns the value of the first cell of the given column as a string, or
     * the empty string when the row has no cell for that column.
     */
    private static String firstValueAsString(Result row, byte[] family, byte[] qualifier) {
        List<KeyValue> cells = row.getColumn(family, qualifier);
        if (cells == null || cells.isEmpty()) {
            return "";
        }
        return Bytes.toString(cells.get(0).getValue());
    }

    /** Opens the named table, or prints the failure and returns {@code null}. */
    private static HTable openTable(Configuration config, String tableName) {
        try {
            return new HTable(config, tableName);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the table, printing (not propagating) any I/O failure. */
    private static void closeTable(HTable table) {
        try {
            table.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}