import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
/**
 * Mapper that rewrites textual edge records into edges between numeric vertex ids.
 *
 * <p>{@link #setup} loads a {@code label:id} lookup table from a file. {@link #map} then reads
 * lines made of three whitespace-separated tokens (source, relation, target), keeps only the
 * last ':'-separated segment of each token, resolves source and target through the lookup table
 * and emits {@code (key, "sourceId:targetId:relation")}, where the key is the non-negative
 * {@link #hash} of {@code source + relation + target}.
 *
 * <p>Records that cannot be processed (malformed lines, vertices missing from the lookup table)
 * are skipped and counted in the {@value #COUNTER_GROUP} counter group instead of failing the
 * task with an unchecked exception.
 */
public class NellEdgeMapper extends Mapper<Object, Text, LongWritable, Text> {

    /** Configuration key that overrides the location of the vertex lookup file. */
    public static final String VERTEX_MAP_PATH_KEY = "nell.vertexmap.path";

    /** Lookup file used when {@link #VERTEX_MAP_PATH_KEY} is not set. */
    public static final String DEFAULT_VERTEX_MAP_PATH = "hdfs:/user/nellout50/part-r-00000";

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "NellEdgeMapper";

    /** Token separator for edge lines; compiled once instead of once per record. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Vertex label to vertex id, filled once per task in {@link #setup}. */
    private final Map<String, Long> vertexIds = new HashMap<String, Long>();

    /**
     * Loads the vertex lookup table.
     *
     * <p>Each non-blank line is split on ':'; the first field is the label and the second field
     * is parsed as the {@code long} id. Blank lines are ignored. Lines with fewer than two
     * fields or a non-numeric id are skipped and counted as {@code MALFORMED_VERTEX_LINES}.
     * When a label occurs more than once the last occurrence wins.
     *
     * @param context task context; supplies the configuration and the counters
     * @throws IOException if the lookup file cannot be opened or read
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Use the task's configuration so job-level settings apply (including an optional
        // override of the lookup file location).
        Configuration conf = context.getConfiguration();
        Path vertexFile = new Path(conf.get(VERTEX_MAP_PATH_KEY, DEFAULT_VERTEX_MAP_PATH));

        // Hadoop caches and shares FileSystem instances, so only the reader is closed below.
        FileSystem fs = vertexFile.getFileSystem(conf);

        // Decode as UTF-8 so labels compare equal to those obtained from Text.toString() in
        // map(). Presumably the lookup file was written by a Hadoop text output format (UTF-8);
        // confirm if it is produced differently.
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(fs.open(vertexFile), "UTF-8"));
        boolean sawAnyLine = false;
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                sawAnyLine = true;
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] fields = trimmed.split(":");
                if (fields.length < 2) {
                    context.getCounter(COUNTER_GROUP, "MALFORMED_VERTEX_LINES").increment(1);
                    continue;
                }
                try {
                    vertexIds.put(fields[0], Long.parseLong(fields[1]));
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, "MALFORMED_VERTEX_LINES").increment(1);
                }
            }
        } finally {
            reader.close();
        }

        if (!sawAnyLine) {
            System.out.println("file is empty");
        }
    }

    /**
     * Runs the mapper. Delegates unchanged to {@link Mapper#run}.
     */
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        super.run(context);
    }

    /**
     * Converts one edge line into an id-based edge record.
     *
     * <p>Lines starting with '#' and blank lines are ignored. Lines with fewer than three
     * tokens, or with a token consisting only of ':' characters, are counted as
     * {@code MALFORMED_EDGE_LINES} and skipped. Edges whose source or target is absent from the
     * lookup table are reported on standard output, counted as {@code UNKNOWN_VERTEX_EDGES} and
     * skipped.
     *
     * @param key     input key; not used
     * @param value   one line of edge text
     * @param context sink for the emitted {@code (key, "sourceId:targetId:relation")} pair
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.startsWith("#")) {
            return;
        }
        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            return;
        }

        String[] tokens = WHITESPACE.split(trimmed);
        if (tokens.length < 3) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_EDGE_LINES").increment(1);
            return;
        }

        String src = lastSegment(tokens[0]);
        String relation = lastSegment(tokens[1]);
        String target = lastSegment(tokens[2]);
        if (src == null || relation == null || target == null) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_EDGE_LINES").increment(1);
            return;
        }

        // Keep the boxed values: unboxing a missing entry would throw NullPointerException.
        Long srcId = vertexIds.get(src);
        Long targetId = vertexIds.get(target);
        if (srcId == null) {
            System.out.println("TESTNULL :" + src);
        }
        if (targetId == null) {
            System.out.println("TESTNULL :" + target);
        }
        if (srcId == null || targetId == null) {
            context.getCounter(COUNTER_GROUP, "UNKNOWN_VERTEX_EDGES").increment(1);
            return;
        }

        long edgeKey = nonNegative(hash(src + relation + target));
        String edge = srcId + ":" + targetId + ":" + relation;
        context.write(new LongWritable(edgeKey), new Text(edge));
    }

    /**
     * Returns the last ':'-separated segment of {@code token}, or {@code null} when the token
     * has no segment at all (it consists only of ':' characters).
     */
    private static String lastSegment(String token) {
        String[] parts = token.trim().split(":");
        return parts.length == 0 ? null : parts[parts.length - 1];
    }

    /**
     * Absolute value that is never negative: {@code Math.abs(Long.MIN_VALUE)} overflows and
     * stays negative, so that single value is mapped to 0. All other inputs give
     * {@code Math.abs(h)}.
     */
    private static long nonNegative(long h) {
        return h == Long.MIN_VALUE ? 0L : Math.abs(h);
    }

    /**
     * Computes a 64-bit polynomial hash of a string: starting from a fixed seed, each character
     * is folded in as {@code h = 31 * h + c}. Arithmetic wraps on overflow, so the result may be
     * negative.
     *
     * @param string text to hash; must not be {@code null}
     * @return the hash value
     */
    public static long hash(String string) {
        long h = 1125899906842597L; // prime
        int len = string.length();
        for (int i = 0; i < len; i++) {
            h = 31 * h + string.charAt(i);
        }
        return h;
    }
}