import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.HashMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Mapper.Context; public class NellEdgeMapper extends Mapper <Object , Text, LongWritable, Text> { private HashMap<String, Long> VMap= new HashMap<String, Long>(); @Override protected void setup(Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub try{ Path pt=new Path("hdfs:/user/nellout50/part-r-00000"); FileSystem fs = FileSystem.get(new Configuration()); BufferedReader br=new BufferedReader(new InputStreamReader(fs.open(pt))); String line; line=br.readLine(); if(line==null)System.out.println("file is empty"); while (line != null){ System.out.println(line); String value=line.toString(); String[] strs = value.trim().split(":"); String label=strs[0]; long vid=Long.parseLong(strs[1]); System.out.println("TEST1: label="+label+" id "+vid); VMap.put(label,vid); System.out.println("TEST2: label="+label+" id "+VMap.get(label)); line=br.readLine(); } }catch(Exception e){ } } @Override public void run(Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.run(context); } @Override protected void map(Object key, Text value, Context context) throws IOException, InterruptedException { String line=value.toString(); if(!line.startsWith("#")){ String[] strs = line.trim().split("\\s+"); String srcstring=strs[0]; String[] srcsplit=srcstring.trim().split(":"); String src=srcsplit[srcsplit.length - 1]; String relationstr=strs[1]; String[] relationsplit=relationstr.trim().split(":"); String relation=relationsplit[relationsplit.length -1]; String deststring=strs[2]; String[] destsplit=deststring.trim().split(":"); String target=destsplit[destsplit.length - 1]; /* System.out.println("source : "+src+" relation : "+relation+" target : "+target); long srchash=Math.abs(hash(src)); srchash=srchash%(1<<20); long targethash=Math.abs(hash(target)); targethash=targethash%(1<<20); */ String v=""; if(VMap.get(src)==null){ System.out.println("TESTNULL :"+src); } if(VMap.get(target)==null){ System.out.println("TESTNULL :"+target); } long srcid=VMap.get(src); long tid=VMap.get(target); long relationhash=Math.abs(hash(src+relation+target)); v=srcid+":"+tid+":"+relation; context.write(new LongWritable(relationhash), new Text(v)); } }public static long hash(String string) { long h = 1125899906842597L; // prime int len = string.length(); for (int i = 0; i < len; i++) { h = 31*h + string.charAt(i); } return h; } }