package mia.recommender.ch06; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.mahout.math.VarIntWritable; import org.apache.mahout.math.VarLongWritable; public final class WikipediaItemIDIndexMapper extends Mapper<LongWritable,Text,VarIntWritable, VarLongWritable> { private static final Pattern NUMBERS = Pattern.compile("(\\d+)"); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); Matcher m = NUMBERS.matcher(line); m.find(); VarIntWritable index = new VarIntWritable(); VarLongWritable itemID = new VarLongWritable(); while (m.find()) { long item = Long.parseLong(m.group()); itemID.set(item); index.set(idToIndex(item)); context.write(index, itemID); } } static int idToIndex(long itemID) { return 0x7FFFFFFF & ((int) itemID ^ (int) (itemID >>> 32)); } }