package ${package}; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import org.lilyproject.client.LilyClient; import org.lilyproject.mapreduce.LilyMapReduceUtil; import org.lilyproject.repository.api.*; import org.lilyproject.util.io.Closer; import java.io.IOException; /** * Sample MapReduce reducer which writes its output to Lily. This is simply done using * LilyClient, like any other client application would do. */ public class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private LilyClient lilyClient; private LRepository repository; @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); this.lilyClient = LilyMapReduceUtil.getLilyClient(context.getConfiguration()); try { this.repository = lilyClient.getDefaultRepository(); } catch (RepositoryException e) { throw new RuntimeException("Failed to get repository", e); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { Closer.close(lilyClient); super.cleanup(context); } @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } try { // Create a record per word, with a field containing the count. // If there is already a record for this word, it will be overwritten. // The word is used a record id, we assume it only contains allowed characters. RecordId recordId = repository.getIdGenerator().newRecordId(key.toString()); LTable table = repository.getDefaultTable(); Record record = table.newRecord(recordId); record.setRecordType(new QName("mrsample", "Summary")); record.setField(new QName("mrsample", "wordcount"), sum); table.createOrUpdate(record); } catch (InterruptedException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } } }