DefaultBlurReducer.java example

Explorer
incubator-blur-master
package org.apache.blur.mapreduce.lib;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * This class is to be used in conjunction with {@link BlurOutputFormat}.
 * <br/><br/>
 * 
 * Here is a basic example of how to use both the {@link BlurOutputFormat} and
 * the {@link DefaultBlurReducer} together to build indexes.<br/><br/>
 * 
 * Once this job has successfully completed, the indexes will be imported by
 * the running shard servers and placed online. This is a polling mechanism in
 * the shard servers and by default they poll every 10 seconds.
 * 
 * 
 * <pre>
 * Job job = new Job(conf, "blur index");
 * job.setJarByClass(BlurOutputFormatTest.class);
 * job.setMapperClass(CsvBlurMapper.class);
 * job.setReducerClass(DefaultBlurReducer.class);
 * job.setNumReduceTasks(1);
 * job.setInputFormatClass(TrackingTextInputFormat.class);
 * job.setOutputKeyClass(Text.class);
 * job.setOutputValueClass(BlurMutate.class);
 * job.setOutputFormatClass(BlurOutputFormat.class);
 *
 * FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
 * CsvBlurMapper.addColumns(job, "cf1", "col");
 *
 * TableDescriptor tableDescriptor = new TableDescriptor();
 * tableDescriptor.setShardCount(1);
 * tableDescriptor.setAnalyzerDefinition(new AnalyzerDefinition());
 * tableDescriptor.setTableUri(new Path(TEST_ROOT_DIR + "/out").toString());
 * BlurOutputFormat.setTableDescriptor(job, tableDescriptor);
 * </pre>
 * 
 * 
 */
public class DefaultBlurReducer extends Reducer<Writable, BlurMutate, Writable, BlurMutate> {

  /**
   * Wires the task context into {@link BlurOutputFormat}: the context is
   * passed to {@code setProgressable}, and a {@link GetCounter} that resolves
   * counters against that same context is passed to {@code setGetCounter}.
   *
   * @param context the reducer task context
   */
  @Override
  protected void setup(final Context context) throws IOException, InterruptedException {
    BlurOutputFormat.setProgressable(context);

    // Adapter that forwards counter lookups to this task's context.
    final GetCounter contextCounters = new GetCounter() {
      @Override
      public Counter getCounter(Enum<?> counterName) {
        return context.getCounter(counterName);
      }
    };
    BlurOutputFormat.setGetCounter(contextCounters);
  }

  /**
   * Emits every mutate received for {@code key} unchanged, keyed by the
   * {@link Text} form of the key as produced by {@link #getTextKey(Writable)}.
   *
   * @param key     the incoming key; converted once, before any value is written
   * @param values  the mutates grouped under {@code key}
   * @param context the reducer task context the pairs are written to
   */
  @Override
  protected void reduce(Writable key, Iterable<BlurMutate> values, Context context) throws IOException,
      InterruptedException {
    final Text outputKey = getTextKey(key);
    for (BlurMutate mutate : values) {
      context.write(outputKey, mutate);
    }
  }

  /**
   * Converts the incoming key into a {@link Text}. This implementation only
   * accepts keys that already are {@link Text} and returns the same instance;
   * subclasses using another key type need to override this method.
   *
   * @param key the incoming reducer key
   * @return {@code key} cast to {@link Text}
   * @throws IllegalArgumentException if {@code key} is not a {@link Text}
   */
  protected Text getTextKey(Writable key) {
    if (!(key instanceof Text)) {
      throw new IllegalArgumentException("Key is not of type Text, you will need to "
          + "override DefaultBlurReducer and implement \"getTextKey\" method.");
    }
    return (Text) key;
  }
}