package org.apache.blur.mapreduce.lib;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * This class is to be used in conjunction with {@link BlurOutputFormat}.<br/>
 * <br/>
 *
 * Here is a basic example of how to use both the {@link BlurOutputFormat} and
 * the {@link DefaultBlurReducer} together to build indexes.<br/>
 * <br/>
 *
 * Once this job has successfully completed the indexes will be imported by the
 * running shard servers and be placed online. This is a polling mechanism in
 * the shard servers and by default they poll every 10 seconds.<br/>
 * <br/>
 *
 * Job job = new Job(conf, "blur index");<br/>
 * job.setJarByClass(BlurOutputFormatTest.class);<br/>
 * job.setMapperClass(CsvBlurMapper.class);<br/>
 * job.setReducerClass(DefaultBlurReducer.class);<br/>
 * job.setNumReduceTasks(1);<br/>
 * job.setInputFormatClass(TrackingTextInputFormat.class);<br/>
 * job.setOutputKeyClass(Text.class);<br/>
 * job.setOutputValueClass(BlurMutate.class);<br/>
 * job.setOutputFormatClass(BlurOutputFormat.class);<br/>
 * <br/>
 * FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));<br/>
 * CsvBlurMapper.addColumns(job, "cf1", "col");<br/>
 * <br/>
 * TableDescriptor tableDescriptor = new TableDescriptor();<br/>
 * tableDescriptor.setShardCount(1);<br/>
 * tableDescriptor.setAnalyzerDefinition(new AnalyzerDefinition());<br/>
 * tableDescriptor.setTableUri(new Path(TEST_ROOT_DIR + "/out").toString());<br/>
 * BlurOutputFormat.setTableDescriptor(job, tableDescriptor);<br/>
 */
public class DefaultBlurReducer extends Reducer<Writable, BlurMutate, Writable, BlurMutate> {

  /**
   * Wires this task's context into {@link BlurOutputFormat} before any records
   * are reduced: the output format uses the context both to report progress and
   * to increment job counters via the {@link GetCounter} callback.
   *
   * @param context the Hadoop reduce-task context, captured by the counter callback
   * @throws IOException never thrown here; declared by the {@link Reducer} contract
   * @throws InterruptedException never thrown here; declared by the {@link Reducer} contract
   */
  @Override
  protected void setup(final Context context) throws IOException, InterruptedException {
    BlurOutputFormat.setProgressable(context);
    BlurOutputFormat.setGetCounter(new GetCounter() {
      @Override
      public Counter getCounter(Enum<?> name) {
        // Delegate counter lookups to the live task context.
        return context.getCounter(name);
      }
    });
  }

  /**
   * Converts the incoming key to {@link Text} once, then forwards every
   * {@link BlurMutate} for that key to the output unchanged.
   *
   * @param key the reduce key; must be convertible via {@link #getTextKey(Writable)}
   * @param values the mutations grouped under this key
   * @param context sink for the (key, mutate) pairs
   * @throws IOException if the underlying writer fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  @Override
  protected void reduce(Writable key, Iterable<BlurMutate> values, Context context) throws IOException,
      InterruptedException {
    final Text outKey = getTextKey(key);
    for (BlurMutate mutate : values) {
      context.write(outKey, mutate);
    }
  }

  /**
   * Extracts a {@link Text} key from the raw reduce key. Subclasses using a
   * non-Text key type should override this method to supply the conversion.
   *
   * @param key the raw reduce key
   * @return the key cast to {@link Text}
   * @throws IllegalArgumentException if the key is not a {@link Text} instance
   */
  protected Text getTextKey(Writable key) {
    if (!(key instanceof Text)) {
      throw new IllegalArgumentException("Key is not of type Text, you will need to "
          + "override DefaultBlurReducer and implement \"getTextKey\" method.");
    }
    return (Text) key;
  }
}