/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.common;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A {@link RecordWriter} for tests that collects everything written to it in memory,
 * keyed by output key, and can build {@code Mapper.Context}/{@code Reducer.Context}
 * instances around itself for driving mappers and reducers directly.
 */
public final class DummyRecordWriter<K extends Writable, V extends Writable> extends RecordWriter<K, V> {

  private final List<K> keysInInsertionOrder = Lists.newArrayList();
  private final Map<K, List<V>> data = Maps.newHashMap();

  @SuppressWarnings("unchecked")
  @Override
  public void write(K key, V value) {
    // If the caller reuses the same Writable instance across calls, we must clone it;
    // otherwise the map contents would be mutated after insertion.
    try {
      K keyToUse = key instanceof NullWritable ? key : (K) cloneWritable(key);
      V valueToUse = (V) cloneWritable(value);
      keysInInsertionOrder.add(keyToUse);
      List<V> points = data.get(keyToUse);
      if (points == null) {
        points = Lists.newArrayList();
        data.put(keyToUse, points);
      }
      points.add(valueToUse);
    } catch (IOException e) {
      throw new RuntimeException(e.getMessage(), e);
    }
  }

  // Deep-copies a Writable by serializing it and reading it back into a freshly
  // instantiated object of the same class.
  private Writable cloneWritable(Writable original) throws IOException {
    Writable clone;
    try {
      clone = original.getClass().asSubclass(Writable.class).newInstance();
    } catch (Exception e) {
      throw new RuntimeException("Unable to instantiate writable!", e);
    }
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));
    clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    return clone;
  }

  @Override
  public void close(TaskAttemptContext context) {
  }

  public Map<K, List<V>> getData() {
    return data;
  }

  public List<V> getValue(K key) {
    return data.get(key);
  }

  public Set<K> getKeys() {
    return data.keySet();
  }

  public Iterable<K> getKeysInInsertionOrder() {
    return keysInInsertionOrder;
  }
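  // A minimal mapper-side usage sketch (hypothetical: "MyMapper" and the
  // LongWritable/Text key and value types are assumptions for illustration,
  // and the mapper's map(...) method must be visible to the calling test):
  //
  //   MyMapper mapper = new MyMapper();
  //   DummyRecordWriter<Text, Text> writer = new DummyRecordWriter<Text, Text>();
  //   Mapper<LongWritable, Text, Text, Text>.Context context =
  //       DummyRecordWriter.build(mapper, new Configuration(), writer);
  //   mapper.map(new LongWritable(1), new Text("some input"), context);
  //   // writer.getData() now holds everything the mapper emitted.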
  public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
                                                                      Configuration configuration,
                                                                      RecordWriter<K2, V2> output) {
    // Use reflection since the context types changed incompatibly between
    // Hadoop 0.20 and 0.23.
    try {
      return buildNewMapperContext(configuration, output);
    } catch (Exception | IncompatibleClassChangeError e) {
      try {
        return buildOldMapperContext(mapper, configuration, output);
      } catch (Exception ex) {
        throw new IllegalStateException(ex);
      }
    }
  }

  public static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2> reducer,
                                                                       Configuration configuration,
                                                                       RecordWriter<K2, V2> output,
                                                                       Class<K1> keyClass,
                                                                       Class<V1> valueClass) {
    // Use reflection since the context types changed incompatibly between
    // Hadoop 0.20 and 0.23.
    try {
      return buildNewReducerContext(configuration, output, keyClass, valueClass);
    } catch (Exception | IncompatibleClassChangeError e) {
      try {
        return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
      } catch (Exception ex) {
        throw new IllegalStateException(ex);
      }
    }
  }
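  // How the helpers below work: the "new" variants instantiate Hadoop 0.23's
  // MapContextImpl/ReduceContextImpl reflectively and wrap them through
  // WrappedMapper/WrappedReducer; the "old" variants directly invoke the
  // constructor of the nested Context class that Hadoop 0.20 declared on
  // Mapper and Reducer. The nulls passed to the constructors stand in for
  // framework collaborators these tests never touch, while the package-local
  // DummyStatusReporter and MockIterator test doubles supply the pieces the
  // contexts do use.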
  @SuppressWarnings({"unchecked", "rawtypes"})
  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
      Configuration configuration, RecordWriter<K2, V2> output) throws Exception {
    Class<?> mapContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl");
    Constructor<?> cons = mapContextImplClass.getConstructors()[0];
    Object mapContextImpl = cons.newInstance(configuration,
        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
    Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
    Object wrappedMapper = wrappedMapperClass.getConstructor().newInstance();
    Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
    return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
  }

  @SuppressWarnings({"unchecked", "rawtypes"})
  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
      Mapper<K1, V1, K2, V2> mapper, Configuration configuration,
      RecordWriter<K2, V2> output) throws Exception {
    Constructor<?> cons = getNestedContextConstructor(mapper.getClass());
    // the first argument to the constructor is the enclosing instance
    return (Mapper.Context) cons.newInstance(mapper, configuration,
        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
  }

  @SuppressWarnings({"unchecked", "rawtypes"})
  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
      Configuration configuration, RecordWriter<K2, V2> output, Class<K1> keyClass,
      Class<V1> valueClass) throws Exception {
    Class<?> reduceContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl");
    Constructor<?> cons = reduceContextImplClass.getConstructors()[0];
    Object reduceContextImpl = cons.newInstance(configuration,
        new TaskAttemptID(), new MockIterator(), null, null, output, null,
        new DummyStatusReporter(), null, keyClass, valueClass);
    Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
    Object wrappedReducer = wrappedReducerClass.getConstructor().newInstance();
    Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
    return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
  }

  @SuppressWarnings({"unchecked", "rawtypes"})
  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
      Reducer<K1, V1, K2, V2> reducer, Configuration configuration,
      RecordWriter<K2, V2> output, Class<K1> keyClass,
      Class<V1> valueClass) throws Exception {
    Constructor<?> cons = getNestedContextConstructor(reducer.getClass());
    // the first argument to the constructor is the enclosing instance
    return (Reducer.Context) cons.newInstance(reducer, configuration,
        new TaskAttemptID(), new MockIterator(), null, null, output, null,
        new DummyStatusReporter(), null, keyClass, valueClass);
  }

  private static Constructor<?> getNestedContextConstructor(Class<?> outerClass) {
    // getClasses() also returns public member classes inherited from superclasses,
    // so this finds the nested Context type that Hadoop 0.20 declares on
    // Mapper/Reducer even when handed a user-defined subclass.
    for (Class<?> nestedClass : outerClass.getClasses()) {
      if ("Context".equals(nestedClass.getSimpleName())) {
        return nestedClass.getConstructors()[0];
      }
    }
    throw new IllegalStateException("Cannot find context class for " + outerClass);
  }

}
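// A reducer-side usage sketch (hypothetical: "MyReducer" and the Text types are
// assumptions for illustration, and reduce(...) must be visible to the calling test):
//
//   MyReducer reducer = new MyReducer();
//   DummyRecordWriter<Text, Text> writer = new DummyRecordWriter<Text, Text>();
//   Reducer<Text, Text, Text, Text>.Context context =
//       DummyRecordWriter.build(reducer, new Configuration(), writer, Text.class, Text.class);
//   reducer.reduce(new Text("key"), Arrays.asList(new Text("a"), new Text("b")), context);
//   // writer.getValue(new Text("key")) returns what was emitted for that key.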