/*
* Copyright (c) 2010 Chris Smowton <chris.smowton@cl.cam.ac.uk>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package uk.co.mrry.mercator.mapreduce;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.apache.hadoop.io.WritableComparator;
/**
 * Merges key/value records read from several input streams and hands out the
 * values belonging to one key at a time.
 *
 * <p>For every input stream the merger keeps the most recently deserialized
 * record (the stream's "head"). A head of {@code null} marks a stream that has
 * reached end-of-file. The active key is the least head key according to
 * {@code compareTo}; {@link #getNextElement()} returns the values whose head key
 * compares equal to it and moves on to the next least key once none are left.
 *
 * <p>NOTE(review): grouping all values of a key together presumably relies on
 * every input stream being sorted by key in ascending order - confirm with the
 * code that writes these streams.
 *
 * @param <K> key type read from the streams
 * @param <V> value type read from the streams
 */
public class SWReduceInputMerger<K extends WritableComparable, V extends Writable> {

    /**
     * Iterator (and single-use {@link Iterable}) over the values of one key.
     *
     * <p>It holds no data of its own: {@link #hasNext()} inspects the merger's
     * stream heads and {@link #next()} delegates to
     * {@link SWReduceInputMerger#getNextElement()}.
     */
    public class SWReduceIterator implements Iterator<V>, Iterable<V> {
        private K key;

        /**
         * @param key the key whose presence at a stream head {@link #hasNext()} checks for
         */
        public SWReduceIterator(K key) {
            this.key = key;
        }

        /**
         * @return {@code true} if at least one stream head currently carries a key that
         *         compares equal to this iterator's key
         */
        @Override
        public boolean hasNext() {
            for (int i = 0; i < streamHeadsKeys.size(); ++i) {
                // Keys are deserialized into distinct objects, so compare by value, not identity.
                if (sameKey(streamHeadsKeys.get(i), key)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Returns the merger's next value via {@link SWReduceInputMerger#getNextElement()}.
         *
         * <p>Unlike the general {@link Iterator} contract this does not throw when the
         * values are used up: it returns {@code null}, and that call also advances the
         * merger to its next key. The value returned is chosen by the merger's active
         * key, not by the key this iterator was created with.
         *
         * @return the next value, or {@code null} if there is none for the active key
         */
        @Override
        public V next() {
            return getNextElement();
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("method not implemented");
        }

        /**
         * @return this same object; iterating twice does not restart from the beginning
         */
        @Override
        public Iterator<V> iterator() {
            return this;
        }
    }

    private FileInputStream[] inputs;
    // Head record of each stream, indexed by stream ID; null once that stream is exhausted.
    private ArrayList<K> streamHeadsKeys;
    private ArrayList<V> streamHeadsValues;
    // Key whose values are currently being handed out; null when no key is left.
    private K currentKey;
    // Stream index at which the scan for the active key resumes; -1 when no key is left.
    private int nextStreamID;
    // TODO does this have to be writable?
    private WritableSerialization serialization;
    // Comparator for the key class; null if no key could be read at construction time.
    // It is not used by the merge itself, which compares keys through compareTo.
    private final WritableComparator keyComparator;

    /**
     * Reads the first record of every stream and selects the least key as the active key.
     *
     * @param fis the input streams to merge; streams that are empty are treated as exhausted
     * @throws RuntimeException wrapping any {@link IOException} other than end-of-file
     */
    public SWReduceInputMerger(FileInputStream[] fis) {
        inputs = fis;
        streamHeadsKeys = new ArrayList<K>(fis.length);
        streamHeadsValues = new ArrayList<V>(fis.length);
        serialization = new WritableSerialization();
        for (int i = 0; i < fis.length; ++i) {
            // The initial capacity does not create elements; add the slot before
            // fetchFromStream() overwrites it with set().
            streamHeadsKeys.add(null);
            streamHeadsValues.add(null);
            advanceStream(i);
        }
        selectNextKey();
        keyComparator = (currentKey == null) ? null : WritableComparator.get(currentKey.getClass());
    }

    /**
     * Returns the next value for the active key and refills that stream's head.
     *
     * <p>When no stream head carries the active key any more, the merger switches to
     * the least remaining key (or to "no key" if every stream is exhausted) and
     * returns {@code null} for this call.
     *
     * @return the next value of the active key, or {@code null} if there is none
     * @throws RuntimeException wrapping any {@link IOException} other than end-of-file
     */
    public V getNextElement() {
        if (currentKey != null) {
            // Resume at the stream that produced the previous value; guard against the
            // -1 sentinel in case a key was installed after the merger ran dry.
            for (int i = Math.max(nextStreamID, 0); i < streamHeadsKeys.size(); ++i) {
                if (sameKey(streamHeadsKeys.get(i), currentKey)) {
                    V val = streamHeadsValues.get(i);
                    nextStreamID = i;
                    // Replace the consumed head; end-of-file only retires this one stream.
                    advanceStream(i);
                    return val;
                }
            }
        }
        selectNextKey();
        return null;
    }

    /**
     * Finds the stream whose head key is the least.
     *
     * @return the lowest stream index holding the least head key, or -1 if every
     *         stream is exhausted
     */
    private int getLeastKeyStreamID() {
        int currentBestID = -1;
        K currentBest = null;
        for (int i = 0; i < streamHeadsKeys.size(); ++i) {
            K candidate = streamHeadsKeys.get(i);
            if (candidate == null) {
                continue; // exhausted stream
            }
            // Strict comparison keeps the lowest index among equal keys, so the forward
            // scan in getNextElement() cannot skip a stream that holds the new key.
            if (currentBest == null || currentBest.compareTo(candidate) > 0) {
                currentBest = candidate;
                currentBestID = i;
            }
        }
        return currentBestID;
    }

    /** Makes the least remaining head key the active key, or clears it if none is left. */
    private void selectNextKey() {
        nextStreamID = getLeastKeyStreamID();
        currentKey = (nextStreamID < 0) ? null : streamHeadsKeys.get(nextStreamID);
    }

    /**
     * Loads the next record of one stream into the head arrays, or marks the stream
     * as exhausted (null head) when it has reached end-of-file.
     */
    private void advanceStream(int streamID) {
        try {
            fetchFromStream(streamID);
        } catch (EOFException eofe) {
            // Clear the slot so the record that was just consumed is not seen again.
            streamHeadsKeys.set(streamID, null);
            streamHeadsValues.set(streamID, null);
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }

    /** Two keys match when both are present and compare as equal. */
    private boolean sameKey(K a, K b) {
        return a != null && b != null && a.compareTo(b) == 0;
    }

    /**
     * Deserializes one key and one value from the given stream and stores them as
     * that stream's head.
     *
     * @throws IOException if reading fails; the head arrays are left untouched in that case
     */
    @SuppressWarnings("unchecked")
    private void fetchFromStream(int streamID) throws IOException {
        // NOTE(review): no target class is passed to getDeserializer() and no instance to
        // deserialize(); confirm the deserializer can create key/value objects this way.
        Deserializer<Writable> deserializer = serialization.getDeserializer(null);
        deserializer.open(inputs[streamID]);
        K key = (K) deserializer.deserialize(null);
        V value = (V) deserializer.deserialize(null);
        // update head arrays with next entry from stream
        streamHeadsKeys.set(streamID, key);
        streamHeadsValues.set(streamID, value);
    }

    /**
     * Overrides the active key.
     *
     * @param key the key whose values {@link #getNextElement()} should return next
     */
    public void setKey(K key) {
        currentKey = key;
        // An externally chosen key may sit at the head of any stream, so rescan from the first.
        nextStreamID = 0;
    }

    /**
     * @return the active key, or {@code null} if there is none
     */
    public K getKey() {
        return currentKey;
    }

    /**
     * @return {@code true} while there is an active key
     */
    public boolean hasMoreKeys() {
        return (currentKey != null);
    }

    /**
     * Creates an iterator over the values of the active key.
     *
     * @return a new {@link SWReduceIterator} bound to the active key
     * @throws NullPointerException if there is no active key
     */
    public Iterable<V> getIterator() {
        if (currentKey == null) throw new NullPointerException();
        return new SWReduceIterator(currentKey);
    }
}