/*
 * chombo: Hadoop Map Reduce utility
 * Author: Pranab Ghosh
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.chombo.util;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Big list of tuples. The first {@code maxInMemory} tuples are kept in memory;
 * every tuple written after that is serialized to a spill file on disk.
 * Supports sequential write followed by sequential read.
 *
 * <p>Typical usage: {@code open(Mode.Write)}, repeated {@code write()},
 * {@code close()}, then {@code open(Mode.Read)}, repeated {@code read()} until
 * it returns {@code null}, then {@code close()}. The total size is only
 * recorded when the list is closed after writing.
 *
 * <p>Only String, Integer, Long and Double fields are given a type code for
 * spilling, and the field layout of the tuple that fills the in memory part
 * is used for all spilled tuples.
 *
 * @author pranab
 */
public class BigTupleList {
    private List<Tuple> tuples;
    private int maxInMemory;
    private String spillDirPath;
    private String spillFilePath;

    /** Access mode selected through {@link #open(Mode)}. */
    public enum Mode { Read, Write }

    private Mode mode;
    private int readCount;
    private int writeCount;
    private byte[] tupleFieldTypes;
    private DataOutputStream outStream;
    private int size;
    private DataInputStream inStream;

    /**
     * @param maxInMemory number of tuples held in memory before spilling starts;
     *        expected to be positive, because the spill file is only created at
     *        the moment the in memory count reaches this value
     * @param spillDirPath directory in which the spill file is created
     */
    public BigTupleList(int maxInMemory, String spillDirPath) {
        super();
        this.maxInMemory = maxInMemory;
        this.spillDirPath = spillDirPath;
    }

    /**
     * Prepares the list for a sequential pass. Opening for read rewinds the read
     * position; opening for write discards the in memory tuples and resets the counts.
     *
     * @param mode read or write
     */
    public void open(Mode mode) {
        // remember the mode, close() depends on it to pick the stream to release
        this.mode = mode;
        if (mode == Mode.Read) {
            readCount = 0;
        } else {
            tuples = new ArrayList<Tuple>();
            writeCount = 0;
            size = 0;
        }
    }

    /**
     * Closes streams and deletes the spill file if necessary. Equivalent to
     * {@code close(true)}.
     */
    public void close() {
        close(true);
    }

    /**
     * Closes the stream of the current mode. After a write pass the number of
     * tuples written becomes the size of the list. After a read pass the spill
     * file, if one exists, is deleted when {@code done} is true.
     *
     * @param done true if the list will not be read again, only relevant in read mode
     * @throws RuntimeException if a stream can not be closed or the spill file
     *         can not be deleted
     */
    public void close(boolean done) {
        if (mode == Mode.Read) {
            try {
                if (null != inStream) {
                    inStream.close();
                    inStream = null;
                }
            } catch (IOException ioe) {
                throw new RuntimeException("Failed to close spill file after read" + ioe, ioe);
            }

            //delete spill file, even when the reader never reached the spilled section
            if (done && null != spillFilePath) {
                File file = new File(spillFilePath);
                if (file.exists() && !file.delete()) {
                    throw new RuntimeException("Failed to delete spill file after read");
                }
            }
        } else {
            size = writeCount;
            try {
                if (null != outStream) {
                    outStream.flush();
                    outStream.close();
                    outStream = null;
                }
            } catch (IOException ioe) {
                throw new RuntimeException("Failed to close spil file after write" + ioe, ioe);
            }
        }
    }

    /**
     * Appends a tuple, in memory while there is room and in the spill file afterwards.
     *
     * @param tuple tuple to append
     * @return the tuple itself if it was serialized to the spill file, null if
     *         the list kept a reference to it in memory
     * @throws RuntimeException if writing to the spill file fails
     */
    public Tuple write(Tuple tuple) {
        Tuple retTuple = null;
        if (writeCount < maxInMemory) {
            tuples.add(tuple);
            if (++writeCount == maxInMemory) {
                //switch to spill
                prepareForSpillWrite(tuple);
            }
        } else {
            //write to disk
            try {
                for (int i = 0; i < tupleFieldTypes.length; ++i) {
                    byte fieldType = tupleFieldTypes[i];
                    if (fieldType == Tuple.STRING) {
                        outStream.writeUTF(tuple.getString(i));
                    } else if (fieldType == Tuple.INT) {
                        outStream.writeInt(tuple.getInt(i));
                    } else if (fieldType == Tuple.LONG) {
                        //must be written as a long, read() consumes 8 bytes for this field
                        outStream.writeLong(((Number) tuple.get(i)).longValue());
                    } else if (fieldType == Tuple.DOUBLE) {
                        outStream.writeDouble(tuple.getDouble(i));
                    }
                }
                //spilled tuples count towards the size, otherwise read() never reaches them
                ++writeCount;
                retTuple = tuple;
            } catch (IOException ioe) {
                throw new RuntimeException("Failed spilling data to spill file" + ioe, ioe);
            }
        }
        return retTuple;
    }

    /**
     * Returns the next tuple of the current read pass, first from memory and then
     * from the spill file.
     *
     * @return next tuple or null when all tuples have been read
     * @throws RuntimeException if reading from the spill file fails
     */
    public Tuple read() {
        Tuple tuple = null;
        if (readCount < maxInMemory) {
            //from memory
            if (readCount < tuples.size()) {
                tuple = tuples.get(readCount);
                if (++readCount == maxInMemory) {
                    //in memory part exhausted, remaining tuples come from the spill file
                    prepareForSpillRead();
                }
            }
        } else if (readCount < size) {
            //from spill
            try {
                tuple = new Tuple();
                for (int i = 0; i < tupleFieldTypes.length; ++i) {
                    byte fieldType = tupleFieldTypes[i];
                    if (fieldType == Tuple.STRING) {
                        tuple.add(inStream.readUTF());
                    } else if (fieldType == Tuple.INT) {
                        tuple.add(inStream.readInt());
                    } else if (fieldType == Tuple.LONG) {
                        tuple.add(inStream.readLong());
                    } else if (fieldType == Tuple.DOUBLE) {
                        tuple.add(inStream.readDouble());
                    }
                }
                ++readCount;
            } catch (IOException ioe) {
                throw new RuntimeException("Failed to read data from spilli spill file" + ioe, ioe);
            }
        }
        return tuple;
    }

    /**
     * @return number of tuples written, as recorded when the list was closed after writing
     */
    public int getSize() {
        return size;
    }

    /**
     * Records the field types of the given tuple as the layout for spilled tuples
     * and opens the spill file for writing.
     *
     * @param tuple tuple whose field types define the spill record layout
     * @throws RuntimeException if the spill file can not be created
     */
    private void prepareForSpillWrite(Tuple tuple) {
        tupleFieldTypes = new byte[tuple.getSize()];
        for (int i = 0; i < tupleFieldTypes.length; ++i) {
            Object obj = tuple.get(i);
            // NOTE(review): a field of any other type gets no explicit type code here and is
            // presumably skipped on spill - confirm against the constants in Tuple
            if (obj instanceof String) {
                tupleFieldTypes[i] = Tuple.STRING;
            } else if (obj instanceof Integer) {
                tupleFieldTypes[i] = Tuple.INT;
            } else if (obj instanceof Long) {
                tupleFieldTypes[i] = Tuple.LONG;
            } else if (obj instanceof Double) {
                tupleFieldTypes[i] = Tuple.DOUBLE;
            }
        }

        spillFilePath = spillDirPath + "/spill-" + System.currentTimeMillis();
        try {
            //buffered, the data stream otherwise hits the file for every few bytes
            outStream = new DataOutputStream(
                    new BufferedOutputStream(new FileOutputStream(spillFilePath)));
        } catch (FileNotFoundException fnf) {
            throw new RuntimeException("error creating spill file" + fnf, fnf);
        }
    }

    /**
     * Opens the spill file for reading from its beginning.
     *
     * @throws RuntimeException if the spill file can not be opened
     */
    private void prepareForSpillRead() {
        try {
            inStream = new DataInputStream(
                    new BufferedInputStream(new FileInputStream(spillFilePath)));
        } catch (FileNotFoundException fnf) {
            throw new RuntimeException("Error opening spill file for read" + fnf, fnf);
        }
    }
}