package edu.cmu.graphchi.engine.auxdata; import edu.cmu.graphchi.ChiFilenames; import edu.cmu.graphchi.ChiLogger; import edu.cmu.graphchi.datablocks.BytesToValueConverter; import edu.cmu.graphchi.datablocks.ChiPointer; import edu.cmu.graphchi.datablocks.DataBlockManager; import edu.cmu.graphchi.datablocks.IntConverter; import nom.tam.util.BufferedDataInputStream; import ucar.unidata.io.RandomAccessFile; import java.io.*; import java.util.Arrays; import java.util.Iterator; import java.util.logging.Logger; /** * Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ public class VertexData <VertexDataType> { private byte[] vertexData; private int vertexSt, vertexEn; private String baseFilename; private RandomAccessFile vertexDataFile; private BytesToValueConverter <VertexDataType> converter; private DataBlockManager blockManager; private boolean sparse; private int[] index; private int lastOffset = 0; private int lastStart = 0; private final static Logger logger = ChiLogger.getLogger("vertex-data"); public VertexData(int nvertices, String baseFilename, BytesToValueConverter<VertexDataType> converter) throws IOException { this(nvertices, baseFilename, converter, true); } public VertexData(int nvertices, String baseFilename, BytesToValueConverter<VertexDataType> converter, boolean _sparse) throws IOException { this.baseFilename = baseFilename; this.converter = converter; this.sparse = _sparse; File sparseDegreeFile = new File(ChiFilenames.getFilenameOfDegreeData(baseFilename, true)); if (sparse && !sparseDegreeFile.exists()) { sparse = false; logger.info("Sparse vertex data was allowed but sparse degree file did not exist using dense"); } File vertexfile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, converter, sparse)); if (!sparse) { long expectedSize = (long) converter.sizeOf() * (long) nvertices; // Check size and create if does not exists logger.info("Vertex file [" + vertexfile.getAbsolutePath() + "] length: " + vertexfile.length() + ", nvertices=" + nvertices + ", expected size: " + expectedSize); if (!vertexfile.exists() || vertexfile.length() < expectedSize) { if (!vertexfile.exists()) { vertexfile.createNewFile(); } logger.warning("Vertex data file did not exists, creating it. Vertices: " + nvertices); FileOutputStream fos = new FileOutputStream(vertexfile); byte[] tmp = new byte[32678]; long written = 0; while(written < expectedSize) { long n = Math.min(expectedSize - written, tmp.length); fos.write(tmp, 0, (int)n); written += n; } fos.close(); } } else { if (!vertexfile.exists()) { BufferedDataInputStream dis = new BufferedDataInputStream(new FileInputStream(sparseDegreeFile)); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(vertexfile))); byte[] empty = new byte[converter.sizeOf()]; try { while(true) { int vertexId = Integer.reverseBytes(dis.readInt()); dis.skipBytes(8); dos.writeInt(Integer.reverseBytes(vertexId)); dos.write(empty); } } catch (EOFException err) {} dos.close(); dis.close();; } } vertexDataFile = new RandomAccessFile(vertexfile.getAbsolutePath(), "rwd"); vertexEn = vertexSt = 0; } public void releaseAndCommit(int firstVertex, int blockId) throws IOException { assert(blockId >= 0); byte[] data = blockManager.getRawBlock(blockId); if (!sparse) { long dataStart = (long) firstVertex * (long) converter.sizeOf(); synchronized (vertexDataFile) { vertexDataFile.seek(dataStart); vertexDataFile.write(data); blockManager.release(blockId); vertexDataFile.flush(); } logger.info("Vertex data write: " + dataStart + " -- " + (dataStart + data.length)); } else { synchronized (vertexDataFile) { vertexDataFile.seek(lastOffset); int sizeOf = converter.sizeOf(); for(int i=0; i < index.length; i++) { vertexDataFile.writeInt(Integer.reverseBytes(index[i])); // Note: when writing, the random access file does not take byte order into account! vertexDataFile.write(data, i * sizeOf, sizeOf); } blockManager.release(blockId); vertexDataFile.flush(); } } } /** * Load vertices' data * @param _vertexSt * @param _vertexEn inclusive * @return * @throws IOException */ public int load(int _vertexSt, int _vertexEn) throws IOException { vertexSt = _vertexSt; vertexEn = _vertexEn; synchronized (vertexDataFile) { if (!sparse) { long dataSize = (long) (vertexEn - vertexSt + 1) * (long) converter.sizeOf(); long dataStart = (long) vertexSt * (long) converter.sizeOf(); int blockId = blockManager.allocateBlock((int) dataSize); vertexData = blockManager.getRawBlock(blockId); vertexDataFile.seek(dataStart); vertexDataFile.readFully(vertexData); return blockId; } else { // Have to read in two passes if (lastStart > _vertexSt) { vertexDataFile.seek(0); } lastStart = _vertexSt; int sizeOf = converter.sizeOf(); long startPos = vertexDataFile.getFilePointer(); int n = 0; boolean foundStart = false; try { while(true) { int vertexId = vertexDataFile.readInt(); if (!foundStart && vertexId >= _vertexSt) { startPos = vertexDataFile.getFilePointer() - 4; foundStart = true; } if (vertexId >= _vertexSt && vertexId <= _vertexEn) { n++; } else if (vertexId > vertexEn) { break; } vertexDataFile.skipBytes(sizeOf); } } catch (EOFException eof) {} index = new int[n]; vertexDataFile.seek(startPos); int blockId = blockManager.allocateBlock(n * sizeOf); vertexData = blockManager.getRawBlock(blockId); int i = 0; try { while(i < n) { int vertexId = vertexDataFile.readInt(); if (vertexId >= _vertexSt && vertexId <= _vertexEn) { index[i] = vertexId; vertexDataFile.read(vertexData, i * sizeOf, sizeOf); i++; } else { vertexDataFile.skipBytes(sizeOf); } } } catch (EOFException eof) {} if (i != n) throw new IllegalStateException("Mismatch when reading sparse vertex data:" + i + " != " + n); lastOffset = (int) startPos; return blockId; } } } public ChiPointer getVertexValuePtr(int vertexId, int blockId) { assert(vertexId >= vertexSt && vertexId <= vertexEn); if (!sparse) { return new ChiPointer(blockId, (vertexId - vertexSt) * converter.sizeOf()); } else { int idx = Arrays.binarySearch(index, vertexId); if (idx < 0) { return null; } return new ChiPointer(blockId, idx * converter.sizeOf()); } } public void setBlockManager(DataBlockManager blockManager) { this.blockManager = blockManager; } // This is a bit funny... Is there a better way to create a memory efficient // int array in scala? public static int[] createIntArray(int n) { return new int[n]; } public Iterator<Integer> currentIterator() { if (!sparse) { return new Iterator<Integer>() { int j = vertexSt; @Override public boolean hasNext() { return (j <= vertexEn); } @Override public Integer next() { return j++; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } else { return new Iterator<Integer>() { int j = 0; @Override public boolean hasNext() { return (j < index.length); } @Override public Integer next() { return index[j++]; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } } public void close() { try { vertexDataFile.flush(); vertexDataFile.getFD().sync(); vertexDataFile.close(); } catch (IOException ie) { ie.printStackTrace(); } } }