package org.shanbo.feluca.data2; import java.io.BufferedInputStream; import java.io.File; import java.util.Arrays; import java.util.Random; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; import org.msgpack.MessagePack; import org.msgpack.unpacker.Unpacker; import org.shanbo.feluca.data2.Vector.VectorType; import com.google.common.io.Closeables; import com.google.common.io.PatternFilenameFilter; /** * Reads blocks sequentially * @author lgn * */ public class SeqVectorReader implements VectorReader{ File[] listFiles; boolean hasNext = true; MessagePack msgpack ; Unpacker unpacker; Random random = new Random(); Properties stat; VectorType vt; int blockIt = 0; File dir; public SeqVectorReader(String dirName) throws IOException{ this(dirName, "\\.\\d+\\.dat"); // all ordinary } public File getDataDir(){ return dir; } protected void shuffle(File[] files){ for(int i = 0 ; i < files.length; i++){ int swap = random.nextInt(files.length); File tmp = files[swap]; files[swap ] = files[i]; files[i] = tmp; } for(int i = 0 ; i < files.length; i++){ int swap = random.nextInt(files.length); File tmp = files[swap]; files[swap ] = files[i]; files[i] = tmp; } } public SeqVectorReader(String dirName, String filterPattern) throws IOException{ dir = new File(dirName); listFiles = dir.listFiles(new PatternFilenameFilter(dir.getName() + filterPattern)); if (listFiles == null || listFiles.length == 0){ throw new RuntimeException("blocks not found!"); } msgpack = new MessagePack(); unpacker = msgpack.createUnpacker(new BufferedInputStream(new FileInputStream(listFiles[blockIt]),1024 * 1024 * 2)); stat = new Properties(); FileInputStream fis = new FileInputStream(dirName + "/" + dir.getName() + ".sta"); stat.load(fis); fis.close(); vt = VectorType.valueOf(stat.getProperty("vectorType")); } public SeqVectorReader(String dirName, String filterPattern, boolean shuffle) throws IOException{ dir = new File(dirName); listFiles = dir.listFiles(new PatternFilenameFilter(dir.getName() + filterPattern)); if (shuffle){ shuffle(listFiles); System.out.println(Arrays.toString(listFiles)); }else { //sort by blockid } if (listFiles == null || listFiles.length == 0){ throw new RuntimeException("blocks not found!"); } msgpack = new MessagePack(); unpacker = msgpack.createUnpacker(new BufferedInputStream(new FileInputStream(listFiles[blockIt]),1024 * 1024 * 2)); stat = DataSetInfo.load(new File(dirName + "/" + dir.getName() + ".sta")); vt = VectorType.valueOf(stat.getProperty("vectorType")); } public VectorType getVectorType(){ return vt; } public Properties getDataStatistic(){ return stat; } public Vector getNextVector() throws IOException{ if (hasNext){ Boolean read = unpacker.readBoolean(); if (read == true){ return Vector.create(vt, unpacker); }else{ Closeables.close(unpacker, true); blockIt ++; if (blockIt >= listFiles.length){ hasNext = false; return null; }else{ // System.out.println("!"); unpacker = msgpack.createUnpacker(new BufferedInputStream(new FileInputStream(listFiles[blockIt]))); return getNextVector(); } } }else{ return null; } } public void close(){ try { Closeables.close(unpacker, true); } catch (IOException e) { } } }