package resa.evaluation.simulate;

import backtype.storm.serialization.SerializationFactory;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import resa.examples.wc.WordCountTopology;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.UUID;

/**
 * Writes a synthetic "pattern" data file of a configured size to HDFS, one file
 * per task, then burns a small, fixed amount of CPU per tuple to simulate work.
 * <p>
 * Created by ding on 14-7-29.
 */
public class HdfsDataLoader extends WordCountTopology.WordCount {

    private Configuration hdfsConf;

    /**
     * Kryo-serializable blob that pads itself with random filler records until
     * its serialized form reaches roughly {@code size} bytes.
     */
    private static class WordCountDB implements KryoSerializable {

        private long size;

        public WordCountDB(long size) {
            this.size = size;
        }

        public WordCountDB() {
        }

        @Override
        public void write(Kryo kryo, Output output) {
            String s = UUID.randomUUID().toString();
            output.writeLong(size);
            // Emit (true, String, int) triples until the target byte size is
            // reached; a trailing false marks the end of the filler records.
            long total = output.total();
            while (output.total() - total < size) {
                output.writeBoolean(true);
                output.writeString(s);
                output.writeInt((int) (Math.random() * Integer.MAX_VALUE));
            }
            output.writeBoolean(false);
        }

        @Override
        public void read(Kryo kryo, Input input) {
            size = input.readLong();
            // Consume filler records until the terminating false flag.
            while (input.readBoolean()) {
                input.readString();
                input.readInt();
            }
        }
    }

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        this.hdfsConf = new Configuration();
        if (context.getTaskData("pattern") == null) {
            // First run on this task: generate the data and write it to HDFS.
            try {
                generateData(stormConf, context);
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            // Data already cached as task data; rewrite the file asynchronously
            // if it has gone missing from HDFS.
            Path file = getPath(stormConf, context);
            if (!dataFileExist(file)) {
                context.getSharedExecutor().submit(() -> {
                    Kryo kryo = SerializationFactory.getKryo(stormConf);
                    try (Output out = new Output(FileSystem.get(hdfsConf).create(getPath(stormConf, context),
                            true, 12 * 1024, (short) 2, 32 * 1024 * 1024L))) {
                        out.writeInt(1);
                        out.writeString("pattern");
                        WordCountDB db = (WordCountDB) context.getTaskData("pattern");
                        kryo.writeClass(out, db.getClass());
                        db.write(kryo, out);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                });
            }
        }
    }

    private boolean dataFileExist(Path file) {
        try {
            return FileSystem.get(hdfsConf).exists(file);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Builds a {@link WordCountDB} whose size comes from the "dataSizes" config
     * list (indexed by this task, scaled by 600), writes it to this task's HDFS
     * file, and caches it as task data under the key "pattern".
     */
    public void generateData(Map stormConf, TopologyContext context) throws IOException {
        List<Double> dataSizes = (List<Double>) stormConf.get("dataSizes");
        long dataSize = (long) (dataSizes.get(context.getThisTaskIndex()) * 600);
        Kryo kryo = SerializationFactory.getKryo(stormConf);
        WordCountDB db = new WordCountDB(dataSize);
        try (Output out = new Output(FileSystem.get(hdfsConf).create(getPath(stormConf, context), true,
                12 * 1024, (short) 2, 32 * 1024 * 1024L))) {
            out.writeInt(1);
            out.writeString("pattern");
            kryo.writeClass(out, db.getClass());
            db.write(kryo, out);
        }
        System.out.println("Load dataSize is " + dataSize);
        context.setTaskData("pattern", db);
    }

    /**
     * Returns this task's data file path, /resa/&lt;topology-id&gt;/task-NNN.data,
     * creating the parent directory if necessary.
     */
    private Path getPath(Map<String, Object> conf, TopologyContext context) {
        Path dataPath = new Path(String.format("/resa/%s/task-%03d.data", context.getStormId(),
                context.getThisTaskId()));
        try {
            FileSystem fs = FileSystem.get(hdfsConf);
            if (!fs.exists(dataPath.getParent())) {
                fs.mkdirs(dataPath.getParent());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return dataPath;
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // Simulate per-tuple processing cost with a short, fixed CPU burn;
        // 'now' is only used by the commented-out timed variant below.
        long now = System.currentTimeMillis();
        // do {
        for (int i = 0; i < 10; i++) {
            Math.atan(Math.sqrt(Math.random() * Integer.MAX_VALUE));
        }
        // } while (System.currentTimeMillis() - now > 1);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // This bolt emits nothing.
    }
}
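
/*
 * Hypothetical companion sketch, not part of the original class: one way to read
 * back the file layout produced above -- an int entry count, the "pattern" key, a
 * Kryo class header, then the WordCountDB payload. The class name
 * PatternFileReaderSketch and the main() driver are illustrative assumptions; in a
 * running topology the same SerializationFactory.getKryo(stormConf) used for
 * writing should be used to decode the class header.
 */
class PatternFileReaderSketch {

    public static void main(String[] args) throws IOException {
        Kryo kryo = new Kryo(); // assumption: a default Kryo can resolve the class header
        FileSystem fs = FileSystem.get(new Configuration());
        try (Input in = new Input(fs.open(new Path(args[0])))) {
            int entries = in.readInt();   // writeInt(1) in generateData
            String key = in.readString(); // the "pattern" key
            kryo.readClass(in);           // header written by kryo.writeClass(out, db.getClass())
            // Payload mirrors WordCountDB.write(): declared byte size, then
            // (true, String, int) filler triples until a terminating false.
            long size = in.readLong();
            int records = 0;
            while (in.readBoolean()) {
                in.readString();
                in.readInt();
                records++;
            }
            System.out.println(key + ": " + entries + " entry(ies), " + size
                    + " bytes declared, " + records + " filler records");
        }
    }
}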