package com.produban.openbus.trident;

import java.util.concurrent.TimeUnit;

import org.apache.storm.hdfs.trident.HdfsState;
import org.apache.storm.hdfs.trident.HdfsStateFactory;
import org.apache.storm.hdfs.trident.HdfsUpdater;
import org.apache.storm.hdfs.trident.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.trident.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.trident.format.FileNameFormat;
import org.apache.storm.hdfs.trident.format.RecordFormat;
import org.apache.storm.hdfs.trident.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.trident.rotation.FileSizeRotationPolicy;

import com.produban.openbus.storm.SimpleFileStringSpout;

import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.state.StateFactory;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;

public class HDFSOutputTopology {

    public static void main(String[] args) {
        // Topology configuration
        Config conf = new Config();
        // conf.put(Config.TOPOLOGY_DEBUG, true);

        // Default input/output paths; both can be overridden from the command line
        String entrada = "D:\\produban\\Logs\\bepxnxusrsp01\\vmwtbitarecol01\\sample_5500.txt";
        String salida = "d:\\parseado_proxy.log";
        if (args.length == 2) {
            entrada = args[0];
            salida = args[1];
        }

        // Spout that replays the input file, emitting one raw log line per tuple in the "linea" field
        SimpleFileStringSpout spout1 = new SimpleFileStringSpout(entrada, "linea");

        // Fields produced by the proxy-log parser and written to HDFS
        Fields hdfsFields = new Fields("eventTimeStamp", "timeTaken", "clientIP", "User", "Group",
                "Exception", "filterResult", "category", "referer", "responseCode", "action",
                "method", "contentType", "protocol", "requestDomain", "requestPort", "requestPath",
                "requestQuery", "requestURIExtension", "userAgent", "serverIP", "scBytes",
                "csBytes", "virusID", "destinationIP");

        // Output files are written as /user/cloudera/trident-*.txt
        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/user/cloudera/")
                .withPrefix("trident")
                .withExtension(".txt");

        // Each tuple becomes one \001-delimited record
        RecordFormat recordFormat = new DelimitedRecordFormat()
                .withFields(hdfsFields)
                .withFieldDelimiter("\001");

        // Rotate the output file every 10 MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(10.0f, FileSizeRotationPolicy.Units.MB);

        HdfsState.Options options = new HdfsState.HdfsFileOptions()
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(recordFormat)
                .withRotationPolicy(rotationPolicy)
                .withFsUrl("hdfs://192.168.182.129:8020");

        // Required so HDFS is accessed as "cloudera" rather than as the OS user launching the program
        System.setProperty("HADOOP_USER_NAME", "cloudera");

        StateFactory factory = new HdfsStateFactory().withOptions(options);

        // Parse each raw log line into the structured proxy-log fields
        TridentTopology topology = new TridentTopology();
        Stream parseaLogs = topology.newStream("spout1", spout1)
                .each(new Fields("linea"), new ParseProxy(), hdfsFields);

        // Persist the parsed tuples to HDFS
        parseaLogs.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());
        // .each(new Fields("eventTimeStamp", "timeTaken", "clientIP", "User"), new Print("", salida));

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("basic_primitives", conf, topology.build());

        // Keep the local cluster alive long enough to process the file, then shut down
        // so the JVM can exit (adjust the duration as needed)
        try {
            TimeUnit.MINUTES.sleep(10);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        cluster.shutdown();
    }
}