package hbase;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Logger;

/**
 * Command-line loader that reads CSV files from a two-level directory tree and
 * writes one HBase {@link Put} per line into the {@code nyse:stock_data} table.
 *
 * <p>Each row is keyed by {@code "<stockTicker>,<yyyy-MM-dd>"} and stores its
 * values in column family {@code sd} under the qualifiers {@code op}, {@code hp},
 * {@code lp}, {@code cp} (open/high/low/close price, written as floats) and
 * {@code v} (volume, written as an int).
 *
 * <p>Lines are parsed with the shared {@link NyseParser} instance, so this class
 * is intended for single-threaded use.
 */
public class NyseLoad {

    static Configuration conf = HBaseConfiguration.create();
    static NyseParser nyseParser = new NyseParser();
    static Table table;
    static final Logger logger = Logger.getLogger(NyseLoad.class);

    /** Pattern of the transaction date as supplied by the parser. */
    private static final String INPUT_DATE_PATTERN = "dd-MMM-yyyy";
    /** Pattern of the transaction date as written into the row key. */
    private static final String ROW_KEY_DATE_PATTERN = "yyyy-MM-dd";
    /** Number of lines between two progress log messages. */
    private static final int PROGRESS_INTERVAL = 10000;

    private static final byte[] FAMILY = Bytes.toBytes("sd");
    private static final byte[] OPEN_PRICE = Bytes.toBytes("op");
    private static final byte[] HIGH_PRICE = Bytes.toBytes("hp");
    private static final byte[] LOW_PRICE = Bytes.toBytes("lp");
    private static final byte[] CLOSE_PRICE = Bytes.toBytes("cp");
    private static final byte[] VOLUME = Bytes.toBytes("v");

    /**
     * Builds the {@link Put} for the record currently held by {@code nyseRecord}.
     *
     * @param table      unused; kept so existing callers continue to compile
     * @param nyseRecord parser holding the fields of one already-parsed line
     * @return a put keyed by {@code "<stockTicker>,<yyyy-MM-dd>"}
     * @throws IOException if the transaction date is missing or is not in
     *                     {@code dd-MMM-yyyy} form; previously such records were
     *                     written under the row key {@code "<ticker>,null"}
     */
    public static Put buildPutList(Table table, NyseParser nyseRecord)
            throws RetriesExhaustedWithDetailsException, InterruptedIOException, IOException {
        String rawDate = nyseRecord.getTransactionDate();
        String transactionDate;
        try {
            transactionDate = toRowKeyDate(rawDate);
        } catch (ParseException e) {
            throw new IOException("Unparseable transaction date '" + rawDate
                    + "' for ticker " + nyseRecord.getStockTicker(), e);
        }

        Put put = new Put(Bytes.toBytes(nyseRecord.getStockTicker() + "," + transactionDate)); // Key
        put.addColumn(FAMILY, OPEN_PRICE, Bytes.toBytes(nyseRecord.getOpenPrice().floatValue()));
        put.addColumn(FAMILY, HIGH_PRICE, Bytes.toBytes(nyseRecord.getHighPrice().floatValue()));
        put.addColumn(FAMILY, LOW_PRICE, Bytes.toBytes(nyseRecord.getLowPrice().floatValue()));
        put.addColumn(FAMILY, CLOSE_PRICE, Bytes.toBytes(nyseRecord.getClosePrice().floatValue()));
        // NOTE(review): intValue() narrows the volume to 32 bits - confirm volumes always fit.
        put.addColumn(FAMILY, VOLUME, Bytes.toBytes(nyseRecord.getVolume().intValue()));
        return put;
    }

    /** Converts a {@code dd-MMM-yyyy} date string into {@code yyyy-MM-dd} form. */
    private static String toRowKeyDate(String rawDate) throws ParseException {
        if (rawDate == null) {
            throw new ParseException("transaction date is missing", 0);
        }
        // SimpleDateFormat is not thread-safe, so the formatters stay local to the call.
        SimpleDateFormat inputFormat = new SimpleDateFormat(INPUT_DATE_PATTERN);
        SimpleDateFormat rowKeyFormat = new SimpleDateFormat(ROW_KEY_DATE_PATTERN);
        return rowKeyFormat.format(inputFormat.parse(rawDate));
    }

    /**
     * Writes all given puts to the table in a single call.
     *
     * @param puts  the puts to write
     * @param table the destination table
     * @throws IOException if the write fails
     */
    public static void loadPutList(List<Put> puts, Table table) throws IOException {
        table.put(puts);
    }

    /**
     * Loads every file whose name ends in {@code csv} from each immediate
     * sub-directory of {@code nysePath}, writing one batch of puts per file.
     *
     * <p>Failures are logged rather than propagated: an unreadable directory is
     * skipped, a record with a bad transaction date is skipped, and an I/O error
     * while reading or writing a file moves on to the next file.
     *
     * @param table    the destination table
     * @param nysePath path of the folder whose sub-directories hold the CSV files
     */
    public static void readFilesAndLoad(Table table, String nysePath) {
        int counter = 0;
        List<Put> puts = new ArrayList<Put>();

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] listOfDirectories = new File(nysePath).listFiles();
        if (listOfDirectories == null) {
            logger.error("Input path is not a readable directory: " + nysePath);
            return;
        }

        for (File dir : listOfDirectories) {
            if (!dir.isDirectory()) {
                continue;
            }
            File[] files = dir.listFiles();
            if (files == null) {
                logger.warn("Cannot list directory, skipping: " + dir);
                continue;
            }
            for (File file : files) {
                if (!file.getName().endsWith("csv")) {
                    continue;
                }
                try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                    String sCurrentLine;
                    while ((sCurrentLine = br.readLine()) != null) {
                        if (++counter % PROGRESS_INTERVAL == 0) {
                            logger.info(counter);
                        }
                        nyseParser.parse(sCurrentLine);
                        try {
                            puts.add(buildPutList(table, nyseParser));
                        } catch (IOException e) {
                            // One bad record must not abort the rest of the file.
                            logger.warn("Skipping record in " + file + ": " + e.getMessage(), e);
                        }
                    }
                    if (!puts.isEmpty()) {
                        loadPutList(puts, table);
                    }
                    // Without this, every later file would re-send all earlier records.
                    // Cleared only after a successful write, so puts collected before a
                    // failure are still sent with the next batch, as before.
                    puts.clear();
                } catch (IOException e) {
                    logger.error("Failed to load " + file, e);
                }
            }
        }
    }

    /**
     * Entry point: connects to HBase and loads the given folder into
     * {@code nyse:stock_data}.
     *
     * @param args {@code args[0]} is the ZooKeeper quorum, {@code args[1]} the
     *             input folder
     * @throws IOException              if the connection or table cannot be opened or closed
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            throw new IllegalArgumentException("Usage: NyseLoad <zookeeper-quorum> <nyse-input-folder>");
        }
        conf.set("hbase.zookeeper.quorum", args[0]);
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // try-with-resources closes the table and then the connection even if loading throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
                Table table = connection.getTable(TableName.valueOf("nyse:stock_data"))) {
            readFilesAndLoad(table, args[1]);
        }
    }
}