package example; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.net.MalformedURLException; import java.util.StringTokenizer; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hbase.client.HTablePool; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.common.SolrInputDocument; public class CSVFileImporter { public static void main(String[] args){ if(args.length < 1){ System.out.println("Usage: java example.CSVFileImporter <csv filename>"); System.exit(0); } @SuppressWarnings("deprecation") HBaseConfiguration conf = new HBaseConfiguration(); conf.set("hbase.zookeeper.quorum", "localhost"); conf.set("hbase.zookeeper.property.clientPort", "2181"); conf.setInt("hbase.client.retries.number", 7); conf.setInt("ipc.client.connect.max.retries", 3); HTablePool hTablePool = new HTablePool(conf, 10); try { BufferedReader in = new BufferedReader(new FileReader(args[0])); String str; while ((str = in.readLine()) != null) { process(str, hTablePool); } in.close(); } catch (IOException e) { } } private static void process(String str, HTablePool hTablePool){ HTableInterface idMapTable = hTablePool.getTable("DocKeyIdMap"); HTableInterface seqTable = hTablePool.getTable("Sequence"); try { StringTokenizer tokenizer = new StringTokenizer(str,","); if(tokenizer.countTokens() == 9){ String id = tokenizer.nextToken(); String cat = tokenizer.nextToken(); String name = tokenizer.nextToken(); float price = Float.parseFloat(tokenizer.nextToken()); String inStock = tokenizer.nextToken(); String author_t = tokenizer.nextToken(); String series_t = tokenizer.nextToken(); String sequence_i = tokenizer.nextToken(); String genre_s = tokenizer.nextToken(); Get get = new Get(Bytes.toBytes(id)); Result result = idMapTable.get(get); byte[] docId = result.getValue(Bytes.toBytes("docId"), Bytes.toBytes("")); int docNumber = 1; SolrInputDocument doc = new SolrInputDocument(); if(docId != null) { // we've indexed this doc previously docNumber = Bytes.toInt(docId); doc.addField("edit", true); } else { docNumber = new Long(seqTable.incrementColumnValue(Bytes.toBytes("sequence"), Bytes.toBytes("id"), Bytes.toBytes(""), 1, true)).intValue(); } doc.addField("docId", docNumber); doc.addField("global_uniq_id", id); doc.addField("cat", cat); doc.addField("name", name); doc.addField("price", new Integer((int) price).toString()); // Solbase currently do not support float embedded field comparison doc.addField("inStock", inStock); doc.addField("author_t", author_t); doc.addField("series_t", series_t); doc.addField("sequence_i", sequence_i); doc.addField("genre_s", genre_s); // whether we want to store to hbase or not doc.addField("updateStore", true); CommonsHttpSolrServer solbaseServer = new CommonsHttpSolrServer("http://localhost:8080/solbase/books~0"); solbaseServer.add(doc); } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SolrServerException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { hTablePool.putTable(idMapTable); } } }