package edu.isi.dig.elasticsearch; import java.io.IOException; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.http.client.methods.HttpPost; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.SSLContextBuilder; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.json.JSONObject; public class BulkLoadSequenceFile { private static final int retry = 10; public static void main(String[] args) throws IllegalArgumentException, IOException, InterruptedException, NoSuchAlgorithmException, KeyStoreException, KeyManagementException, InstantiationException, IllegalAccessException, ClassNotFoundException { Options options = createCommandLineOptions(); CommandLine cl = parse(args, options, BulkLoadSequenceFile.class.getSimpleName()); if(cl == null) { return; } String filePath = (String)cl.getOptionValue("filepath"); String index = (String)cl.getOptionValue("index"); String type = (String)cl.getOptionValue("type"); String hostname = (String)cl.getOptionValue("hostname"); String sleep = (String)cl.getOptionValue("sleep"); String bulksize = (String)cl.getOptionValue("bulksize"); String port = (String)cl.getOptionValue("port"); String protocol = (String)cl.getOptionValue("protocol"); String username = null; if(cl.hasOption("username")){ username = (String)cl.getOptionValue("username"); }else{ username = ""; } String password=null; if(cl.hasOption("password")){ password = (String)cl.getOptionValue("password"); }else{ password=""; } SSLContextBuilder builder = new SSLContextBuilder(); builder.loadTrustMaterial(null, new TrustSelfSignedStrategy()); SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(builder.build()); CloseableHttpClient httpClient = null; if(protocol.equalsIgnoreCase("https")) httpClient = HttpClients.custom().setSSLSocketFactory(sslsf).build(); else if(protocol.equalsIgnoreCase("http")) httpClient = HttpClients.createDefault(); HttpPost httpPost = null; if(!username.equals("") && !password.equals("")){ httpPost = new HttpPost(protocol+"://"+username + ":" + password + "@" + hostname + ":" + port + "/" + index + "/_bulk"); }else { httpPost = new HttpPost(protocol+"://" + hostname + ":" + port + "/" + index + "/_bulk"); } String bulkFormat = null; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(new Configuration()); Path path = new Path(filePath); for(FileStatus s: fs.listStatus(path)) { SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), SequenceFile.Reader.file(s.getPath())); Writable key = (Writable) Class.forName(reader.getKeyClass().getCanonicalName()).newInstance(); Text val = new Text(); StringBuilder sb = new StringBuilder(); long counter = 0; while (reader.next(key, val)) { JSONObject jObj = new JSONObject(val.toString()); String id = null; if(jObj.has("uri")) { id = jObj.getString("uri"); } if(id != null) { bulkFormat = "{\"index\":{\"_index\":\"" + index+ "\",\"_type\":\""+ type +"\",\"_id\":\""+id+"\"}}"; } else { bulkFormat = "{\"index\":{\"_index\":\"" + index+ "\",\"_type\":\""+ type +"\"}}"; } sb.append(bulkFormat); sb.append(System.getProperty("line.separator")); sb.append(val.toString()); //System.out.println("got val:" + val.toString()); sb.append(System.getProperty("line.separator")); counter++; if (counter % Integer.parseInt(bulksize) == 0) { int i = 0; Exception ex = null; while (i < retry) { try { StringEntity entity = new StringEntity(sb.toString(),"UTF-8"); entity.setContentType("application/json"); httpPost.setEntity(entity); httpClient.execute(httpPost); httpClient.close(); Thread.sleep(Integer.parseInt(sleep)); //System.out.println(counter + " processed"); break; }catch(Exception e) { ex = e; i++; } } if (i > 0) { System.out.println("Exception occurred!"); ex.printStackTrace(); break; } httpClient = null; if(protocol.equalsIgnoreCase("https")) httpClient = HttpClients.custom().setSSLSocketFactory(sslsf).build(); else if(protocol.equalsIgnoreCase("http")) httpClient = HttpClients.createDefault(); httpPost = new HttpPost(protocol + "://" + hostname + ":" + port + "/" + index + "/_bulk"); sb = new StringBuilder(); } } StringEntity entity = new StringEntity(sb.toString(),"UTF-8"); entity.setContentType("application/json"); httpPost.setEntity(entity); httpClient.execute(httpPost); httpClient.close(); reader.close(); } } private static Options createCommandLineOptions() { Options options = new Options(); options.addOption(new Option("filepath", "filepath", true, "location of the input file directory")); options.addOption(new Option("type", "type", true, "elasticsearch type")); options.addOption(new Option("index", "index", true, "elasticsearch index")); options.addOption(new Option("hostname", "hostname", true, "elasticsearch hostname")); options.addOption(new Option("sleep", "sleep", true, "thread sleep in ms")); options.addOption(new Option("bulksize", "bulksize", true, "bulk size")); options.addOption(new Option("port", "port", true, "es port")); options.addOption(new Option("protocol", "protocol", true, "es handshake protocol")); options.addOption(new Option("username", "username", true, "basic auth username for ES")); options.addOption(new Option("password", "password", true, "basic auth password for ES")); return options; } public static CommandLine parse(String args[], Options options, String commandName) { CommandLineParser parser = new BasicParser(); CommandLine cl = null; try { cl = parser.parse(options, args); if (cl == null || cl.getOptions().length == 0 || cl.hasOption("help")) { HelpFormatter hf = new HelpFormatter(); hf.printHelp(commandName, options); return null; } } catch (Exception e) { return cl; } return cl; } }