/* * Copyright 2010 Outerthought bvba * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.lilyproject.tools.import_.cli; import java.io.FileInputStream; import java.io.InputStream; import java.util.HashSet; import java.util.List; import java.util.Set; import com.google.common.base.Splitter; import com.ngdata.lily.security.hbase.client.AuthorizationContext; import org.lilyproject.repository.spi.AuthorizationContextHolder; import org.lilyproject.tools.import_.json.IgnoreAndDeleteEmptyFieldsRecordReader; import org.lilyproject.tools.import_.json.IgnoreEmptyFieldsRecordReader; import org.lilyproject.tools.import_.json.RecordReader; import org.lilyproject.util.hbase.RepoAndTableUtil; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.lilyproject.cli.BaseZkCliTool; import org.lilyproject.cli.OptionUtil; import org.lilyproject.client.LilyClient; import org.lilyproject.repository.api.LRepository; import org.lilyproject.repository.api.LTable; import org.lilyproject.util.Version; import org.lilyproject.util.hbase.LilyHBaseSchema.Table; import org.lilyproject.util.io.Closer; public class JsonImportTool extends BaseZkCliTool { private Option schemaOnlyOption; private Option workersOption; private Option quietOption; private Option tableOption; private Option repositoryOption; private Option fileFormatOption; private Option ignoreEmptyFieldsOption; private Option ignoreAndDeleteEmptyFieldsOption; private Option maxErrorsOption; private Option rolesOption; private LilyClient lilyClient; @Override protected String getCmdName() { return "lily-import"; } @Override protected String getVersion() { return Version.readVersion("org.lilyproject", "lily-import"); } public static void main(String[] args) throws Exception { new JsonImportTool().start(args); } @Override @SuppressWarnings("static-access") public List<Option> getOptions() { List<Option> options = super.getOptions(); workersOption = OptionBuilder .withArgName("count") .hasArg() .withDescription("Number of workers (threads)") .withLongOpt("workers") .create("w"); options.add(workersOption); schemaOnlyOption = OptionBuilder .withDescription("Only import the field types and record types, not the records.") .withLongOpt("schema-only") .create("s"); options.add(schemaOnlyOption); quietOption = OptionBuilder .withDescription("Instead of printing out all record ids, only print a dot every 1000 records") .withLongOpt("quiet") .create("q"); options.add(quietOption); tableOption = OptionBuilder .withArgName("table") .hasArg() .withDescription("Repository table to import to, defaults to record table") .withLongOpt("table") .create(); options.add(tableOption); repositoryOption = OptionBuilder .withArgName("repository") .hasArg() .withDescription("Repository name, if not specified default repository is used") .withLongOpt("repository") .create(); options.add(repositoryOption); fileFormatOption = OptionBuilder .withArgName("format") .hasArg() .withDescription("Input file format (see explanation at bottom)") .withLongOpt("format") .create(); options.add(fileFormatOption); ignoreEmptyFieldsOption = OptionBuilder .withDescription("Ignores fields defined as empty strings, ignores zero-length lists, ignores nested" + " records containing no fields. When in root record, adds them as fields-to-delete.") .withLongOpt("ignore-empty-fields") .create(); options.add(ignoreEmptyFieldsOption); ignoreAndDeleteEmptyFieldsOption = OptionBuilder .withDescription("Does everything ignore-empty-fields does, and adds empty fields in the root record" + "to the list of fields-to-delete (only makes sense for updates).") .withLongOpt("ignore-and-delete-empty-fields") .create(); options.add(ignoreAndDeleteEmptyFieldsOption); maxErrorsOption = OptionBuilder .withArgName("count") .hasArg() .withDescription("Give up the import after this amount of errors (only for records, not schema)") .withLongOpt("max-errors") .create(); options.add(maxErrorsOption); rolesOption = OptionBuilder .withArgName("roles") .hasArg() .withDescription("Comma-separated list of active user roles (excluding tenant part). Only has " + "effect when the NGDATA hbase-authz coprocessor is installed.") .withLongOpt("roles") .create(); options.add(rolesOption); return options; } @Override public int run(CommandLine cmd) throws Exception { int result = super.run(cmd); if (result != 0) { return result; } int workers = OptionUtil.getIntOption(cmd, workersOption, 1); String tableName = OptionUtil.getStringOption(cmd, tableOption, Table.RECORD.name); String repositoryName = OptionUtil.getStringOption(cmd, repositoryOption, RepoAndTableUtil.DEFAULT_REPOSITORY); ImportFileFormat fileFormat = OptionUtil.getEnum(cmd, fileFormatOption, ImportFileFormat.JSON, ImportFileFormat.class); if (cmd.getArgList().size() < 1) { System.out.println("No import file specified!"); return 1; } boolean schemaOnly = cmd.hasOption(schemaOnlyOption.getOpt()); boolean ignoreEmptyFields = cmd.hasOption(ignoreEmptyFieldsOption.getLongOpt()); boolean ignoreAndDeleteEmptyFields = cmd.hasOption(ignoreAndDeleteEmptyFieldsOption.getLongOpt()); long maxErrors = OptionUtil.getLongOption(cmd, maxErrorsOption, 1L); if (cmd.hasOption(rolesOption.getLongOpt())) { Set<String> roles = new HashSet<String>(); Splitter splitter = Splitter.on(",").trimResults().omitEmptyStrings(); for (String role : splitter.split(cmd.getOptionValue(rolesOption.getLongOpt()))) { roles.add(role); } AuthorizationContextHolder.setCurrentContext(new AuthorizationContext("lily-import", repositoryName, roles)); } lilyClient = new LilyClient(zkConnectionString, zkSessionTimeout); for (String arg : (List<String>)cmd.getArgList()) { System.out.println("----------------------------------------------------------------------"); System.out.println("Importing " + arg + " to " + tableName + " table of repository " + repositoryName); InputStream is = new FileInputStream(arg); try { LRepository repository = lilyClient.getRepository(repositoryName); LTable table = repository.getTable(tableName); ImportListener importListener; if (cmd.hasOption(quietOption.getOpt())) { importListener = new DefaultImportListener(System.out, EntityType.RECORD); } else { importListener = new DefaultImportListener(); } JsonImport.ImportSettings settings = new JsonImport.ImportSettings(); settings.importListener = importListener; settings.threadCount = workers; settings.maximumRecordErrors = maxErrors; if (ignoreAndDeleteEmptyFields) { settings.recordReader= IgnoreAndDeleteEmptyFieldsRecordReader.INSTANCE; } else if (ignoreEmptyFields) { settings.recordReader = IgnoreEmptyFieldsRecordReader.INSTANCE; } else { settings.recordReader = RecordReader.INSTANCE; } switch (fileFormat) { case JSON: if (schemaOnly) { JsonImport.loadSchema(repository, is, settings); } else { JsonImport.load(table, repository, is, settings); } break; case JSON_LINES: JsonImport.loadJsonLines(table, repository, is, settings); break; default: throw new RuntimeException("Unexpected import file format: " + fileFormat); } } finally { Closer.close(is); } } System.out.println("Import done"); return 0; } @Override protected void cleanup() { Closer.close(lilyClient); super.cleanup(); } public enum ImportFileFormat { JSON, JSON_LINES } }