Java Examples for org.apache.avro.mapreduce.AvroKeyOutputFormat
The following Java examples will help you understand the usage of org.apache.avro.mapreduce.AvroKeyOutputFormat. These source code samples are taken from different open source projects.
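As a quick orientation before the project examples, here is a minimal, self-contained map-only job that copies an Avro container file through AvroKeyOutputFormat. The class names, argument layout, and schema-file handling are illustrative placeholders, not taken from any of the projects below; the AvroJob and Job calls are the standard avro-mapred new-API surface used throughout the examples.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroCopyJob {

  // Identity mapper: reads Avro records and writes them back out unchanged.
  public static class IdentityAvroMapper
      extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
    @Override
    protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
        throws java.io.IOException, InterruptedException {
      context.write(key, value);
    }
  }

  public static void main(String[] args) throws Exception {
    // args: <input dir> <output dir> <path to .avsc schema file>
    Schema schema = new Schema.Parser().parse(new java.io.File(args[2]));

    Job job = Job.getInstance(new Configuration(), "avro-key-copy");
    job.setJarByClass(AvroCopyJob.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, schema);

    job.setMapperClass(IdentityAvroMapper.class);
    job.setNumReduceTasks(0); // map-only, as in several examples below

    // AvroKeyOutputFormat writes each AvroKey into an Avro container file.
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, schema);
    job.setOutputValueClass(NullWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}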
Example 1
Project: camus-master File: CamusSweeperAvroKeyJob.java
@Override
public void configureJob(String topic, Job job) {
  boolean skipNameValidation = RelaxedSchemaUtils.skipNameValidation(job.getConfiguration());
  if (skipNameValidation) {
    RelaxedAvroSerialization.addToConfiguration(job.getConfiguration());
  }
  // setting up our input format and map output types
  super.configureInput(job, AvroKeyCombineFileInputFormat.class, AvroKeyMapper.class, AvroKey.class, AvroValue.class);
  // setting up our output format and output types
  super.configureOutput(job, skipNameValidation ? RelaxedAvroKeyOutputFormat.class : AvroKeyOutputFormat.class, AvroKeyReducer.class, AvroKey.class, NullWritable.class);
  // finding the newest file from our input. this file will contain the newest version of our avro schema.
  Schema schema;
  try {
    schema = getNewestSchemaFromSource(job);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // checking if we have a key schema used for deduping. if we don't, then we make this a map-only job
  // and set the key schema to the newest input schema
  String keySchemaStr = getConfValue(job, topic, "camus.sweeper.avro.key.schema");
  Schema keySchema;
  if (job.getConfiguration().getBoolean("camus.sweeper.use.all.attributes", false)) {
    log.info("Using all attributes in the schema (except Map fields) for deduping");
    keySchema = getAllFieldsExceptMap(schema);
  } else if (keySchemaStr == null || keySchemaStr.isEmpty() || job.getConfiguration().getBoolean("second.stage", false)) {
    job.setNumReduceTasks(0);
    keySchema = schema;
  } else {
    keySchema = RelaxedSchemaUtils.parseSchema(keySchemaStr, job.getConfiguration());
    keySchema = duplicateRecord(keySchema, schema);
    if (!validateKeySchema(schema, keySchema)) {
      log.info("topic:" + topic + " key invalid, using map only job");
      job.setNumReduceTasks(0);
      keySchema = schema;
    }
  }
  setupSchemas(topic, job, schema, keySchema);
  // setting the compression level. Only used if compression is enabled. default is 6
  job.getConfiguration().setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, job.getConfiguration().getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 6));
}
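Note that the DEFLATE_LEVEL_KEY set at the end of this example only takes effect once compressed output is actually enabled on the job; AvroKeyOutputFormat consults the standard FileOutputFormat compression flag. A minimal sketch of the two calls that usually go together (the helper class is a hypothetical wrapper, not part of the project above):

import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

final class AvroCompressionConfig {
  // Enable deflate-compressed Avro output on an already-configured job.
  static void enableDeflate(Job job, int level) {
    FileOutputFormat.setCompressOutput(job, true); // without this, DEFLATE_LEVEL_KEY is ignored
    job.getConfiguration().setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, level); // deflate level 1-9
  }
}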
Example 2
Project: avro-sorting-master File: AvroWritableKeySort.java
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, Weather.SCHEMA$);
  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
  job.setMapOutputKeyClass(WeatherSubset.class);
  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setPartitionerClass(WeatherPartitioner.class);
  job.setGroupingComparatorClass(WeatherSubsetGroupingComparator.class);
  job.setSortComparatorClass(WeatherSubsetSortComparator.class);
  return job.waitForCompletion(true);
}
Example 3
Project: etl-by-example-master File: Driver.java
@Override
public int run(String[] args) throws Exception {
  Path mrInput, mrOutput;
  if (args.length == 2) {
    mrInput = new Path(args[0]);
    mrOutput = new Path(args[1] + directoryFormat.format(new Date()));
  } else {
    System.err.println("Parameter missing!");
    return 1;
  }
  /** configure Job **/
  Job job = new Job(getConf(), "DataIngest Example");
  job.setJarByClass(Driver.class);
  job.setUserClassesTakesPrecedence(true);
  FileInputFormat.setInputPaths(job, mrInput);
  FileOutputFormat.setOutputPath(job, mrOutput);
  job.setMapperClass(MapperRawToAvro.class);
  job.setReducerClass(ReducerByDateTime.class);
  AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.LONG));
  AvroJob.setMapOutputValueSchema(job, SampleRecord.SCHEMA$);
  AvroKeyOutputFormat.setCompressOutput(job, true);
  AvroKeyOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
  AvroMultipleOutputs.addNamedOutput(job, "sampleRecord", AvroKeyOutputFormat.class, SampleRecord.SCHEMA$);
  MultipleOutputs.setCountersEnabled(job, true);
  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
Example 4
Project: cdap-master File: DynamicPartitioningOutputFormat.java
private boolean isAvroOutputFormat(FileOutputFormat<K, V> fileOutputFormat) {
  String className = fileOutputFormat.getClass().getName();
  // use the class name String in order to avoid having a dependency on the Avro libraries here
  return "org.apache.avro.mapreduce.AvroKeyOutputFormat".equals(className)
      || "org.apache.avro.mapreduce.AvroKeyValueOutputFormat".equals(className);
}
Example 5
Project: hpg-bigdata-master File: Bam2AvroMR.java
public static int run(String input, String output, String codecName, boolean adjQuality, Configuration conf) throws Exception {
  // read header, and save sequence index/name in conf
  final Path p = new Path(input);
  final SeekableStream seekableStream = WrapSeekable.openPath(conf, p);
  final SamReader reader = SamReaderFactory.make().open(SamInputResource.of(seekableStream));
  final SAMFileHeader header = reader.getFileHeader();
  int i = 0;
  SAMSequenceRecord sr;
  while ((sr = header.getSequence(i)) != null) {
    conf.set("" + i, sr.getSequenceName());
    i++;
  }
  Job job = Job.getInstance(conf, "Bam2AvroMR");
  job.setJarByClass(Bam2AvroMR.class);
  // Avro problem fix
  job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");
  job.getConfiguration().set(ADJUST_QUALITY, Boolean.toString(adjQuality));
  // We call setOutputSchema first so we can override the configuration
  // parameters it sets
  AvroJob.setOutputKeySchema(job, ReadAlignment.getClassSchema());
  job.setOutputValueClass(NullWritable.class);
  AvroJob.setMapOutputValueSchema(job, ReadAlignment.getClassSchema());
  // point to input data
  FileInputFormat.setInputPaths(job, new Path(input));
  job.setInputFormatClass(AnySAMInputFormat.class);
  // set the output format
  FileOutputFormat.setOutputPath(job, new Path(output));
  if (codecName != null) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, CompressionUtils.getHadoopCodec(codecName));
  }
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(Void.class);
  job.setMapperClass(Bam2GaMapper.class);
  job.setNumReduceTasks(0);
  job.waitForCompletion(true);
  // write header
  Path headerPath = new Path(output + "/part-m-00000.avro.header");
  FileSystem fs = FileSystem.get(conf);
  BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(headerPath, true)));
  br.write(header.getTextHeader());
  br.close();
  return 0;
}
Example 6
Project: iis-master File: SparkPipeMapReduce.java
//------------------------ LOGIC --------------------------
public static void main(String[] args) throws IOException, ClassNotFoundException {
  SparkPipeMapReduceParameters params = parseParameters(args);
  SparkConf conf = new SparkConf();
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  conf.set("spark.kryo.registrator", "pl.edu.icm.sparkutils.avro.AvroCompatibleKryoRegistrator");
  Class<? extends GenericRecord> outputAvroClass = Class.forName(params.outputAvroSchemaClass).asSubclass(GenericRecord.class);
  Schema inputSchema = AvroUtils.toSchema(params.inputAvroSchemaClass);
  Schema outputSchema = AvroUtils.toSchema(params.outputAvroSchemaClass);
  Job job = Job.getInstance();
  AvroJob.setInputKeySchema(job, inputSchema);
  AvroJob.setOutputKeySchema(job, outputSchema);
  try (JavaSparkContext sc = new JavaSparkContext(conf)) {
    sc.addFile(params.mapperScript);
    sc.addFile(params.reducerScript);
    String mapperScriptName = new File(params.mapperScript).getName();
    String reducerScriptName = new File(params.reducerScript).getName();
    SparkPipeExecutor pipeExecutor = new SparkPipeExecutor();
    @SuppressWarnings("unchecked")
    JavaPairRDD<AvroKey<GenericRecord>, NullWritable> inputRecords =
        (JavaPairRDD<AvroKey<GenericRecord>, NullWritable>) sc.newAPIHadoopFile(params.inputAvroPath,
            AvroKeyInputFormat.class, GenericRecord.class, NullWritable.class, job.getConfiguration());
    JavaPairRDD<String, String> mappedRecords = pipeExecutor.doMap(inputRecords, mapperScriptName, params.mapperScriptArgs);
    JavaPairRDD<AvroKey<GenericRecord>, NullWritable> reducedRecords =
        pipeExecutor.doReduce(mappedRecords, reducerScriptName, params.reducerScriptArgs, outputAvroClass);
    reducedRecords.saveAsNewAPIHadoopFile(params.outputAvroPath, AvroKey.class, NullWritable.class,
        AvroKeyOutputFormat.class, job.getConfiguration());
  }
}
Example 7
Project: hiped2-master File: BloomFilterCreator.java
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }
  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));
  Configuration conf = super.getConf();
  Job job = new Job(conf);
  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);
  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setNumReduceTasks(1);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 8
Project: KOSHIK-master File: EnglishPipeline.java
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path,...").hasArg().withDescription("input path[s]").create(OPTION_INPUTPATHS));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OPTION_OUTPUTPATH));
  CommandLine commandLine;
  CommandLineParser commandLineParser = new GnuParser();
  commandLine = commandLineParser.parse(options, args);
  if (!commandLine.hasOption(OPTION_INPUTPATHS) || !commandLine.hasOption(OPTION_OUTPUTPATH)) {
    HelpFormatter helpFormatter = new HelpFormatter();
    helpFormatter.printHelp(getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  String inputPaths = commandLine.getOptionValue(OPTION_INPUTPATHS);
  Path outputPath = new Path(commandLine.getOptionValue(OPTION_OUTPUTPATH));
  LOGGER.info("Utility name: " + this.getClass().getName());
  LOGGER.info(" - input path: " + inputPaths);
  LOGGER.info(" - output path: " + outputPath);
  Job job = new Job(getConf(), getClass().getName());
  job.setJarByClass(getClass());
  FileInputFormat.setInputPaths(job, inputPaths);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  //job.setMapperClass(DocumentSelectMapper.class);
  AvroJob.setInputKeySchema(job, AvroDocument.SCHEMA$);
  AvroJob.setMapOutputKeySchema(job, AvroDocument.SCHEMA$);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(EnglishPipelineReducer.class);
  AvroJob.setOutputKeySchema(job, AvroDocument.SCHEMA$);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 9
Project: opencga-master File: VariantTableExportDriver.java
@Override
protected void initMapReduceJob(String inTable, Job job, Scan scan, boolean addDependencyJar) throws IOException {
  super.initMapReduceJob(inTable, job, scan, addDependencyJar);
  // set Path
  FileOutputFormat.setOutputPath(job, new Path(this.outFile));
  // compression
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  switch (this.type) {
    case AVRO:
      job.setOutputFormatClass(AvroKeyOutputFormat.class);
      // Set schema
      AvroJob.setOutputKeySchema(job, VariantAvro.getClassSchema());
      break;
    case VCF:
      job.setOutputFormatClass(HadoopVcfOutputFormat.class);
      break;
    default:
      throw new IllegalStateException("Type not known: " + this.type);
  }
  job.setNumReduceTasks(0);
}
Example 10
Project: aegisthus-master File: SSTableExport.java
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(SSTableExport.class);
  CommandLine cl = getOptions(args);
  if (cl == null) {
    return 1;
  }
  // Check all of the paths and load the sstable version from the input filenames
  List<Path> paths = Lists.newArrayList();
  if (cl.hasOption(Feature.CMD_ARG_INPUT_FILE)) {
    for (String input : cl.getOptionValues(Feature.CMD_ARG_INPUT_FILE)) {
      checkVersionFromFilename(input);
      paths.add(new Path(input));
    }
  }
  if (cl.hasOption(Feature.CMD_ARG_INPUT_DIR)) {
    paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(Feature.CMD_ARG_INPUT_DIR)));
  }
  String avroSchemaString = getAvroSchema(cl.getOptionValue(Feature.CMD_ARG_AVRO_SCHEMA_FILE), job.getConfiguration());
  Schema avroSchema = new Schema.Parser().parse(avroSchemaString);
  // At this point we have the version of sstable that we can use for this run
  job.getConfiguration().set(Aegisthus.Feature.CONF_SSTABLE_VERSION, version.toString());
  if (job.getConfiguration().get(Aegisthus.Feature.CONF_CQL_SCHEMA) != null) {
    setConfigurationFromCql(job.getConfiguration());
  }
  job.setInputFormatClass(AegisthusInputFormat.class);
  job.setMapperClass(CQLMapper.class);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroJob.setOutputKeySchema(job, avroSchema);
  // Map-only job
  job.setNumReduceTasks(0);
  TextInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
  FileOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(Feature.CMD_ARG_OUTPUT_DIR)));
  job.submit();
  System.out.println(job.getJobID());
  System.out.println(job.getTrackingURL());
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Example 11
Project: white-elephant-master File: ParseJobConfs.java
public void execute(StagedOutputJobExecutor executor) throws IOException, InterruptedException, ExecutionException {
  for (String clusterName : _clusterNames.split(",")) {
    System.out.println("Processing cluster " + clusterName);
    List<JobStatsProcessing.ProcessingTask> processingTasks = JobStatsProcessing.getTasks(_fs, _logsRoot, clusterName, _confsOutputPathRoot, "xml", _incremental, _numDays, _numDaysForced);
    for (JobStatsProcessing.ProcessingTask task : processingTasks) {
      List<String> inputPaths = new ArrayList<String>();
      inputPaths.add(task.inputPathFormat);
      String outputPath = task.outputPath;
      final StagedOutputJob job = StagedOutputJob.createStagedJob(_props, _name + "-parse-confs-" + task.id, inputPaths, "/tmp" + outputPath, outputPath, _log);
      job.getConfiguration().set("jobs.output.path", _confsOutputPathRoot);
      job.getConfiguration().set("logs.cluster.name", clusterName);
      job.setOutputKeyClass(BytesWritable.class);
      job.setOutputValueClass(NullWritable.class);
      job.setInputFormatClass(CombineDocumentFileFormat.class);
      job.setOutputFormatClass(AvroKeyOutputFormat.class);
      AvroJob.setOutputKeySchema(job, JobConf.SCHEMA$);
      job.setNumReduceTasks(0);
      job.setMapperClass(ParseJobConfs.TheMapper.class);
      executor.submit(job);
    }
    executor.waitForCompletion();
  }
}
Example 12
Project: geowave-master File: VectorMRExportJobRunner.java
/**
 * Main method to execute the MapReduce analytic.
 */
public int runJob() throws CQLException, IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = super.getConf();
  if (conf == null) {
    conf = new Configuration();
    setConf(conf);
  }
  GeoWaveConfiguratorBase.setRemoteInvocationParams(hdfsHostPort, mrOptions.getResourceManagerHostPort(), conf);
  final QueryOptions options = new QueryOptions();
  final List<String> adapterIds = mrOptions.getAdapterIds();
  final AdapterStore adapterStore = storeOptions.createAdapterStore();
  if ((adapterIds != null) && !adapterIds.isEmpty()) {
    options.setAdapters(Lists.transform(adapterIds, new Function<String, DataAdapter<?>>() {
      @Override
      public DataAdapter<?> apply(final String input) {
        return adapterStore.getAdapter(new ByteArrayId(input));
      }
    }));
  }
  conf.setInt(BATCH_SIZE_KEY, mrOptions.getBatchSize());
  if (mrOptions.getIndexId() != null) {
    final Index index = storeOptions.createIndexStore().getIndex(new ByteArrayId(mrOptions.getIndexId()));
    if (index == null) {
      JCommander.getConsole().println("Unable to find index '" + mrOptions.getIndexId() + "' in store");
      return -1;
    }
    if (index instanceof PrimaryIndex) {
      options.setIndex((PrimaryIndex) index);
    } else {
      JCommander.getConsole().println("Index '" + mrOptions.getIndexId() + "' is not a primary index");
      return -1;
    }
  }
  if (mrOptions.getCqlFilter() != null) {
    if ((adapterIds == null) || (adapterIds.size() != 1)) {
      JCommander.getConsole().println("Exactly one type is expected when using CQL filter");
      return -1;
    }
    final String adapterId = adapterIds.get(0);
    final DataAdapter<?> adapter = storeOptions.createAdapterStore().getAdapter(new ByteArrayId(adapterId));
    if (adapter == null) {
      JCommander.getConsole().println("Type '" + adapterId + "' not found");
      return -1;
    }
    if (!(adapter instanceof GeotoolsFeatureDataAdapter)) {
      JCommander.getConsole().println("Type '" + adapterId + "' does not support vector export");
      return -1;
    }
    GeoWaveInputFormat.setQuery(conf, (DistributableQuery) CQLQuery.createOptimalQuery(mrOptions.getCqlFilter(), (GeotoolsFeatureDataAdapter) adapter, options.getIndex(), null));
  }
  GeoWaveInputFormat.setStoreOptions(conf, storeOptions);
  // the above code is a temporary placeholder until this gets merged with
  // the new commandline options
  GeoWaveInputFormat.setQueryOptions(conf, options);
  final Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName("Exporting to " + hdfsPath);
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputPath(job, new Path(hdfsPath));
  job.setMapperClass(VectorExportMapper.class);
  job.setInputFormatClass(GeoWaveInputFormat.class);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  AvroJob.setOutputKeySchema(job, AvroSimpleFeatureCollection.SCHEMA$);
  AvroJob.setMapOutputKeySchema(job, AvroSimpleFeatureCollection.SCHEMA$);
  GeoWaveInputFormat.setMinimumSplitCount(job.getConfiguration(), mrOptions.getMinSplits());
  GeoWaveInputFormat.setMaximumSplitCount(job.getConfiguration(), mrOptions.getMaxSplits());
  boolean retVal = false;
  try {
    retVal = job.waitForCompletion(true);
  } catch (final IOException ex) {
    LOGGER.error("Error waiting for map reduce tile resize job: ", ex);
  }
  return retVal ? 0 : 1;
}
Example 13
Project: pinot-master File: DerivedColumnTransformationPhaseJob.java
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(DerivedColumnTransformationPhaseJob.class);
  Configuration configuration = job.getConfiguration();
  FileSystem fs = FileSystem.get(configuration);
  // Input Path
  String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(",")) {
    LOGGER.info("Adding input:" + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }
  // Topk path
  String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
  LOGGER.info("Topk path : " + topkPath);
  // Output path
  Path outputPath = new Path(getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);
  // Schema
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", avroSchema.toString(true));
  // ThirdEyeConfig
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
  LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
  // New schema
  Schema outputSchema = newSchema(thirdeyeConfig);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());
  // Map config
  job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);
  AvroJob.setOutputKeySchema(job, outputSchema);
  LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
  AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);
  job.setNumReduceTasks(0);
  job.waitForCompletion(true);
  return job;
}
Example 14
Project: hadcom.utils-master File: ConvertEnvMultiTable2MultiAvro2.java
private static void addSchema(String outputPath, Job job, BufferedReader br) throws IOException {
  String line;
  StringBuilder strBuilder = new StringBuilder();
  // read the file
  while ((line = br.readLine()) != null) {
    strBuilder.append(line);
  }
  String schemaStr = strBuilder.toString();
  // get the schema name and make a hdfs directory out of it.
  Schema schema;
  try {
    schema = new Schema.Parser().parse(schemaStr);
  } catch (Exception e) {
    throw new RuntimeException("Unable to parse schema file: " + schemaStr, e);
  }
  getNamedOutputsList(job);
  job.getConfiguration().set(SCHEMA_PRE_CONF + schema.getName(), schemaStr);
  AvroMultipleOutputs.addNamedOutput(job, schema.getName(), AvroKeyOutputFormat.class, schema);
  // clear the builder
  strBuilder.delete(0, strBuilder.length());
}
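AvroMultipleOutputs.addNamedOutput, as used in Examples 3, 13, and 14, only registers the named output on the job; the records themselves are written from the task side through an AvroMultipleOutputs instance. A minimal sketch of the usual setup/write/cleanup pattern in a mapper follows; the mapper class, the "myRecord" name, and the parse helper are hypothetical placeholders, not code from the project above.

import java.io.IOException;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroMultipleOutputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NamedOutputMapper
    extends Mapper<LongWritable, Text, AvroKey<GenericRecord>, NullWritable> {

  private AvroMultipleOutputs amos;

  @Override
  protected void setup(Context context) {
    amos = new AvroMultipleOutputs(context);
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    GenericRecord record = parse(value); // hypothetical conversion to an Avro record
    // "myRecord" must match the name passed to AvroMultipleOutputs.addNamedOutput.
    amos.write("myRecord", new AvroKey<>(record), NullWritable.get());
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    amos.close(); // flush and close all named outputs
  }

  private GenericRecord parse(Text line) {
    throw new UnsupportedOperationException("placeholder for real parsing logic");
  }
}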
Example 15
Project: kiji-mapreduce-master File: AvroKeyMapReduceJobOutput.java
/** {@inheritDoc} */
@Override
protected Class<? extends OutputFormat> getOutputFormatClass() {
  return AvroKeyOutputFormat.class;
}