package com.linkedin.databus.eventgenerator;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.JsonEncoder;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.json.JSONException;
import org.json.JSONObject;
public class DataGenerator {
public final static String MODULE = DataGenerator.class.getName();
public final static Logger LOG = Logger.getLogger(MODULE);
private final static String PRINT_AVRO_JSON_OPTNAME = "printAvroJson";
Schema schema;
/*
* Takes a schema file as input
*/
public DataGenerator(File schemaFile) throws IOException {
schema = Schema.parse(schemaFile);
}
/*
* Takes a schema string as an input
*/
public DataGenerator(String schema) {
this.schema = Schema.parse(schema);
}
/*
* Generate random based on the avro schema
* The schema must be of a record type to work
*
* @return returns the randomly generated record
*/
public GenericRecord generateRandomRecord() throws UnknownTypeException {
if(schema.getType() != Schema.Type.RECORD)
{
LOG.error("The schema first level must be record.");
return null;
}
GenericRecord record = new GenericData.Record(schema);
for(Field field : schema.getFields() )
{
SchemaFiller schemaFill = SchemaFiller.createRandomField(field);
schemaFill.writeToRecord(record);
}
return record;
}
static public void prettyPrint(GenericRecord record)
{
try {
LOG.info(new JSONObject(record.toString()).toString(2));
} catch (JSONException e) {
LOG.error("Unable to parser json: The Json created by the generator is not valid!");
e.printStackTrace();
}
}
public static Options loadOptions() {
Options opt = new Options();
opt.addOption("s", "schemaLocation", true, "location of the schema");
opt.addOption("minStringLength", true, "Minimum length of string to be generated");
opt.addOption("maxStringLength", true, "Maximum length of string to be generated");
opt.addOption("minIntegerRange", true, "Start range of integer");
opt.addOption("maxIntegerRange", true, "End range of integer");
opt.addOption("minLongRange", true, "Start range of long");
opt.addOption("maxLongRange", true, "End range of long");
opt.addOption("maxBytesLength", true, "Maximum length of the bytes to be generated");
opt.addOption(PRINT_AVRO_JSON_OPTNAME, true, "Replace the default human-readable JSON serialization with" +
" the standard Avro JSON serialization of the record which can be deserialized back to a " +
" record. The result is printed out to a file or to the standard output (-)." );
return opt;
}
public static void printHelp(Options opts) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("Generate a record with random data given an Avro schema", opts);
}
public static void main(String[] args) throws IOException, UnknownTypeException {
// load and verify the options
CommandLineParser parser = new PosixParser();
Options opts = loadOptions();
CommandLine cmd = null;
try {
cmd = parser.parse(opts, args);
} catch (org.apache.commons.cli.ParseException parseEx) {
LOG.error("Invalid option");
printHelp(opts);
return;
}
// check for necessary options
String fileLoc = cmd.getOptionValue("schemaLocation");
if (fileLoc == null) {
LOG.error("schemaLocation not specified");
printHelp(opts);
}
//get string length and check if min is greater than 0
// Generate the record
File schemaFile = new File(fileLoc);
DataGenerator dataGenerator = new DataGenerator(schemaFile);
GenericRecord record = dataGenerator.generateRandomRecord();
if (cmd.hasOption(PRINT_AVRO_JSON_OPTNAME))
{
String outname = cmd.getOptionValue(PRINT_AVRO_JSON_OPTNAME);
OutputStream outs = System.out;
if (!outname.equals("-"))
{
outs = new FileOutputStream(outname);
}
printAvroJson(record, outs);
if (!outname.equals("-"))
{
outs.close();
}
}
else
{
DataGenerator.prettyPrint(record);
}
}
private static void printAvroJson(GenericRecord record, OutputStream outs) throws IOException
{
JsonEncoder jsonEnc = new JsonEncoder(record.getSchema(), outs);
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(record.getSchema());
datumWriter.write(record, jsonEnc);
jsonEnc.flush();
}
// TODO add thread based generator here
}