package com.linkedin.camus.sweeper.utils;
import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
/**
* Class to parse the schema without name validation
*
* The routines here might try to parse the schema twice.
* The first time with name validation on, if it failed with SchemaParseException,
* it will try to parse it again without name validation.
*
* The reason we do this is if the schema was generated/serialized using avro-1.4
* which has weaker check on field names (e.g. it allowed -, @, ' '). However,
* if we are running avro-1.7 during deserialization and avro 1.7 has stronger name
* validation. Change those field names would mean migration overhead.
* We might just have to live with the old schema names during the interim.
*
* @author hcai
*
*/
public class RelaxedSchemaUtils {
private static final Log LOG = LogFactory.getLog(RelaxedSchemaUtils.class.getName());
// This constant is coming from AvroJob, however it's defined as private
private static final String CONF_INPUT_KEY_SCHEMA = "avro.schema.input.key";
private static final String CONF_SKIP_NAME_VALIDATION = "camus.sweeper.skip.name.validation";
public static boolean skipNameValidation(Configuration conf) {
String validateStr = conf.get(CONF_SKIP_NAME_VALIDATION);
LOG.info(CONF_SKIP_NAME_VALIDATION + ": " + validateStr);
if (validateStr != null && Boolean.parseBoolean(validateStr)) {
return true;
}
return false;
}
/**
* This routine might try to parse the schema twice.
*
* @param schemaStr
* @return
*/
public static Schema parseSchema(String schemaStr, Configuration conf) {
Schema schema = null;
try {
schema = new Schema.Parser().parse(schemaStr);
} catch (SchemaParseException ex) {
boolean skipNameValidation = skipNameValidation(conf);
if (skipNameValidation) {
LOG.warn("Cannot parse schema. " + ex);
LOG.info("Try one more time without name validation.");
Schema.Parser parser = new Schema.Parser();
schema = parser.setValidate(false).parse(schemaStr);
} else {
throw ex;
}
}
return schema;
}
/**
* Gets the job input key schema.
*
* This is the equivalent code copied from AvroJob
*
* @param conf The job configuration.
* @return The job input key schema, or null if not set.
*/
public static Schema getInputKeySchema(Configuration conf) {
String schemaString = conf.get(CONF_INPUT_KEY_SCHEMA);
return schemaString != null ? parseSchema(schemaString, conf) : null;
}
}