package com.linkedin.databus2.producers.gg;
/*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.nio.charset.Charset;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.avro.Schema;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.log4j.Logger;
import com.linkedin.databus.core.DbusConstants;
import com.linkedin.databus.core.util.InvalidConfigException;
import com.linkedin.databus2.core.DatabusException;
import com.linkedin.databus2.producers.ConstantPartitionFunction;
import com.linkedin.databus2.producers.PartitionFunction;
import com.linkedin.databus2.relay.config.LogicalSourceStaticConfig;
import com.linkedin.databus2.schemas.utils.SchemaHelper;
public class GGEventGenerationFactory
{
// Example of the timestamp text matched by ggTimeStampStringToMilliSeconds: 2013-03-10:11:45:01.001000000
// Per-instance logger named after the runtime class. Every method visible in this class is static,
// so none of them can reference this field.
private final Logger log = Logger.getLogger(getClass());
/**
 * Builds the partition function described by a logical source configuration.
 *
 * <p>Only the {@code constant:<number>} form is recognized; {@code <number>} is trimmed and
 * parsed as a {@code short}.
 *
 * @param sourceConfig the logical source configuration whose partition function is read
 * @return a {@link ConstantPartitionFunction} for the configured partition number
 * @throws InvalidConfigException if the partition function is missing, does not start with
 *         {@code constant:}, or its number part cannot be turned into a partition function
 */
public static PartitionFunction buildPartitionFunction(LogicalSourceStaticConfig sourceConfig)
throws InvalidConfigException
{
  final String constantPrefix = "constant:";
  String partitionFunction = sourceConfig.getPartitionFunction();

  // A missing setting previously escaped as a NullPointerException from startsWith();
  // report it as the configuration error it is.
  if (partitionFunction == null || !partitionFunction.startsWith(constantPrefix))
  {
    throw new InvalidConfigException("Invalid partition configuration (" + partitionFunction + ").");
  }

  try
  {
    String numberPart = partitionFunction.substring(constantPrefix.length()).trim();
    return new ConstantPartitionFunction(Short.parseShort(numberPart));
  }
  catch (Exception ex)
  {
    // Typically a NumberFormatException from parsing; anything thrown while building the
    // function is reported uniformly as a configuration problem.
    throw new InvalidConfigException("Invalid partition configuration (" + partitionFunction + "). " +
                                     "Could not parse the constant partition number.");
  }
}
/**
 * Compiled once: matches {@code gg://<dir>:<prefix>}. The first group is greedy, so the
 * text is split at the last ':' that follows {@code gg://}.
 */
private static final Pattern GG_URI_PATTERN = Pattern.compile("gg://(.*):(.*)");

/**
 * Extracts the directory part (first group) of a {@code gg://<dir>:<prefix>} uri.
 *
 * @param uri the uri to parse
 * @return the text between {@code gg://} and the last ':'
 * @throws DatabusException if {@code uri} is null or does not have the expected form
 */
public static String uriToGGDir(String uri)
throws DatabusException
{
  return matchGGUri(uri).group(1);
}

/**
 * Extracts the xml prefix part (second group) of a {@code gg://<dir>:<prefix>} uri.
 *
 * @param uri the uri to parse
 * @return the text after the last ':'
 * @throws DatabusException if {@code uri} is null or does not have the expected form
 */
public static String uriToXmlPrefix(String uri)
throws DatabusException
{
  return matchGGUri(uri).group(2);
}

/** Validates the uri against the gg pattern and returns the successful matcher. */
private static Matcher matchGGUri(String uri)
throws DatabusException
{
  if (uri == null)
  {
    throw new DatabusException("uri passed is null and not valid");
  }
  Matcher matcher = GG_URI_PATTERN.matcher(uri);
  // groupCount() is a fixed property of the pattern (always 2 here), so matches() is the only check needed.
  if (!matcher.matches())
  {
    throw new DatabusException("Expected uri format for gg path not found");
  }
  return matcher;
}
/**
 * Converts a textual field value into the Java object used for the given Avro field.
 *
 * <p>The Avro type is resolved through {@code SchemaHelper.getAnyType}. Primitive types
 * (and NULL) are delegated to {@link #convertToSimpleType(String, Schema.Field)}; every
 * other type is rejected.
 *
 * @param fieldValue the value as text
 * @param avroField the Avro field describing the target type
 * @return the converted value
 * @throws DatabusException if the Avro type is not supported
 */
public static Object stringToAvroType(String fieldValue, Schema.Field avroField)
throws DatabusException
{
  Schema.Type avroType = SchemaHelper.getAnyType(avroField);
  String fieldName = avroField.name();

  switch (avroType)
  {
    case ARRAY:
    case RECORD:
      // TODO Add support for these datatypes (warning: when do so, watch out for fieldType
      // vs. avroField.schema() mismatches for arrays within unions: see DDSDBUS-3093/3136)
      throw new DatabusException("Handling of Avro '" + avroType + "' field type not yet implemented!");

    case NULL:
    case BOOLEAN:
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case BYTES:
      return convertToSimpleType(fieldValue, avroField);

    default:
      // Reached for ENUM, FIXED, MAP, UNION and anything else not listed above.
      throw new DatabusException("unknown field type: " + fieldName + ":" + avroType);
  }
}
/**
 * Converts a textual field value according to the field's {@code dbFieldType} meta attribute
 * (compared case-insensitively):
 * <ul>
 *   <li>INTEGER, LONG, FLOAT, DOUBLE: parsed into the matching boxed number</li>
 *   <li>DATE: {@link #ggDateStringToLong(String)}</li>
 *   <li>TIMESTAMP: {@link #ggTimeStampStringToMilliSeconds(String)}</li>
 *   <li>CLOB, VARCHAR, VARCHAR2, NVARCHAR, NVARCHAR2, XMLTYPE, CHAR: returned unchanged</li>
 *   <li>BLOB, RAW: decoded from a "0x"-prefixed hex string into a byte array</li>
 * </ul>
 *
 * @param fieldValue the value as text
 * @param avroField the Avro field carrying the {@code dbFieldType} meta attribute
 * @return the converted value
 * @throws DatabusException if the type is missing, unknown, not implemented (ARRAY, TABLE),
 *         or a BLOB/RAW value cannot be decoded
 * @throws NumberFormatException if a numeric value cannot be parsed
 */
public static Object convertToSimpleType(String fieldValue, Schema.Field avroField)
throws DatabusException
{
  String recordFieldName = avroField.name();
  String databaseFieldType = SchemaHelper.getMetaField(avroField, "dbFieldType");

  // A field without the meta attribute used to fail with a NullPointerException below;
  // treat it like any other unrecognized type.
  if (databaseFieldType == null)
  {
    throw new DatabusException("unknown field type: " + recordFieldName + ":" + databaseFieldType);
  }

  if (databaseFieldType.equalsIgnoreCase("INTEGER"))
  {
    return Integer.valueOf(fieldValue);
  }
  if (databaseFieldType.equalsIgnoreCase("LONG"))
  {
    return Long.valueOf(fieldValue);
  }
  if (databaseFieldType.equalsIgnoreCase("DATE"))
  {
    return ggDateStringToLong(fieldValue);
  }
  if (databaseFieldType.equalsIgnoreCase("TIMESTAMP"))
  {
    return ggTimeStampStringToMilliSeconds(fieldValue);
  }
  if (databaseFieldType.equalsIgnoreCase("FLOAT"))
  {
    return Float.valueOf(fieldValue);
  }
  if (databaseFieldType.equalsIgnoreCase("DOUBLE"))
  {
    return Double.valueOf(fieldValue);
  }
  if (isOneOfIgnoreCase(databaseFieldType,
                        "CLOB", "VARCHAR", "VARCHAR2", "NVARCHAR", "NVARCHAR2", "XMLTYPE", "CHAR"))
  {
    // Character types are passed through untouched.
    return fieldValue;
  }
  if (isOneOfIgnoreCase(databaseFieldType, "BLOB", "RAW"))
  {
    return decodeHexField(fieldValue, databaseFieldType, recordFieldName);
  }
  if (databaseFieldType.equalsIgnoreCase("ARRAY"))
  {
    throw new DatabusException("ARRAY type still not implemented!"); //TODO add support for array
  }
  if (databaseFieldType.equalsIgnoreCase("TABLE"))
  {
    throw new DatabusException("TABLE type still not implemented!"); //TODO add support for table
  }
  throw new DatabusException("unknown field type: " + recordFieldName + ":" + databaseFieldType);
}

/** Returns true when {@code value} equals, ignoring case, any of {@code candidates}. */
private static boolean isOneOfIgnoreCase(String value, String... candidates)
{
  for (String candidate : candidates)
  {
    if (value.equalsIgnoreCase(candidate))
    {
      return true;
    }
  }
  return false;
}

/** Decodes a BLOB/RAW value: empty text yields an empty array, otherwise "0x"-prefixed hex is required. */
private static Object decodeHexField(String fieldValue, String databaseFieldType, String recordFieldName)
throws DatabusException
{
  if (fieldValue.length() == 0)
  {
    return fieldValue.getBytes(Charset.defaultCharset());
  }
  if (fieldValue.length() <= 2)
  {
    throw new DatabusException("Unable to decode the string because length is less than 2");
  }
  if (!isStringHex(fieldValue))
  {
    throw new DatabusException("Unable to decode the string because it is not hex-encoded");
  }
  try
  {
    // NOTE(review): besides the "0x" prefix this also drops the LAST character of the value.
    // Presumably the incoming text carries a trailing delimiter; confirm against real input,
    // because for plain "0x<even hex>" text this leaves an odd number of digits.
    return stringToHex(fieldValue.substring(2, fieldValue.length()-1));
  }
  catch (DecoderException e)
  {
    // Keep the decoder's explanation in the message so the root cause is not lost.
    throw new DatabusException("Unable to decode a " + databaseFieldType + " field: " + recordFieldName
                               + " (" + e.getMessage() + ")");
  }
}
/**
 * Tells whether the value looks like a "0x"-prefixed string with at least one more character.
 * Only the prefix and the length are checked; the remaining characters are not validated.
 *
 * @param fieldValue the text to inspect, may be null
 * @return true if the value is longer than two characters and starts with lowercase "0x"
 */
public static boolean isStringHex(String fieldValue)
{
  return fieldValue != null
      && fieldValue.length() > 2
      && fieldValue.startsWith("0x");
}
/**
 * Decodes a string of hexadecimal digits into bytes via {@code Hex.decodeHex}.
 * Despite the name, the direction is hex text to bytes.
 *
 * @param hexString the hexadecimal digits, without any prefix
 * @return the decoded bytes
 * @throws DecoderException if {@code Hex.decodeHex} rejects the input
 */
public static byte[] stringToHex(String hexString)
throws DecoderException
{
  char[] hexDigits = hexString.toCharArray();
  byte[] decoded = Hex.decodeHex(hexDigits);
  return decoded;
}
/**
 * Parses a timestamp string and returns it scaled to nanoseconds: the millisecond result of
 * {@link #ggTimeStampStringToMilliSeconds(String)} multiplied by
 * {@code DbusConstants.NUM_NSECS_IN_MSEC}.
 *
 * @param value the timestamp text
 * @return the parsed time scaled to nanoseconds
 * @throws DatabusException if the text does not have the expected format
 */
public static long ggTimeStampStringToNanoSeconds(String value)
throws DatabusException
{
  long milliSeconds = ggTimeStampStringToMilliSeconds(value);
  return milliSeconds * DbusConstants.NUM_NSECS_IN_MSEC;
}
/** Compiled once: yyyy-MM-dd:HH:mm:ss.f with zero to nine fractional-second digits. */
private static final Pattern GG_TIMESTAMP_PATTERN =
    Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2}):(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{0,9})");

/**
 * Parses a timestamp such as {@code 2013-03-10:11:45:01.001000000}, interpreted in UTC,
 * into milliseconds since the epoch.
 *
 * <p>The fractional part is a decimal fraction of a second: it is truncated to millisecond
 * precision, and fewer than three digits are right-padded with zeros (".5" is 500 ms, an
 * empty fraction is 0 ms).
 *
 * @param value the timestamp text
 * @return milliseconds since the epoch
 * @throws DatabusException if the text does not have the expected format
 */
public static long ggTimeStampStringToMilliSeconds(String value)
throws DatabusException
{
  Matcher matcher = GG_TIMESTAMP_PATTERN.matcher(value);
  // groupCount() is fixed by the pattern, so matches() alone decides validity.
  if (!matcher.matches())
  {
    throw new DatabusException("The timestamp format is not as expected, cannot proceed!");
  }

  // Normalize the fraction to exactly three digits before parsing. Parsing the raw digits
  // failed on an empty fraction and read ".5" as 5 ms instead of 500 ms.
  StringBuilder milliDigits = new StringBuilder(matcher.group(7));
  if (milliDigits.length() > 3)
  {
    milliDigits.setLength(3);
  }
  while (milliDigits.length() < 3)
  {
    milliDigits.append('0');
  }
  int milliSeconds = Integer.parseInt(milliDigits.toString());

  Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
  // getInstance() starts at the current time, so the millisecond field must be reset explicitly.
  calendar.set(Calendar.MILLISECOND, 0);
  calendar.set(Integer.parseInt(matcher.group(1)),
               Integer.parseInt(matcher.group(2)) - 1, // Calendar months are zero-based
               Integer.parseInt(matcher.group(3)),
               Integer.parseInt(matcher.group(4)),
               Integer.parseInt(matcher.group(5)),
               Integer.parseInt(matcher.group(6)));
  calendar.add(Calendar.MILLISECOND, milliSeconds);
  return calendar.getTimeInMillis();
}
/** Compiled once: a leading yyyy-MM-dd date followed by arbitrary trailing text. */
private static final Pattern GG_DATE_PATTERN = Pattern.compile("(\\d{4}-\\d{2}-\\d{2}).*");

/**
 * Converts the leading {@code yyyy-MM-dd} portion of the given text into the millisecond
 * value of the corresponding {@link java.sql.Date}; anything after the date is ignored.
 *
 * <p>NOTE(review): the conversion goes through {@code Date.valueOf}, which presumably uses
 * the JVM default time zone, unlike the UTC handling in the timestamp parser. Confirm that
 * this is intended.
 *
 * @param value the text starting with a date
 * @return the time value of the parsed date, in milliseconds
 * @throws DatabusException if the text does not start with a {@code yyyy-MM-dd} date
 */
public static long ggDateStringToLong(String value)
throws DatabusException
{
  Matcher matcher = GG_DATE_PATTERN.matcher(value);
  // groupCount() is fixed by the pattern, so matches() alone decides validity.
  if (!matcher.matches())
  {
    throw new DatabusException("The date format is not as expected, cannot proceed!");
  }
  return Date.valueOf(matcher.group(1)).getTime();
}
}