/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.scheme;
import cascading.scheme.util.DelimitedParser;
import cascading.tap.TapException;
import cascading.tuple.Tuple;
import cascading.tuple.coerce.Coercions;
import cascading.tuple.coerce.StringCoerce;
import cascading.tuple.type.DateType;
import cascading.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Delimited-text parser that customises Cascading's {@link DelimitedParser} in
 * two ways:
 * <ul>
 * <li>while reading, the text of every field whose coercible type is not
 * {@link StringCoerce} is trimmed before it is coerced, so that padded
 * numeric and date values still parse;</li>
 * <li>while writing, every value whose declared type is not a
 * {@link DateType} is passed through the coercible type of its column before
 * it is rendered. Header values are never coerced.</li>
 * </ul>
 *
 * @author Bhavesh
 */
@SuppressWarnings("rawtypes")
public class HydrographDelimitedParser extends DelimitedParser {

    private static final long serialVersionUID = 4546944494735373827L;

    private static final Logger LOG = LoggerFactory
            .getLogger(HydrographDelimitedParser.class);

    /**
     * {@code true} only while {@link #joinFirstLine(Iterable, Appendable)} is
     * rendering the header line; disables write-side coercion.
     */
    private boolean hasHeader = false;

    /**
     * Creates a parser by delegating to the three-argument super constructor.
     *
     * @param delimiter field delimiter
     * @param quote     quote string
     * @param types     declared column types
     */
    public HydrographDelimitedParser(String delimiter, String quote,
            Class[] types) {
        super(delimiter, quote, types);
    }

    /**
     * Creates a parser by delegating to the five-argument super constructor.
     *
     * @param delimiter field delimiter
     * @param quote     quote string
     * @param types     declared column types
     * @param strict    forwarded unchanged to the super class
     * @param safe      when {@code true}, a value that cannot be coerced is
     *                  replaced by {@code null} instead of raising a
     *                  {@link TapException}
     */
    public HydrographDelimitedParser(String delimiter, String quote,
            Class[] types, boolean strict, boolean safe) {
        super(delimiter, quote, types, strict, safe);
    }

    /**
     * Coerces the raw split values of one line to the declared column types.
     * Values of string columns are coerced untouched; all other values are
     * converted to text and trimmed first.
     *
     * @param line  the raw line, attached to the exception when coercion fails
     * @param split the values split out of {@code line}
     * @return the coerced values, or {@code split} itself when no types are
     *         declared
     * @throws TapException when a value cannot be coerced and the parser is
     *                      not in safe mode
     * @see cascading.scheme.util.DelimitedParser#coerceParsedLine(java.lang.String,
     *      java.lang.Object[])
     */
    @Override
    protected Object[] coerceParsedLine(String line, Object[] split) {
        if (types == null) { // forced null in ctor
            return split;
        }

        Object[] result = new Object[split.length];
        for (int i = 0; i < split.length; i++) {
            try {
                if (coercibles[i] instanceof StringCoerce) {
                    // strings keep their surrounding whitespace
                    result[i] = coercibles[i].canonical(split[i]);
                } else {
                    // trim so that padded numbers and dates can be parsed
                    Object raw = split[i];
                    result[i] = coercibles[i].canonical(
                            raw == null ? null : raw.toString().trim());
                }
            } catch (Exception exception) {
                result[i] = null;
                if (!safe) {
                    // the exception carries the raw line
                    throw new TapException(getSafeMessage(split[i], i),
                            exception, new Tuple(line));
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug(getSafeMessage(split[i], i), exception);
                }
            }
        }
        return result;
    }

    /**
     * Appends the values of {@code tuple} to {@code buffer}, separated by the
     * delimiter. Every occurrence of the quote string inside a value is
     * doubled, and a value that contains the delimiter is wrapped in quotes.
     * {@code null} values are written as empty fields.
     *
     * @param tuple  values to write
     * @param buffer destination
     * @return {@code buffer}
     * @throws IOException when appending to {@code buffer} fails
     * @see cascading.scheme.util.DelimitedParser#joinWithQuote(java.lang.Iterable,
     *      java.lang.Appendable)
     */
    @Override
    protected Appendable joinWithQuote(Iterable tuple, Appendable buffer)
            throws IOException {
        int count = 0;
        for (Object value : tuple) {
            value = coerceForWrite(value, count);
            if (count != 0) {
                buffer.append(delimiter);
            }
            if (value != null) {
                // Literal replacement: the quote string must not be interpreted
                // as a regular expression (e.g. '|', '.', '$').
                String valueString = value.toString().replace(quote,
                        quote + quote);
                if (valueString.contains(delimiter)) {
                    valueString = quote + valueString + quote;
                }
                buffer.append(valueString);
            }
            count++;
        }
        return buffer;
    }

    /**
     * Appends the values of {@code tuple} to {@code buffer}, separated by the
     * delimiter, without any quoting. {@code null} values are written as empty
     * fields.
     *
     * @param tuple  values to write
     * @param buffer destination
     * @return {@code buffer}
     * @throws IOException when appending to {@code buffer} fails
     * @see cascading.scheme.util.DelimitedParser#joinNoQuote(java.lang.Iterable,
     *      java.lang.Appendable)
     */
    @Override
    protected Appendable joinNoQuote(Iterable tuple, Appendable buffer)
            throws IOException {
        int count = 0;
        for (Object value : tuple) {
            value = coerceForWrite(value, count);
            if (count != 0) {
                buffer.append(delimiter);
            }
            if (value != null) {
                buffer.append(value.toString());
            }
            count++;
        }
        return buffer;
    }

    /**
     * Writes the header line. Write-side coercion is switched off for the
     * duration of the call and is switched back on afterwards, even when
     * preparing or joining the header fails.
     *
     * @param iterable header values
     * @param buffer   destination
     * @return the result of joining the prepared header values into
     *         {@code buffer}
     */
    @Override
    public Appendable joinFirstLine(Iterable iterable, Appendable buffer) {
        hasHeader = true;
        try {
            iterable = prepareFields(iterable);
            return joinLine(iterable, buffer);
        } finally {
            // never leave the flag set, otherwise the next data line would
            // be written without coercion
            hasHeader = false;
        }
    }

    /**
     * Applies the declared column type to a value that is about to be written.
     * The value is returned unchanged while the header is being written, when
     * no types are declared, or when the column is a {@link DateType}.
     *
     * @param value the value to write, may be {@code null}
     * @param index zero-based position of the value in the tuple
     * @return the canonical form of {@code value} for its column type, or
     *         {@code value} itself when no coercion applies
     */
    private Object coerceForWrite(Object value, int index) {
        if (hasHeader || types == null) {
            return value;
        }
        if (types[index] instanceof DateType) {
            return value;
        }
        return Coercions.coercibleTypeFor(types[index]).canonical(value);
    }

    /**
     * Builds the message for a failed coercion. Falls back to a position-only
     * message when the field name or type cannot be resolved for {@code i}.
     *
     * @param object the value that could not be coerced
     * @param i      zero-based position of the value
     * @return a description of the failure
     */
    private String getSafeMessage(Object object, int i) {
        try {
            return "field " + sourceFields.get(i)
                    + " cannot be coerced from : " + object + " to: "
                    + Util.getTypeName(types[i]);
        } catch (Exception e) {
            return "field pos " + i + " cannot be coerced from: " + object
                    + ", pos has no corresponding field name or coercion type";
        }
    }
}