/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.scheme;
import cascading.tap.TapException;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.coerce.Coercions;
import cascading.tuple.coerce.StringCoerce;
import cascading.tuple.type.CoercibleType;
import cascading.tuple.type.DateType;
import java.io.IOException;
import java.lang.reflect.Type;
/**
 * Static helpers used to read and write fixed-width text records.
 * <p>
 * {@link #splitLine} cuts one input line into per-field tokens and coerces
 * them to the configured field types; {@link #createLine} renders the values
 * of one output record into a fixed-width line.
 */
public class FixedWidthHelper {

    /**
     * Character placed in front of {@link Number} values that are shorter
     * than their field width when writing a line.
     */
    private static final char NUMERIC_PAD = ' ';

    /**
     * Splits {@code line} into consecutive tokens of the given
     * {@code lengths} and coerces every token to its field type.
     * <p>
     * Field types are taken from {@code sourceFields.getTypes()}; when that
     * is {@code null} the {@code types} argument is used, and when both are
     * {@code null} every field is treated as a {@link String}.
     * <p>
     * Handling of lines that are shorter than the declared layout:
     * <ul>
     * <li>{@code strict == true}: a {@link TapException} is thrown.</li>
     * <li>{@code strict == false} and nothing is left on the line for a
     * field: the token for that field is {@code null}.</li>
     * <li>{@code strict == false} and only part of a field is left on the
     * line: a {@link TapException} is thrown.</li>
     * </ul>
     * When {@code strict} is {@code true}, a line that is longer than the sum
     * of the consumed lengths is rejected with a {@link TapException} as
     * well.
     *
     * @param sourceFields fields being read; supplies the field types, the
     *                     number of coercions and the names used in error
     *                     messages
     * @param line         the raw input line
     * @param lengths      width of every field, in characters
     * @param types        fallback field types, used only when
     *                     {@code sourceFields} declares none; may be
     *                     {@code null}
     * @param safe         when {@code true} a token that cannot be coerced
     *                     yields {@code null} instead of an exception
     * @param strict       when {@code true} the line length must match the
     *                     parsed length exactly
     * @return the coerced values, one entry per element of {@code lengths}
     * @throws TapException if the line does not fit the layout as described
     *                      above, or if a token cannot be coerced and
     *                      {@code safe} is {@code false}; the offending line
     *                      is attached as the exception payload
     */
    @SuppressWarnings("rawtypes")
    public static Object[] splitLine(Fields sourceFields, String line,
            int[] lengths, Type[] types, boolean safe, boolean strict) {
        String[] tokens = new String[lengths.length];
        int parsedLength = 0;

        for (int counter = 0; counter < lengths.length; counter++) {
            int tokenLength = lengths[counter];
            int availableLength = line.length() - parsedLength;
            String token;

            if (tokenLength <= availableLength) {
                // enough characters left: cut the field and move on
                token = line.substring(parsedLength, parsedLength + tokenLength);
                parsedLength += tokenLength;
            } else if (strict || availableLength > 0) {
                // The field runs past the end of the line. This is always an
                // error in strict mode; in non-strict mode it is an error
                // when the field is only partially present.
                // NOTE(review): the previous inline comment said "parse
                // whatever we can", but the code always raised this error
                // for a partial field (via a guaranteed substring failure).
                // That behaviour is kept; confirm whether a truncated token
                // should be accepted instead.
                throw new TapException(
                        notEnoughLengthMessage(line, counter + 1, parsedLength + tokenLength),
                        new Tuple(line)); // trap actual line data
            } else {
                // non-strict and nothing left on the line: field is missing
                token = null;
            }
            tokens[counter] = token;
        }

        if (parsedLength != line.length() && strict) {
            String message = "Input line length ("
                    + line.length()
                    + ") is not matching with parsed data length ("
                    + parsedLength
                    + "). If it is ok to have this situation then set strict to false and try again."
                    + "\nLine being parsed: " + line;
            // trap actual line data
            throw new TapException(message, new Tuple(line));
        }

        // Resolve the field types: declared on the fields first, then the
        // types handed in by the scheme, finally String for every field.
        Type[] fieldDataTypes = sourceFields.getTypes();
        if (fieldDataTypes == null && types != null) {
            fieldDataTypes = types;
        }
        int fieldSize = sourceFields.size();
        if (fieldDataTypes == null) {
            fieldDataTypes = new Type[fieldSize];
            for (int i = 0; i < fieldSize; i++) {
                fieldDataTypes[i] = String.class;
            }
        }

        CoercibleType[] coercions = Coercions.coercibleArray(fieldSize,
                fieldDataTypes);
        return coerceParsedTokens(sourceFields, line, safe, tokens,
                fieldDataTypes, coercions);
    }

    /**
     * Builds the error message used when a field extends past the end of the
     * input line.
     *
     * @param line           the line being parsed
     * @param fieldNumber    1-based position of the field that did not fit
     * @param requiredLength line length needed to read that field completely
     * @return the formatted message
     */
    private static String notEnoughLengthMessage(String line, int fieldNumber,
            int requiredLength) {
        return "Input line does not have enough length to parse all fields. Input length is "
                + line.length()
                + ". Length required to parse "
                + fieldNumber
                + " field is "
                + requiredLength
                + "\nLine being parsed: " + line;
    }

    /**
     * Converts every raw token to the canonical value of its field type.
     * <p>
     * Tokens of {@link StringCoerce} fields are passed through untouched.
     * For every other type the token is trimmed first, and a token that is
     * {@code null} or blank is handed to the coercion as {@code null}.
     *
     * @param sourceFields   fields being read, used for error messages
     * @param line           the raw input line, attached to raised exceptions
     * @param safe           when {@code true} a failed coercion yields
     *                       {@code null} instead of an exception
     * @param tokens         raw tokens, {@code null} for missing fields
     * @param fieldDataTypes declared type per field, used for error messages
     * @param coercions      coercion per field
     * @return the coerced values, same length as {@code tokens}
     * @throws TapException if a token cannot be coerced and {@code safe} is
     *                      {@code false}
     */
    @SuppressWarnings("rawtypes")
    private static Object[] coerceParsedTokens(Fields sourceFields,
            String line, boolean safe, String[] tokens, Type[] fieldDataTypes,
            CoercibleType[] coercions) {
        Object[] coercedTokens = new Object[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            try {
                String token = tokens[i];
                if (!(coercions[i] instanceof StringCoerce)) {
                    token = trimToNull(token);
                }
                coercedTokens[i] = coercions[i].canonical(token);
            } catch (Exception exception) {
                String message = "field " + sourceFields.get(i)
                        + " cannot be coerced from : " + tokens[i] + " to: "
                        + fieldDataTypes[i];
                coercedTokens[i] = null;
                if (!safe) {
                    // trap actual line data
                    throw new TapException(message, exception, new Tuple(line));
                }
            }
        }
        return coercedTokens;
    }

    /**
     * Trims {@code token} and maps {@code null} or blank input to
     * {@code null}. The null guard matters because a missing trailing field
     * is represented by a {@code null} token; without it the trim itself
     * would fail and be reported as a coercion error.
     *
     * @param token raw token, may be {@code null}
     * @return the trimmed token, or {@code null} when nothing remains
     */
    private static String trimToNull(String token) {
        if (token == null) {
            return null;
        }
        String trimmed = token.trim();
        return trimmed.length() > 0 ? trimmed : null;
    }

    /**
     * Appends {@code filler} to {@code buffer} {@code times} times; does
     * nothing when {@code times} is zero or negative.
     */
    private static void appendFiller(Appendable buffer, char filler, int times)
            throws IOException {
        for (int i = 0; i < times; i++) {
            buffer.append(filler);
        }
    }

    /**
     * Renders the values of {@code tuple} as one fixed-width line and
     * appends it to {@code buffer}.
     * <p>
     * Every value is first converted through the coercion of its declared
     * type: {@link DateType} values are coerced to {@link String}, all other
     * values to their canonical form. A {@code null} result is written as an
     * empty string. The text is then fitted to the field width:
     * <ul>
     * <li>exact fit: written as is;</li>
     * <li>too long: {@link TapException} when {@code strict}, otherwise cut
     * to the field width;</li>
     * <li>too short: {@link Number} values are left-padded with spaces, all
     * other values are right-padded with {@code filler}.</li>
     * </ul>
     *
     * @param tuple      values of the record, in field order
     * @param buffer     target the line is appended to
     * @param filler     padding character for non-numeric values
     * @param len        width of every field, in characters
     * @param strict     when {@code true} an over-long value is an error
     * @param types      declared type of every field
     * @param sinkFields fields being written, used for error messages
     * @return {@code buffer}, to allow chaining
     * @throws IOException  if appending to {@code buffer} fails
     * @throws TapException if a value is longer than its field and
     *                      {@code strict} is {@code true}
     */
    @SuppressWarnings("rawtypes")
    public static Appendable createLine(Iterable tuple, Appendable buffer,
            char filler, int[] len, boolean strict, Type[] types,
            Fields sinkFields) throws IOException {
        int count = -1;
        for (Object value : tuple) {
            count++;

            // apply the declared datatype while writing the file
            Type type = types[count];
            CoercibleType coercion = Coercions.coercibleTypeFor(type);
            Object converted;
            if (type instanceof DateType) {
                converted = coercion.coerce(value, String.class);
            } else {
                converted = coercion.canonical(value);
            }

            // numbers are right-aligned, everything else is left-aligned
            boolean numeric = converted instanceof Number;
            // set blank for null
            String text = converted == null ? "" : converted.toString();
            int padding = len[count] - text.length();

            if (padding < 0) {
                // value is wider than the field
                if (strict) {
                    throw new TapException(
                            "Fixed width write error. Field "
                                    + sinkFields.get(count)
                                    + " has length "
                                    + text.length()
                                    + " whereas provided is "
                                    + len[count]
                                    + ". Set strict to false and provide filler to overide such errors if this is expected behaviour."
                                    + "\nLine being parsed: " + tuple,
                            new Tuple(tuple.toString()));
                }
                buffer.append(text.substring(0, len[count]));
            } else if (numeric) {
                appendFiller(buffer, NUMERIC_PAD, padding);
                buffer.append(text);
            } else {
                buffer.append(text);
                appendFiller(buffer, filler, padding);
            }
        }
        return buffer;
    }
}