/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.scheme; import cascading.flow.FlowProcess; import cascading.scheme.SinkCall; import cascading.scheme.SourceCall; import cascading.scheme.hadoop.TextLine; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.RecordReader; import java.io.IOException; import java.lang.reflect.Type; import java.nio.charset.Charset; @SuppressWarnings("serial") public class TextFixedWidth extends TextLine { public static final char DEFAULT_FILLER = ' '; public static final boolean DEFAULT_STRICT = false; public static final boolean DEFAULT_SAFE = false; public static final String DEFAULT_CHARSETNAME = "UTF-8"; public static final Type[] DEFAULT_TYPES = null; public static final Compress DEFAULT_SINKCOMPRESSION = null; protected char filler; protected boolean strict; protected boolean safe; protected int[] lengths; protected Type[] types; protected String charsetName; public TextFixedWidth(Fields fields, int[] lengths) { this(fields, lengths, DEFAULT_TYPES, DEFAULT_SINKCOMPRESSION, DEFAULT_STRICT, DEFAULT_SAFE, DEFAULT_FILLER, DEFAULT_CHARSETNAME); } public TextFixedWidth(Fields fields, int[] lengths, Type[] types) { this(fields, lengths, types, DEFAULT_SINKCOMPRESSION, DEFAULT_STRICT, DEFAULT_SAFE, DEFAULT_FILLER, DEFAULT_CHARSETNAME); } public TextFixedWidth(Fields fields, int[] lengths, Type[] types, boolean strict, boolean safe) { this(fields, lengths, types, DEFAULT_SINKCOMPRESSION, strict, safe, DEFAULT_FILLER, DEFAULT_CHARSETNAME); } public TextFixedWidth(Fields fields, int[] lengths, Type[] types, boolean strict, boolean safe, String charsetName) { this(fields, lengths, types, DEFAULT_SINKCOMPRESSION, strict, safe, DEFAULT_FILLER, charsetName); } public TextFixedWidth(Fields fields, int[] lengths, Type[] types, Compress sinkCompression, boolean strict, boolean safe, char filler, String charsetName) { super(sinkCompression); // normalizes ALL and UNKNOWN setSinkFields(fields); setSourceFields(fields); // throws an exception if not found setCharsetName(charsetName); this.filler = filler; this.strict = strict; // SonarQube: Constructors and methods receiving arrays should clone // objects and // store the copy. This prevents that future changes from the user // affect the internal functionality this.lengths = lengths == null ? null : lengths.clone(); this.types = types == null ? null : types.clone(); this.safe = safe; this.charsetName = charsetName; } @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public boolean source(FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException { Object[] context = sourceCall.getContext(); if (!sourceCall.getInput().next(context[0], context[1])) return false; Object[] split = FixedWidthHelper.splitLine(getSourceFields(), makeEncodedString(context), lengths, types, safe, strict); Tuple tuple = sourceCall.getIncomingEntry().getTuple(); tuple.clear(); tuple.addAll(split); return true; } @SuppressWarnings("rawtypes") @Override public void sinkPrepare(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { sinkCall.setContext(new Object[3]); sinkCall.getContext()[0] = new Text(); sinkCall.getContext()[1] = new StringBuilder(4 * 1024); sinkCall.getContext()[2] = Charset.forName(charsetName); } @SuppressWarnings("rawtypes") @Override public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { Tuple tuple = sinkCall.getOutgoingEntry().getTuple(); write(sinkCall, tuple); } @SuppressWarnings({ "rawtypes", "unchecked" }) private void write(SinkCall<Object[], OutputCollector> sinkCall, Iterable tuple) throws IOException { Text text = (Text) sinkCall.getContext()[0]; StringBuilder line = (StringBuilder) sinkCall.getContext()[1]; Charset charset = (Charset) sinkCall.getContext()[2]; TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); line = (StringBuilder) FixedWidthHelper.createLine( tupleEntry.getTuple(), line, filler, lengths, strict, getSinkFields().getTypes(), getSinkFields()); text.set(line.toString().getBytes(charset)); sinkCall.getOutput().collect(null, text); line.setLength(0); } }