/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.morphline.stdio; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.Charset; import java.util.Collection; import java.util.Collections; import org.kitesdk.morphline.api.Command; import org.kitesdk.morphline.api.CommandBuilder; import org.kitesdk.morphline.api.MorphlineCompilationException; import org.kitesdk.morphline.api.MorphlineContext; import org.kitesdk.morphline.api.Record; import org.kitesdk.morphline.base.Fields; import com.typesafe.config.Config; /** * Command that emits one record per line in the input stream of the first attachment. */ public final class ReadLineBuilder implements CommandBuilder { @Override public Collection<String> getNames() { return Collections.singletonList("readLine"); } @Override public Command build(Config config, Command parent, Command child, MorphlineContext context) { return new ReadLine(this, config, parent, child, context); } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// private static final class ReadLine extends AbstractParser { private final Charset charset; private final boolean ignoreFirstLine; private final String commentPrefix; public ReadLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.charset = getConfigs().getCharset(config, "charset", null); this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false); String cprefix = getConfigs().getString(config, "commentPrefix", ""); if (cprefix.length() > 1) { throw new MorphlineCompilationException("commentPrefix must be at most one character long: " + cprefix, config); } this.commentPrefix = (cprefix.length() > 0 ? cprefix : null); validateArguments(); } @Override protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException { Record template = inputRecord.copy(); removeAttachments(template); template.removeAll(Fields.MESSAGE); Charset detectedCharset = detectCharset(inputRecord, charset); Reader reader = new InputStreamReader(stream, detectedCharset); BufferedReader lineReader = new BufferedReader(reader, getBufferSize(stream)); boolean isFirst = true; String line; while ((line = lineReader.readLine()) != null) { if (isFirst && ignoreFirstLine) { isFirst = false; continue; // ignore first line } if (line.length() == 0) { continue; // ignore empty lines } if (commentPrefix != null && line.startsWith(commentPrefix)) { continue; // ignore comments } Record outputRecord = template.copy(); outputRecord.put(Fields.MESSAGE, line); incrementNumRecords(); // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } } return true; } } }