/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

/**
 * A class that provides a line reader from an input stream.
 *
 * <p>Bytes are pulled from the stream into an internal buffer and scanned
 * for line terminators. A line ends at <code>\n</code>, at <code>\r\n</code>,
 * or at a <code>\r</code> that is followed by any byte other than
 * <code>\n</code>. The terminator is consumed but never stored in the
 * resulting {@link Text}.
 */
public class LineReader implements Closeable {
  private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;

  private final int bufferSize;
  private final InputStream in;
  private final byte[] buffer;
  // the number of bytes of real data in the buffer
  private int bufferLength = 0;
  // the current position in the buffer
  private int bufferPosn = 0;

  /**
   * Create a line reader that reads from the given stream using the
   * default buffer-size (64k).
   * @param in The input stream
   */
  public LineReader(InputStream in) {
    this(in, DEFAULT_BUFFER_SIZE);
  }

  /**
   * Create a line reader that reads from the given stream using the
   * given buffer-size.
   * @param in The input stream
   * @param bufferSize Size of the read buffer, in bytes; must be positive
   * @throws IllegalArgumentException if <code>bufferSize</code> is not
   *         positive
   */
  public LineReader(InputStream in, int bufferSize) {
    // A zero-length buffer makes InputStream.read(byte[]) return 0, which
    // backfill() reports as end of stream; reject it up front instead of
    // silently returning no data.
    if (bufferSize <= 0) {
      throw new IllegalArgumentException(
          "bufferSize must be positive: " + bufferSize);
    }
    this.in = in;
    this.bufferSize = bufferSize;
    this.buffer = new byte[this.bufferSize];
  }

  /**
   * Create a line reader that reads from the given stream using the
   * <code>io.file.buffer.size</code> specified in the given
   * <code>Configuration</code>. When the property is absent the default
   * buffer-size (64k) is used.
   * @param in input stream
   * @param conf configuration
   * @throws IOException declared by the signature; this constructor does
   *         not itself read from the stream
   * @throws IllegalArgumentException if the configured buffer size is not
   *         positive
   */
  public LineReader(InputStream in, Configuration conf) throws IOException {
    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
  }

  /**
   * Fill the buffer with more data, discarding whatever it held before and
   * resetting the read position to the start of the buffer.
   * @return was there more data?
   * @throws IOException if the underlying stream throws
   */
  boolean backfill() throws IOException {
    bufferPosn = 0;
    bufferLength = in.read(buffer);
    return bufferLength > 0;
  }

  /**
   * Close the underlying stream.
   * @throws IOException if the underlying stream throws
   */
  @Override
  public void close() throws IOException {
    in.close();
  }

  /**
   * Read one line from the InputStream into the given Text.
   *
   * <p>The text is cleared first. At most <code>maxLineLength</code> bytes
   * of the line are stored; the remainder of an over-long line is still
   * consumed from the stream but dropped.
   *
   * @param str the object to store the given line (without its terminator)
   * @param maxLineLength the maximum number of bytes to store into str.
   * @param maxBytesToConsume the maximum number of bytes to consume in this
   *        call. The limit is only checked each time the internal buffer has
   *        been scanned to its end, so the call may consume more than this
   *        value before it returns.
   * @return the number of bytes read including the newline, capped at
   *         {@link Integer#MAX_VALUE}; 0 when the stream is already at its
   *         end
   * @throws IOException if the underlying stream throws
   */
  public int readLine(Text str, int maxLineLength,
                      int maxBytesToConsume) throws IOException {
    str.clear();
    boolean hadFinalNewline = false;
    boolean hadFinalReturn = false;
    boolean hitEndOfFile = false;
    int startPosn = bufferPosn;
    long bytesConsumed = 0;
    outerLoop:
    while (true) {
      if (bufferPosn >= bufferLength) {
        if (!backfill()) {
          hitEndOfFile = true;
          break;
        }
      }
      startPosn = bufferPosn;
      for (; bufferPosn < bufferLength; ++bufferPosn) {
        switch (buffer[bufferPosn]) {
        case '\n':
          hadFinalNewline = true;
          // step over the newline so it is counted as consumed
          bufferPosn += 1;
          break outerLoop;
        case '\r':
          if (hadFinalReturn) {
            // leave this \r in the stream, so we'll get it next time
            break outerLoop;
          }
          hadFinalReturn = true;
          break;
        default:
          if (hadFinalReturn) {
            // the previous \r ended the line; this byte starts the next one
            break outerLoop;
          }
        }
      }
      // The buffer ran out before the line ended: bank what was scanned and
      // go around again. hadFinalReturn can only be set here when the \r was
      // the last byte of the buffer, so it is trimmed from the stored data.
      int scanned = bufferPosn - startPosn;
      bytesConsumed += scanned;
      appendBounded(str, startPosn, scanned - (hadFinalReturn ? 1 : 0),
                    maxLineLength);
      if (bytesConsumed >= maxBytesToConsume) {
        return clampToInt(bytesConsumed);
      }
    }
    int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 1 : 0);
    if (!hitEndOfFile) {
      // The line ended inside the current buffer. The computed length may be
      // negative when the terminator straddles a buffer boundary (the \r was
      // already handled with the previous buffer); appendBounded ignores it.
      int scanned = bufferPosn - startPosn;
      bytesConsumed += scanned;
      appendBounded(str, startPosn, scanned - newlineLength, maxLineLength);
    }
    return clampToInt(bytesConsumed);
  }

  /**
   * Read from the InputStream into the given Text.
   * @param str the object to store the given line
   * @param maxLineLength the maximum number of bytes to store into str.
   * @return the number of bytes read including the newline
   * @throws IOException if the underlying stream throws
   */
  public int readLine(Text str, int maxLineLength) throws IOException {
    return readLine(str, maxLineLength, Integer.MAX_VALUE);
  }

  /**
   * Read from the InputStream into the given Text.
   * @param str the object to store the given line
   * @return the number of bytes read including the newline
   * @throws IOException if the underlying stream throws
   */
  public int readLine(Text str) throws IOException {
    return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
  }

  /**
   * Append up to <code>length</code> bytes of the buffer, starting at
   * <code>start</code>, to <code>str</code> without letting it grow beyond
   * <code>maxLineLength</code> bytes. Non-positive lengths are ignored.
   */
  private void appendBounded(Text str, int start, int length,
                             int maxLineLength) {
    int bounded = Math.min(length, maxLineLength - str.getLength());
    if (bounded > 0) {
      str.append(buffer, start, bounded);
    }
  }

  /** Narrow a byte count to int, saturating at Integer.MAX_VALUE. */
  private static int clampToInt(long value) {
    return (int) Math.min(value, (long) Integer.MAX_VALUE);
  }
}