/*
* Copyright 2015-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.facebook.buck.util;
import com.google.common.base.Preconditions;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
/**
 * Utility class to iterate over the lines present in one or more input strings, buffers, or arrays
 * given a {@link LineHandler} callback.
 *
 * <p>Whenever possible, the input buffer is directly passed back to your {@code LineHandler}
 * callback (with adjusted {@link Buffer#position() position} and {@link Buffer#limit() limit}), to
 * avoid copies and allocations.
 *
 * <p>If a line spans more than one input buffer or string, this class will handle concatenating
 * data until an end-of-line is reached (or the {@code LineHandler} is closed).
 *
 * <p>Supports Unix end-of-line ({@code \n}), Windows end-of-line ({@code \r\n}), and Mac
 * end-of-line ({@code \r}).
 *
 * @see CharLineHandler
 * @see ByteLineHandler
 */
public final class LineIterating {
  /** Capacity of the pending-line buffer used by the no-argument handler constructors. */
  private static final int INITIAL_BUFFER_CAPACITY = 256;

  /** Classification of a single buffer element with respect to end-of-line detection. */
  private enum ScanResult {
    NEWLINE,
    CARRIAGE_RETURN,
    OTHER
  }

  /**
   * The handful of operations that {@link Buffer} itself does not expose generically, implemented
   * once for {@link CharBuffer} and once for {@link ByteBuffer}.
   */
  private interface BufferOperations<T extends Buffer> {
    /** Allocates a new, empty buffer with the given capacity. */
    T createBuffer(int initialBufferCapacity);

    /**
     * Returns a duplicate of {@code buffer} (sharing its content) whose position is {@code
     * fromIndex} and whose limit is {@code toIndex}. The original buffer's position and limit are
     * not modified.
     */
    T createSubBuffer(T buffer, int fromIndex, int toIndex);

    /** Classifies the element at absolute index {@code index} without moving the position. */
    ScanResult scanAt(T buffer, int index);

    /** Relative bulk put: copies the remaining content of {@code bufferToAppend} into buffer. */
    void appendBuffers(T buffer, T bufferToAppend);
  }

  private static final BufferOperations<CharBuffer> CHAR_BUFFER_OPERATIONS =
      new BufferOperations<CharBuffer>() {
        @Override
        public CharBuffer createBuffer(int initialBufferCapacity) {
          return CharBuffer.allocate(initialBufferCapacity);
        }

        @Override
        public CharBuffer createSubBuffer(CharBuffer buffer, int fromIndex, int toIndex) {
          CharBuffer subBuffer = buffer.duplicate();
          subBuffer.position(fromIndex).limit(toIndex);
          return subBuffer;
        }

        @Override
        public ScanResult scanAt(CharBuffer buffer, int index) {
          switch (buffer.get(index)) {
            case '\n':
              return ScanResult.NEWLINE;
            case '\r':
              return ScanResult.CARRIAGE_RETURN;
            default:
              return ScanResult.OTHER;
          }
        }

        @Override
        public void appendBuffers(CharBuffer buffer, CharBuffer bufferToAppend) {
          buffer.put(bufferToAppend);
        }
      };

  private static final BufferOperations<ByteBuffer> BYTE_BUFFER_OPERATIONS =
      new BufferOperations<ByteBuffer>() {
        @Override
        public ByteBuffer createBuffer(int initialBufferCapacity) {
          return ByteBuffer.allocate(initialBufferCapacity);
        }

        @Override
        public ByteBuffer createSubBuffer(ByteBuffer buffer, int fromIndex, int toIndex) {
          ByteBuffer subBuffer = buffer.duplicate();
          subBuffer.position(fromIndex).limit(toIndex);
          return subBuffer;
        }

        @Override
        public ScanResult scanAt(ByteBuffer buffer, int index) {
          switch (buffer.get(index)) {
            case 0x0A:
              return ScanResult.NEWLINE;
            case 0x0D:
              return ScanResult.CARRIAGE_RETURN;
            default:
              return ScanResult.OTHER;
          }
        }

        @Override
        public void appendBuffers(ByteBuffer buffer, ByteBuffer bufferToAppend) {
          buffer.put(bufferToAppend);
        }
      };

  // Utility class, do not instantiate.
  private LineIterating() {}

  /**
   * Shared state for the public handler classes: the partial line carried over between input
   * chunks and whether the most recently scanned, not yet dispatched data ends in a carriage
   * return.
   */
  private abstract static class LineHandler<T extends Buffer> implements AutoCloseable {
    // Pending (not yet dispatched) data, kept in "write mode": position() is the number of
    // elements held, limit() == capacity(). Replaced by a larger buffer when it must grow.
    private T buffer;
    // True when a carriage return has been scanned and we are still waiting to learn whether it
    // is a lone "\r" end-of-line or the first half of "\r\n".
    private boolean sawCarriageReturn;

    /**
     * Callback handler invoked once per line (not including any end-of-line sequences).
     *
     * <p>The buffer passed in is either the caller's input buffer (with temporarily adjusted
     * position and limit) or this handler's internal buffer, which is cleared and reused after
     * the callback returns; copy the content if it must outlive the call.
     *
     * @return true to continue iterating over lines, false to stop.
     */
    public abstract boolean handleLine(T line);

    public LineHandler(T buffer) {
      this.buffer = buffer;
      this.sawCarriageReturn = false;
    }

    /**
     * Flushes any pending partial line to {@link #handleLine(Buffer)} (its return value is
     * ignored) and resets this handler so it can be reused for new input.
     */
    @Override
    public final void close() {
      if (buffer.position() > 0) {
        if (sawCarriageReturn) {
          // The pending data ends with a "\r" that terminated the final line; drop it.
          buffer.position(buffer.position() - 1);
        }
        buffer.flip();
        handleLine(buffer);
        buffer.clear();
      }
      sawCarriageReturn = false;
    }
  }

  /**
   * Stateful callback handler passed to {@link LineIterating#iterateByLines(CharSequence,
   * CharLineHandler)} and {@link LineIterating#iterateByLines(CharBuffer, CharLineHandler)}.
   *
   * <p>Subclass this and provide a {@link LineHandler#handleLine(Buffer)} callback to receive each
   * line of input.
   *
   * <p>This class is <i>not</i> thread-safe.
   *
   * <p>The method {@link LineHandler#handleLine(Buffer)} will be invoked once per line (not
   * including any end-of-line sequences -- note that this means a line can be empty if there's a
   * sequence of EOLs.)
   *
   * <p>You <i>must</i> call {@link #close()} after the last chunk of input has been provided to
   * this object, at which point the last line (if any) will be passed back to {@link
   * LineHandler#handleLine(Buffer)}.
   */
  public abstract static class CharLineHandler extends LineHandler<CharBuffer> {
    public CharLineHandler() {
      super(CharBuffer.allocate(INITIAL_BUFFER_CAPACITY));
    }

    /**
     * @param initialBufferCapacity initial capacity of the buffer used to hold lines spanning
     *     several input chunks; it grows on demand.
     */
    public CharLineHandler(int initialBufferCapacity) {
      super(CharBuffer.allocate(initialBufferCapacity));
    }
  }

  /**
   * Stateful callback handler passed to {@link LineIterating#iterateByLines(byte[],
   * ByteLineHandler)} and {@link LineIterating#iterateByLines(ByteBuffer, ByteLineHandler)}.
   *
   * <p>Subclass this and provide a {@link LineHandler#handleLine(Buffer)} callback to receive each
   * line of input.
   *
   * <p>This class is <i>not</i> thread-safe.
   *
   * <p>The method {@link LineHandler#handleLine(Buffer)} will be invoked once per line (not
   * including any end-of-line sequences -- note that this means a line can be empty if there's a
   * sequence of EOLs.)
   *
   * <p>You <i>must</i> call {@link #close()} after the last chunk of input has been provided to
   * this object, at which point the last line (if any) will be passed back to {@link
   * LineHandler#handleLine(Buffer)}.
   */
  public abstract static class ByteLineHandler extends LineHandler<ByteBuffer> {
    public ByteLineHandler() {
      super(ByteBuffer.allocate(INITIAL_BUFFER_CAPACITY));
    }

    /**
     * @param initialBufferCapacity initial capacity of the buffer used to hold lines spanning
     *     several input chunks; it grows on demand.
     */
    public ByteLineHandler(int initialBufferCapacity) {
      super(ByteBuffer.allocate(initialBufferCapacity));
    }
  }

  /**
   * Iterates over an input {@link CharSequence string} by lines, invoking your implementation of
   * {@link LineHandler#handleLine(Buffer)} once for each line in the input.
   *
   * <p>If your input contains long lines split across multiple strings, you can call this method
   * more than once, passing the same {@link CharLineHandler} to each invocation.
   *
   * @param str Input string containing zero or more lines to be iterated.
   * @param lineHandler Callback to be invoked with each line present in {@code str}.
   */
  public static void iterateByLines(CharSequence str, CharLineHandler lineHandler) {
    iterateBufferByLines(CharBuffer.wrap(str), lineHandler, CHAR_BUFFER_OPERATIONS);
  }

  /**
   * Iterates over an input {@link CharBuffer} by lines, invoking your implementation of {@link
   * LineHandler#handleLine(Buffer)} once for each line in the input.
   *
   * <p>If your input contains long lines split across multiple buffers, you can call this method
   * more than once, passing the same {@link CharLineHandler} to each invocation.
   *
   * <p>Consumes the entire {@code buffer}, starting at its current position. After returning, its
   * position is set to its limit.
   *
   * @param buffer Input character buffer containing zero or more lines to be iterated.
   * @param lineHandler Callback to be invoked with each line present in {@code buffer}.
   */
  public static void iterateByLines(CharBuffer buffer, CharLineHandler lineHandler) {
    iterateBufferByLines(buffer, lineHandler, CHAR_BUFFER_OPERATIONS);
  }

  /**
   * Iterates over an input byte array by lines, invoking your implementation of {@link
   * ByteLineHandler#handleLine(Buffer)} once for each line in the input.
   *
   * <p>If your input contains long lines split across multiple byte arrays, you can call this
   * method more than once, passing the same {@link ByteLineHandler} to each invocation.
   *
   * @param bytes Input byte array containing zero or more lines to be iterated.
   * @param lineHandler Callback to be invoked with each line present in {@code bytes}.
   */
  @SuppressWarnings("javadoc")
  public static void iterateByLines(byte[] bytes, ByteLineHandler lineHandler) {
    iterateBufferByLines(ByteBuffer.wrap(bytes), lineHandler, BYTE_BUFFER_OPERATIONS);
  }

  /**
   * Iterates over an input {@link ByteBuffer} by lines, invoking {@link
   * ByteLineHandler#handleLine(Buffer)} once for each line in the input.
   *
   * <p>If your input contains long lines split across multiple byte buffers, you can call this
   * method more than once, passing the same {@link ByteLineHandler} to each invocation.
   *
   * <p>Consumes the entire {@code buffer}, starting at its current position. After returning, its
   * position is set to its limit.
   *
   * @param buffer Input byte buffer containing zero or more lines to be iterated.
   * @param lineHandler Callback to be invoked with each line present in {@code buffer}.
   */
  @SuppressWarnings("javadoc")
  public static void iterateByLines(ByteBuffer buffer, ByteLineHandler lineHandler) {
    iterateBufferByLines(buffer, lineHandler, BYTE_BUFFER_OPERATIONS);
  }

  /**
   * Scans {@code lineBuffer} from its current position up to its limit, dispatching every
   * complete line to {@code lineHandler}. Data after the last dispatched line is copied into the
   * handler's pending buffer so that a later call (or {@code close()}) can complete it. On return
   * the position of {@code lineBuffer} equals its limit.
   *
   * <p>Scanning stops early as soon as the handler returns {@code false}.
   */
  private static <T extends Buffer> void iterateBufferByLines(
      T lineBuffer, LineHandler<T> lineHandler, BufferOperations<T> bufferOperations) {
    int lineStartPos = lineBuffer.position();
    int lineEndPos;
    boolean shouldContinue = true;
    // Start at the buffer's current position (not index 0): anything before the position has
    // already been consumed by the caller and must not be scanned again.
    for (lineEndPos = lineStartPos;
        shouldContinue && lineEndPos < lineBuffer.limit();
        lineEndPos++) {
      switch (bufferOperations.scanAt(lineBuffer, lineEndPos)) {
        case NEWLINE:
          // Either a bare "\n" or the second half of "\r\n"; dispatchHandler strips a pending
          // carriage return in the latter case.
          shouldContinue =
              dispatchHandler(lineBuffer, lineHandler, bufferOperations, lineStartPos, lineEndPos);
          lineStartPos = lineEndPos + 1;
          break;
        case CARRIAGE_RETURN:
          if (lineHandler.sawCarriageReturn) {
            shouldContinue =
                dispatchHandler(
                    lineBuffer, lineHandler, bufferOperations, lineStartPos, lineEndPos);
            // We don't add 1 here because this is the "previous" line's carriage return
            // and the carriage return we're on is the start of the "next" line.
            lineStartPos = lineEndPos;
          }
          lineHandler.sawCarriageReturn = true;
          break;
        case OTHER:
          if (lineHandler.sawCarriageReturn) {
            // The pending "\r" was a lone (Mac) end-of-line; the current element starts the
            // next line.
            shouldContinue =
                dispatchHandler(
                    lineBuffer, lineHandler, bufferOperations, lineStartPos, lineEndPos);
            lineStartPos = lineEndPos;
          }
          break;
      }
    }
    // Keep whatever follows the last dispatched line for the next invocation or close().
    appendToLineHandlerBuffer(
        lineHandler,
        bufferOperations,
        bufferOperations.createSubBuffer(lineBuffer, lineStartPos, lineEndPos));
    lineBuffer.position(lineBuffer.limit());
  }

  /**
   * Hands one complete line to {@code lineHandler}. The line consists of any pending data held by
   * the handler followed by {@code buffer[lineStartPos, lineEndPos)}; if a carriage return is
   * pending it is the last element of that sequence and is stripped before dispatch.
   *
   * @return the value returned by {@link LineHandler#handleLine(Buffer)}.
   */
  private static <T extends Buffer> boolean dispatchHandler(
      T buffer,
      LineHandler<T> lineHandler,
      BufferOperations<T> bufferOperations,
      int lineStartPos,
      int lineEndPos) {
    boolean shouldContinue;
    if (lineHandler.buffer.position() > 0) {
      // There's left-over data in the line handler buffer from a previous dispatch.
      T line = bufferOperations.createSubBuffer(buffer, lineStartPos, lineEndPos);
      appendToLineHandlerBuffer(lineHandler, bufferOperations, line);
      if (lineHandler.sawCarriageReturn) {
        // Strip the carriage return only AFTER appending: it is the last element of the combined
        // line, which may live in the current chunk rather than in the left-over data.
        lineHandler.buffer.position(lineHandler.buffer.position() - 1);
      }
      lineHandler.buffer.flip();
      shouldContinue = lineHandler.handleLine(lineHandler.buffer);
      lineHandler.buffer.clear();
    } else {
      // Nothing left over in the line handler buffer. We can directly pass
      // the input to the line handler, avoiding copies or allocations.
      if (lineHandler.sawCarriageReturn) {
        // With no left-over data, the pending carriage return must be inside this chunk's slice.
        if (lineEndPos <= lineStartPos) {
          throw new IllegalStateException(
              "Pending carriage return is not part of the current line ["
                  + lineStartPos
                  + ", "
                  + lineEndPos
                  + ")");
        }
        lineEndPos--;
      }
      int oldPosition = buffer.position();
      int oldLimit = buffer.limit();
      buffer.position(lineStartPos).limit(lineEndPos);
      shouldContinue = lineHandler.handleLine(buffer);
      // Restore the limit first so the position is always valid, even if the callback changed
      // the buffer's position or limit while reading the line.
      buffer.limit(oldLimit).position(oldPosition);
    }
    lineHandler.sawCarriageReturn = false;
    return shouldContinue;
  }

  /**
   * Appends the remaining content of {@code buffer} to the handler's pending buffer, replacing the
   * pending buffer with a larger one (capacity doubled until it fits) when necessary.
   */
  private static <T extends Buffer> void appendToLineHandlerBuffer(
      LineHandler<T> lineHandler, BufferOperations<T> bufferOperations, T buffer) {
    T pending = lineHandler.buffer;
    // The pending buffer is in write mode, so position() -- not remaining() -- is the amount of
    // data it already holds. Use long arithmetic so the sum and the doubling cannot overflow.
    long neededCapacity = (long) pending.position() + buffer.remaining();
    if (pending.capacity() < neededCapacity) {
      // Start from at least 1 so that a zero-capacity buffer can still grow by doubling.
      long newCapacity = Math.max(pending.capacity(), 1);
      while (newCapacity < neededCapacity) {
        newCapacity *= 2;
      }
      T newBuffer =
          bufferOperations.createBuffer((int) Math.min(newCapacity, Integer.MAX_VALUE));
      pending.flip();
      bufferOperations.appendBuffers(newBuffer, pending);
      lineHandler.buffer = newBuffer;
    }
    bufferOperations.appendBuffers(lineHandler.buffer, buffer);
  }
}