/* Copyright (c) 2007 Jython Developers */
package org.python.core.io;

import java.nio.ByteBuffer;
import java.util.EnumSet;

import org.python.core.Py;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PyTuple;

/**
 * A Buffered text stream in universal newlines mode.
 *
 * While reading, every CR, LF and CRLF sequence is translated to a
 * single LF, and each kind of newline encountered is recorded so it
 * can be reported by {@link #getNewlines()}. Bytes are widened to
 * chars one-to-one (masked with 0xff); no other decoding is done
 * here.
 *
 * NOTE(review): readahead, bufferedIO, builder, interimBuilder,
 * LF_BYTE, readChunk, packReadahead, drainBuilder, atEOF,
 * checkClosed and checkWritable are not declared in this file;
 * presumably they are inherited from TextIOBase -- confirm there
 * before relying on their exact semantics.
 *
 * @author Philip Jenvey
 */
public class UniversalIOWrapper extends TextIOBase {

    /** Whether the next character, if it's an LF, should be skipped
     * (the previous character was a CR) */
    private boolean skipNextLF = false;

    /** The Newlines encountered in this file */
    private EnumSet<Newline> newlineTypes = EnumSet.noneOf(Newline.class);

    /**
     * Construct a UniversalIOWrapper wrapping the given
     * BufferedIOBase.
     *
     * @param bufferedIO the BufferedIOBase to wrap
     */
    public UniversalIOWrapper(BufferedIOBase bufferedIO) {
        super(bufferedIO);
    }

    /**
     * Read up to size chars, translating CR and CRLF to LF.
     *
     * A negative size delegates to {@link #readall()}. An LF that
     * directly follows a CR is dropped and does not count toward
     * size. A CR that turns out to be the very last byte (the
     * following readChunk() returns 0) is recorded as a CR newline
     * immediately instead of being deferred through skipNextLF.
     *
     * @param size the maximum number of chars to return, or a
     *             negative value to read everything
     * @return the chars read, at most size of them
     */
    @Override
    public String read(int size) {
        if (size < 0) {
            return readall();
        }

        // Avoid ByteBuffer (this.readahead) method calls in the inner
        // loop by reading directly from the readahead's backing array
        // NOTE(review): indexing array() with position() assumes the
        // readahead's arrayOffset() is 0 -- confirm
        byte[] readaheadArray;
        int readaheadPos;
        char[] builderArray = new char[size];
        int builderPos = 0;

        do {
            readaheadArray = readahead.array();
            readaheadPos = readahead.position();

            while (readaheadPos < readahead.limit() && builderPos < size) {
                // Widen the byte to an unsigned char value (0-255)
                char next = (char)(readaheadArray[readaheadPos++] & 0xff);

                switch (next) {
                case '\r':
                    next = '\n';
                    // Don't skipNextLF (identify CR immediately) at
                    // EOF
                    if (readaheadPos == readahead.limit()) {
                        if (readChunk() == 0) {
                            // EOF
                            newlineTypes.add(Newline.CR);
                            builderArray[builderPos++] = next;
                            return new String(builderArray, 0, builderPos);
                        }
                        // Not EOF and readChunk replaced the
                        // readahead; reset the readahead info
                        readaheadArray = readahead.array();
                        readaheadPos = readahead.position();
                    }
                    // Whether this was CR or CRLF is decided by the
                    // next byte seen
                    skipNextLF = true;
                    break;
                case '\n':
                    if (skipNextLF) {
                        // Second half of a CRLF: the LF was already
                        // emitted for the CR, so drop this byte
                        skipNextLF = false;
                        newlineTypes.add(Newline.CRLF);
                        continue;
                    }
                    newlineTypes.add(Newline.LF);
                    break;
                default:
                    if (skipNextLF) {
                        // The previous CR was not followed by an LF
                        skipNextLF = false;
                        newlineTypes.add(Newline.CR);
                    }
                }

                builderArray[builderPos++] = next;
            }
        } while (builderPos < size && readChunk(size - builderPos) > 0);

        // Finally reposition the readahead to where we ended. The
        // position is invalid if the readahead is empty (at EOF;
        // readChunk() returned 0)
        if (readahead.hasRemaining()) {
            readahead.position(readaheadPos);
        }
        // Shrink the readahead if it grew
        packReadahead();

        return new String(builderArray, 0, builderPos);
    }

    /**
     * Read everything that is left: first the unread part of the
     * readahead, then whatever bufferedIO.readall() returns,
     * translating CR and CRLF to LF.
     *
     * A CR still pending at the end of the data is recorded as a CR
     * newline and the skipNextLF flag is cleared.
     *
     * @return the remaining data as a String
     */
    @Override
    public String readall() {
        // Read the remainder of file
        ByteBuffer remaining = bufferedIO.readall();

        // Create an array that accommodates the readahead and the
        // remainder
        char[] all = new char[readahead.remaining() + remaining.remaining()];

        // Consume the readahead
        // NOTE(review): both readLoop calls index array() with
        // position(), which assumes array-backed buffers whose
        // arrayOffset() is 0 -- confirm
        int length = readLoop(readahead.array(), readahead.position(), all, 0,
                              readahead.remaining());
        // Mark the readahead as fully consumed
        readahead.position(readahead.limit());

        // Consume the remainder of the file
        length += readLoop(remaining.array(), remaining.position(), all, length,
                           remaining.remaining());

        // Handle skipNextLF at EOF
        if (skipNextLF) {
            skipNextLF = false;
            newlineTypes.add(Newline.CR);
        }

        return new String(all, 0, length);
    }

    /**
     * Read and convert the src byte array into the dest char array.
     *
     * Converts CR and CRLF to LF. No attempt is made to handle CRLF
     * at EOF; skipNextLF may be toggled true after this method
     * returns.
     *
     * Because the LF of a CRLF pair is dropped, the number of chars
     * written may be smaller than length.
     *
     * @param src the source byte array
     * @param srcPos starting position in the source array
     * @param dest the destination char array
     * @param destPos starting position in the destination array
     * @param length the number of array elements to be copied
     * @return the number of chars written to the destination array
     */
    private int readLoop(byte[] src, int srcPos, char[] dest, int destPos, int length) {
        int destStartPos = destPos;
        int srcEndPos = srcPos + length;

        while (srcPos < srcEndPos) {
            // Widen the byte to an unsigned char value (0-255)
            char next = (char)(src[srcPos++] & 0xff);
            switch (next) {
            case '\r':
                next = '\n';
                skipNextLF = true;
                break;
            case '\n':
                if (skipNextLF) {
                    // Second half of a CRLF; the LF was already
                    // written for the CR
                    skipNextLF = false;
                    newlineTypes.add(Newline.CRLF);
                    continue;
                }
                newlineTypes.add(Newline.LF);
                break;
            default:
                if (skipNextLF) {
                    // The previous CR stood alone
                    skipNextLF = false;
                    newlineTypes.add(Newline.CR);
                }
            }
            dest[destPos++] = next;
        }

        return destPos - destStartPos;
    }

    /**
     * Read a single line, translating its CR or CRLF terminator to
     * LF.
     *
     * Reading stops after the first newline has been appended, once
     * size chars have been accumulated (when size is not negative),
     * or when readChunk() returns 0. The result is whatever
     * drainBuilder() returns (presumably the accumulated builder
     * contents -- confirm in TextIOBase).
     *
     * @param size the maximum number of chars to return, or a
     *             negative value for no limit
     * @return the line read
     */
    @Override
    public String readline(int size) {
        // Avoid ByteBuffer (this.readahead) and StringBuilder
        // (this.builder) method calls in the inner loop by reading
        // directly from the readahead's backing array and writing to
        // an interim char array (this.interimBuilder)
        byte[] readaheadArray;
        int readaheadPos;
        int interimBuilderPos;

        do {
            readaheadArray = readahead.array();
            readaheadPos = readahead.position();
            interimBuilderPos = 0;

            while (readaheadPos < readahead.limit() &&
                   (size < 0 || builder.length() + interimBuilderPos < size)) {
                // Widen the byte to an unsigned char value (0-255)
                char next = (char)(readaheadArray[readaheadPos++] & 0xff);

                switch (next) {
                case '\r':
                    next = '\n';
                    // Don't skipNextLF (identify CR immediately) at
                    // EOF
                    if (readaheadPos == readahead.limit()) {
                        if (readChunk() == 0) {
                            // EOF
                            newlineTypes.add(Newline.CR);
                            interimBuilder[interimBuilderPos++] = next;
                            builder.append(interimBuilder, 0, interimBuilderPos);
                            // The readahead position is already valid
                            // (it's at 0)
                            return drainBuilder();
                        }
                        // Not EOF and readChunk replaced the
                        // readahead; reset the readahead info
                        // (readaheadArray is not refreshed because
                        // this case returns before reading it again)
                        readaheadPos = readahead.position();
                    }
                    // A following LF, if any, is skipped by the next
                    // read call or by tell()
                    skipNextLF = true;
                    interimBuilder[interimBuilderPos++] = next;
                    builder.append(interimBuilder, 0, interimBuilderPos);
                    // Reposition the readahead to where we ended
                    readahead.position(readaheadPos);
                    return drainBuilder();
                case '\n':
                    if (skipNextLF) {
                        // Second half of a CRLF whose line was
                        // already returned; drop it and keep reading
                        skipNextLF = false;
                        newlineTypes.add(Newline.CRLF);
                        continue;
                    }
                    newlineTypes.add(Newline.LF);
                    interimBuilder[interimBuilderPos++] = next;
                    builder.append(interimBuilder, 0, interimBuilderPos);
                    // Reposition the readahead to where we ended
                    readahead.position(readaheadPos);
                    return drainBuilder();
                default:
                    if (skipNextLF) {
                        // The previous CR stood alone
                        skipNextLF = false;
                        newlineTypes.add(Newline.CR);
                    }
                }

                interimBuilder[interimBuilderPos++] = next;
            }

            // Flush this chunk's chars before fetching another chunk
            builder.append(interimBuilder, 0, interimBuilderPos);

        } while ((size < 0 || builder.length() < size) && readChunk() > 0);

        // Finally reposition the readahead to where we ended. The
        // position is invalid if the readahead is empty (at EOF;
        // readChunk() returned 0)
        if (readahead.hasRemaining()) {
            readahead.position(readaheadPos);
        }

        return drainBuilder();
    }

    /**
     * Writing is not supported in universal newlines mode.
     *
     * @param buf ignored
     * @return -1 if checkClosed() and checkWritable() both return
     *         normally
     */
    @Override
    public int write(String buf) {
        // Universal newlines doesn't support writing; just raise the
        // appropriate exception
        checkClosed();
        checkWritable();
        return -1;
    }

    /**
     * Seek via the superclass and forget any pending CR, so an LF at
     * the new position is not skipped.
     *
     * @param pos passed through to super.seek
     * @param whence passed through to super.seek
     * @return the position returned by super.seek
     */
    @Override
    public long seek(long pos, int whence) {
        pos = super.seek(pos, whence);
        skipNextLF = false;
        return pos;
    }

    /**
     * Return the position reported by the superclass, adjusted for a
     * pending CRLF.
     *
     * If the previous char read was a CR and the next byte in the
     * readahead is an LF, that LF is consumed, CRLF is recorded and
     * the reported position is advanced by one.
     *
     * @return the current position
     */
    @Override
    public long tell() {
        long pos = super.tell();
        if (skipNextLF) {
            // Look for a succeeding LF; if it exists, consume it and
            // report the position as the beginning of the next
            // newline
            if (!atEOF()) {
                int readaheadPos = readahead.position();
                if (readahead.get(readaheadPos) == LF_BYTE) {
                    skipNextLF = false;
                    newlineTypes.add(Newline.CRLF);
                    readahead.position(++readaheadPos);
                    pos++;
                }
            }
        }
        return pos;
    }

    /**
     * Report the kinds of newline encountered so far.
     *
     * @return Py.None if no newline has been recorded, a PyString
     *         if exactly one kind has been, otherwise a PyTuple of
     *         PyStrings, one per kind
     */
    @Override
    public PyObject getNewlines() {
        int size = newlineTypes.size();
        if (size == 0) {
            return Py.None;
        } else if (size == 1) {
            Newline newline = newlineTypes.iterator().next();
            return new PyString(newline.getValue());
        }

        PyObject[] newlines = new PyObject[size];
        int i = 0;
        for (Newline newline : newlineTypes) {
            newlines[i++] = new PyString(newline.getValue());
        }
        return new PyTuple(newlines);
    }

    /**
     * Newline types.
     *
     */
    private enum Newline {
        /** Carriage return */
        CR("\r"),

        /** Line feed */
        LF("\n"),

        /** Carriage return line feed */
        CRLF("\r\n");

        /** The String value */
        private final String value;

        /**
         * Return the String value of this newline
         *
         * @return the newline sequence as a String
         */
        public String getValue() {
            return value;
        }

        /**
         * Construct a new Newline.
         *
         * @param value the newline sequence as a String
         */
        Newline(String value) {
            this.value = value;
        }
    }
}