/* * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package java.nio.file; import java.io.BufferedReader; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; import java.util.Spliterator; import java.util.function.Consumer; /** * A file-based lines spliterator, leveraging a shared mapped byte buffer and * associated file channel, covering lines of a file for character encodings * where line feed characters can be easily identified from character encoded * bytes. * * <p> * When the root spliterator is first split a mapped byte buffer will be created * over the file for it's size that was observed when the stream was created. * Thus a mapped byte buffer is only required for parallel stream execution. * Sub-spliterators will share that mapped byte buffer. Splitting will use the * mapped byte buffer to find the closest line feed characters(s) to the left or * right of the mid-point of covered range of bytes of the file. If a line feed * is found then the spliterator is split with returned spliterator containing * the identified line feed characters(s) at the end of it's covered range of * bytes. * * <p> * Traversing will create a buffered reader, derived from the file channel, for * the range of bytes of the file. The lines are then read from that buffered * reader. Once traversing commences no further splitting can be performed and * the reference to the mapped byte buffer will be set to null. */ final class FileChannelLinesSpliterator implements Spliterator<String> { static final Set<String> SUPPORTED_CHARSET_NAMES; static { SUPPORTED_CHARSET_NAMES = new HashSet<>(); SUPPORTED_CHARSET_NAMES.add(StandardCharsets.UTF_8.name()); SUPPORTED_CHARSET_NAMES.add(StandardCharsets.ISO_8859_1.name()); SUPPORTED_CHARSET_NAMES.add(StandardCharsets.US_ASCII.name()); } private final FileChannel fc; private final Charset cs; private int index; private final int fence; // Null before first split, non-null when splitting, null when traversing private ByteBuffer buffer; // Non-null when traversing private BufferedReader reader; FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) { this.fc = fc; this.cs = cs; this.index = index; this.fence = fence; } private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) { this.fc = fc; this.buffer = buffer; this.cs = cs; this.index = index; this.fence = fence; } @Override public boolean tryAdvance(Consumer<? super String> action) { String line = readLine(); if (line != null) { action.accept(line); return true; } else { return false; } } @Override public void forEachRemaining(Consumer<? super String> action) { String line; while ((line = readLine()) != null) { action.accept(line); } } private BufferedReader getBufferedReader() { /** * A readable byte channel that reads bytes from an underlying * file channel over a specified range. */ ReadableByteChannel rrbc = new ReadableByteChannel() { @Override public int read(ByteBuffer dst) throws IOException { int bytesToRead = fence - index; if (bytesToRead == 0) return -1; int bytesRead; if (bytesToRead < dst.remaining()) { // The number of bytes to read is less than remaining // bytes in the buffer // Snapshot the limit, reduce it, read, then restore int oldLimit = dst.limit(); dst.limit(dst.position() + bytesToRead); bytesRead = fc.read(dst, index); dst.limit(oldLimit); } else { bytesRead = fc.read(dst, index); } if (bytesRead == -1) { index = fence; return bytesRead; } index += bytesRead; return bytesRead; } @Override public boolean isOpen() { return fc.isOpen(); } @Override public void close() throws IOException { fc.close(); } }; return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1)); } private String readLine() { if (reader == null) { reader = getBufferedReader(); buffer = null; } try { return reader.readLine(); } catch (IOException e) { throw new UncheckedIOException(e); } } private ByteBuffer getMappedByteBuffer() { // TODO can the mapped byte buffer be explicitly unmapped? // It's possible, via a shared-secret mechanism, when either // 1) the spliterator starts traversing, although traversal can // happen concurrently for mulitple spliterators, so care is // needed in this case; or // 2) when the stream is closed using some shared holder to pass // the mapped byte buffer when it is created. try { return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence); } catch (IOException e) { throw new UncheckedIOException(e); } } @Override public Spliterator<String> trySplit() { // Cannot split after partial traverse if (reader != null) return null; ByteBuffer b; if ((b = buffer) == null) { b = buffer = getMappedByteBuffer(); } final int hi = fence, lo = index; // Check if line separator hits the mid point int mid = (lo + hi) >>> 1; int c = b.get(mid); if (c == '\n') { mid++; } else if (c == '\r') { // Check if a line separator of "\r\n" if (++mid < hi && b.get(mid) == '\n') { mid++; } } else { // TODO give up after a certain distance from the mid point? // Scan to the left and right of the mid point int midL = mid - 1; int midR = mid + 1; mid = 0; while (midL > lo && midR < hi) { // Sample to the left c = b.get(midL--); if (c == '\n' || c == '\r') { // If c is "\r" then no need to check for "\r\n" // since the subsequent value was previously checked mid = midL + 2; break; } // Sample to the right c = b.get(midR++); if (c == '\n' || c == '\r') { mid = midR; // Check if line-separator is "\r\n" if (c == '\r' && mid < hi && b.get(mid) == '\n') { mid++; } break; } } } // The left spliterator will have the line-separator at the end return (mid > lo && mid < hi) ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b) : null; } @Override public long estimateSize() { // Use the number of bytes as an estimate. // We could divide by a constant that is the average number of // characters per-line, but that constant will be factored out. return fence - index; } @Override public long getExactSizeIfKnown() { return -1; } @Override public int characteristics() { return Spliterator.ORDERED | Spliterator.NONNULL; } }