/* * Copyright (C) 2009, Google Inc. * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> * and other copyright owners as documented in the project's IP log. * * This program and the accompanying materials are made available * under the terms of the Eclipse Distribution License v1.0 which * accompanies this distribution, is reproduced below, and is * available at http://www.eclipse.org/org/documents/edl-v10.php * * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * - Neither the name of the Eclipse Foundation, Inc. nor the * names of its contributors may be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.eclipse.jgit.diff; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.IntList; import org.eclipse.jgit.util.RawParseUtils; /** * A Sequence supporting UNIX formatted text in byte[] format. * <p> * Elements of the sequence are the lines of the file, as delimited by the UNIX * newline character ('\n'). The file content is treated as 8 bit binary text, * with no assumptions or requirements on character encoding. * <p> * Note that the first line of the file is element 0, as defined by the Sequence * interface API. Traditionally in a text editor a patch file the first line is * line number 1. Callers may need to subtract 1 prior to invoking methods if * they are converting from "line number" to "element index". */ public class RawText extends Sequence { /** A Rawtext of length 0 */ public static final RawText EMPTY_TEXT = new RawText(new byte[0]); /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */ private static final int FIRST_FEW_BYTES = 8000; /** The file content for this sequence. */ protected final byte[] content; /** Map of line number to starting position within {@link #content}. */ protected final IntList lines; /** * Create a new sequence from an existing content byte array. * <p> * The entire array (indexes 0 through length-1) is used as the content. * * @param input * the content array. The array is never modified, so passing * through cached arrays is safe. */ public RawText(final byte[] input) { content = input; lines = RawParseUtils.lineMap(content, 0, content.length); } /** * Create a new sequence from a file. * <p> * The entire file contents are used. * * @param file * the text file. * @throws IOException * if Exceptions occur while reading the file */ public RawText(File file) throws IOException { this(IO.readFully(file)); } /** @return total number of items in the sequence. */ public int size() { // The line map is always 2 entries larger than the number of lines in // the file. Index 0 is padded out/unused. The last index is the total // length of the buffer, and acts as a sentinel. // return lines.size() - 2; } /** * Write a specific line to the output stream, without its trailing LF. * <p> * The specified line is copied as-is, with no character encoding * translation performed. * <p> * If the specified line ends with an LF ('\n'), the LF is <b>not</b> * copied. It is up to the caller to write the LF, if desired, between * output lines. * * @param out * stream to copy the line data onto. * @param i * index of the line to extract. Note this is 0-based, so line * number 1 is actually index 0. * @throws IOException * the stream write operation failed. */ public void writeLine(final OutputStream out, final int i) throws IOException { int start = getStart(i); int end = getEnd(i); if (content[end - 1] == '\n') end--; out.write(content, start, end - start); } /** * Determine if the file ends with a LF ('\n'). * * @return true if the last line has an LF; false otherwise. */ public boolean isMissingNewlineAtEnd() { final int end = lines.get(lines.size() - 1); if (end == 0) return true; return content[end - 1] != '\n'; } /** * Get the text for a single line. * * @param i * index of the line to extract. Note this is 0-based, so line * number 1 is actually index 0. * @return the text for the line, without a trailing LF. */ public String getString(int i) { return getString(i, i + 1, true); } /** * Get the text for a region of lines. * * @param begin * index of the first line to extract. Note this is 0-based, so * line number 1 is actually index 0. * @param end * index of one past the last line to extract. * @param dropLF * if true the trailing LF ('\n') of the last returned line is * dropped, if present. * @return the text for lines {@code [begin, end)}. */ public String getString(int begin, int end, boolean dropLF) { if (begin == end) return ""; //$NON-NLS-1$ int s = getStart(begin); int e = getEnd(end - 1); if (dropLF && content[e - 1] == '\n') e--; return decode(s, e); } /** * Decode a region of the text into a String. * * The default implementation of this method tries to guess the character * set by considering UTF-8, the platform default, and falling back on * ISO-8859-1 if neither of those can correctly decode the region given. * * @param start * first byte of the content to decode. * @param end * one past the last byte of the content to decode. * @return the region {@code [start, end)} decoded as a String. */ protected String decode(int start, int end) { return RawParseUtils.decode(content, start, end); } private int getStart(final int i) { return lines.get(i + 1); } private int getEnd(final int i) { return lines.get(i + 2); } /** * Determine heuristically whether a byte array represents binary (as * opposed to text) content. * * @param raw * the raw file content. * @return true if raw is likely to be a binary file, false otherwise */ public static boolean isBinary(byte[] raw) { return isBinary(raw, raw.length); } /** * Determine heuristically whether the bytes contained in a stream * represents binary (as opposed to text) content. * * Note: Do not further use this stream after having called this method! The * stream may not be fully read and will be left at an unknown position * after consuming an unknown number of bytes. The caller is responsible for * closing the stream. * * @param raw * input stream containing the raw file content. * @return true if raw is likely to be a binary file, false otherwise * @throws IOException * if input stream could not be read */ public static boolean isBinary(InputStream raw) throws IOException { final byte[] buffer = new byte[FIRST_FEW_BYTES]; int cnt = 0; while (cnt < buffer.length) { final int n = raw.read(buffer, cnt, buffer.length - cnt); if (n == -1) break; cnt += n; } return isBinary(buffer, cnt); } /** * Determine heuristically whether a byte array represents binary (as * opposed to text) content. * * @param raw * the raw file content. * @param length * number of bytes in {@code raw} to evaluate. This should be * {@code raw.length} unless {@code raw} was over-allocated by * the caller. * @return true if raw is likely to be a binary file, false otherwise */ public static boolean isBinary(byte[] raw, int length) { // Same heuristic as C Git if (length > FIRST_FEW_BYTES) length = FIRST_FEW_BYTES; for (int ptr = 0; ptr < length; ptr++) if (raw[ptr] == '\0') return true; return false; } /** * Get the line delimiter for the first line. * * @since 2.0 * @return the line delimiter or <code>null</code> */ public String getLineDelimiter() { if (size() == 0) return null; int e = getEnd(0); if (content[e - 1] != '\n') return null; if (content.length > 1 && e > 1 && content[e - 2] == '\r') return "\r\n"; //$NON-NLS-1$ else return "\n"; //$NON-NLS-1$ } }