/* * Copyright 2007 T-Rank AS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package no.trank.openpipe.solr.util; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UTFDataFormatException; /** * Utility-class for I/O operations. * * @version $Revision$ */ public class IOUtil { /** * Writes a string as <tt>modified UTF-8</tt> as specified by {@link java.io.DataOutput#writeUTF(String)}, * except it writes the length using {@link #writeNibble(OutputStream, int)}, that way strings may be up to * {@link Integer#MAX_VALUE} in length. * * @param out the stream to write to. * @param str the string to write. * * @return the number of bytes written. * * @throws IOException if an I/O error occures. * * @see #readUTF(InputStream) */ public static int writeUTF(final OutputStream out, final String str) throws IOException { final int strlen = str.length(); // use charAt instead of copying String to char array final int utflen = findUtfLen(str, strlen); final int lenBytes = writeNibble(out, utflen); final byte[] buf = new byte[utflen]; int i; for (i = 0; i < strlen; i++) { final int c = str.charAt(i); if (!((c >= 0x0001) && (c <= 0x007F))) { break; } buf[i] = (byte) c; } int idx = i; for (; i < strlen; i++) { final int c = str.charAt(i); if ((c >= 0x0001) && (c <= 0x007F)) { buf[idx++] = (byte) c; } else if (c > 0x07FF) { buf[idx++] = (byte) (0xE0 | c >> 12 & 0x0F); buf[idx++] = (byte) (0x80 | c >> 6 & 0x3F); buf[idx++] = (byte) (0x80 | c & 0x3F); } else { buf[idx++] = (byte) (0xC0 | c >> 6 & 0x1F); buf[idx++] = (byte) (0x80 | c & 0x3F); } } out.write(buf, 0, utflen); return utflen + lenBytes; } private static int findUtfLen(final String str, final int strlen) { int utflen = strlen; for (int i = 0; i < strlen; i++) { final int c = str.charAt(i); if (c < 0x0001 || c > 0x007F) { utflen += c > 0x07FF ? 2 : 1; } } return utflen; } /** * Reads a string as <tt>modified UTF-8</tt> as specified by {@link java.io.DataInput#readUTF()}, except it reads the * length using {@link #readUTF(InputStream)}, that way strings may be up to {@link Integer#MAX_VALUE} in length. * * @param in the stream to read from. * * @return the string read. * * @throws IOException if an I/O error occures. * @throws EOFException if a end-of-stream was reached. * * @see #writeUTF(OutputStream, String) */ @SuppressWarnings({"OverlyLongMethod"}) public static String readUTF(InputStream in) throws IOException { final int utflen = readNibble(in); if (utflen < 0) { throw new EOFException(); } final byte[] bytes = new byte[utflen]; readFully(in, bytes, 0, utflen); final char[] chars = new char[utflen]; int bIdx = readSingleByte(utflen, bytes, chars); int count = bIdx; while (bIdx < utflen) { final int c = bytes[bIdx++] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: /* 0xxxxxxx*/ chars[count++] = (char) c; break; case 12: case 13: /* 110x xxxx 10xx xxxx*/ if (bIdx >= utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); } final int c2 = (int) bytes[bIdx++]; if ((c2 & 0xC0) != 0x80) { throw new UTFDataFormatException("malformed input around byte " + (bIdx - 1)); } chars[count++] = (char) ((c & 0x1F) << 6 | c2 & 0x3F); break; case 14: /* 1110 xxxx 10xx xxxx 10xx xxxx */ if (bIdx + 2 > utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); } final int c3 = bytes[bIdx++] & 0xff; final int c4 = bytes[bIdx++] & 0xff; if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) { throw new UTFDataFormatException("malformed input around byte " + (bIdx - 2)); } chars[count++] = (char) ((c & 0x0F) << 12 | (c3 & 0x3F) << 6 | c4 & 0x3F); break; default: /* 10xx xxxx, 1111 xxxx */ throw new UTFDataFormatException("malformed input around byte " + (bIdx - 1)); } } // The number of chars produced may be less than utflen return new String(chars, 0, count); } private static int readSingleByte(final int utflen, final byte[] bytes, final char[] chars) { int bIdx = 0; while (bIdx < utflen) { final int c = (int) bytes[bIdx] & 0xff; if (c > 127) { break; } chars[bIdx++] = (char) c; } return bIdx; } /** * Reads the given number of bytes from a stream. * * @param in the stream to read from. * @param b the buffer into which the data is read. * @param off an int specifying the offset into the data. * @param len an int specifying the number of bytes to read. * * @throws IOException if an I/O error occures. * @throws EOFException if this stream reaches the end before reading all the bytes. * * @see java.io.DataInput#readFully(byte[], int, int) */ public static void readFully(final InputStream in, final byte b[], final int off, final int len) throws IOException { if (len < 0) { throw new IndexOutOfBoundsException(); } int n = 0; while (n < len) { int count = in.read(b, off + n, len - n); if (count < 0) { throw new EOFException(); } n += count; } } /** * Writes a positive integer with variable length to a stream. * <p/> * <a href="http://en.wikipedia.org/wiki/Nibble">Nibble</a> is not the correct term for what this writes, but oh well * <br/> * Integers are written as 7-bit ints with the 8<sup>th</sup> bit being continuation flag. Number of bytes used to * code an integer is <tt>[1-5]</tt>.<br/> * E.g. * <table> * <tr><th>decimal</th><th>hexadecimal</th><th>encoded bytes</th></tr> * <tr><td><tt>0</tt></td><td><tt>0x00</tt></td><td><tt>0x80</tt></td></tr> * <tr><td><tt>128</tt></td><td><tt>0x80</tt></td><td><tt>0x01 0x80</tt></td></tr> * <tr><td><tt>5964</tt></td><td><tt>0x174C</tt></td><td><tt>0x2E 0xCC</tt></td></tr> * <tr><td><tt>2147483647</tt></td><td><tt>0x7FFFFFFF</tt></td><td><tt>0x07 0x7F 0x7F 0x7F 0xFF</tt></td></tr> * </table> * * @param out the stream to write to. * @param x the positive integer to write. * * @return the number of bytes written. * * @throws IOException if an I/O error occures. * * @see #readNibble(InputStream) */ public static int writeNibble(final OutputStream out, final int x) throws IOException { if (x < 0) { throw new IOException("The argument " + x + " is negative"); } if (x == 0) { out.write(0x80); return 1; } final int len = msb(x) / 7; int h = len; do { if (h == 0) { out.write(0x80 | ((x >> (h * 7)) & 0x7f)); } else { out.write(((x >> (h * 7)) & 0x7f)); } } while (h-- != 0); return len + 1; } /** * Reads a positive integer with variable length from a stream. * <p/> * For coding description see {@link #writeNibble(OutputStream, int)}. * * @param in the stream to read from. * * @return the positive integer read from the stream or <tt>-1</tt> if end of stream was reached before the int could * be decoded. * * @throws IOException if an I/O error occures. * * @see #writeNibble(OutputStream, int) */ public static int readNibble(final InputStream in) throws IOException { int x = 0; int b; do { x <<= 7; b = in.read(); x |= b & 0x7f; } while ((b & 0x80) == 0 && b >= 0 && x >= 0); if (x < 0) { throw new IOException("Invalid nibble read"); } if (b < 0) { return -1; } return x; } private static int msb(int x) { return (x < 1 << 15 ? (x < 1 << 7 ? (x < 1 << 3 ? (x < 1 << 1 ? (x < 1 ? x < 0 ? 31 : -1 /* 6 */ : 0 /* 5 */ ) : (x < 1 << 2 ? 1 /* 5 */ : 2 /* 5 */ ) ) : (x < 1 << 5 ? (x < 1 << 4 ? 3 /* 5 */ : 4 /* 5 */ ) : (x < 1 << 6 ? 5 /* 5 */ : 6 /* 5 */ ) ) ) : (x < 1 << 11 ? (x < 1 << 9 ? (x < 1 << 8 ? 7 /* 5 */ : 8 /* 5 */ ) : (x < 1 << 10 ? 9 /* 5 */ : 10 /* 5 */ ) ) : (x < 1 << 13 ? (x < 1 << 12 ? 11 /* 5 */ : 12 /* 5 */ ) : (x < 1 << 14 ? 13 /* 5 */ : 14 /* 5 */ ) ) ) ) : (x < 1 << 23 ? (x < 1 << 19 ? (x < 1 << 17 ? (x < 1 << 16 ? 15 /* 5 */ : 16 /* 5 */ ) : (x < 1 << 18 ? 17 /* 5 */ : 18 /* 5 */ ) ) : (x < 1 << 21 ? (x < 1 << 20 ? 19 /* 5 */ : 20 /* 5 */ ) : (x < 1 << 22 ? 21 /* 5 */ : 22 /* 5 */ ) ) ) : (x < 1 << 27 ? (x < 1 << 25 ? (x < 1 << 24 ? 23 /* 5 */ : 24 /* 5 */ ) : (x < 1 << 26 ? 25 /* 5 */ : 26 /* 5 */ ) ) : (x < 1 << 29 ? (x < 1 << 28 ? 27 /* 5 */ : 28 /* 5 */ ) : (x < 1 << 30 ? 29 /* 5 */ : 30 /* 5 */ ) ) ) ) ); } private IOUtil() { // Only static access } }