/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.avro.util; import java.nio.charset.Charset; import java.io.UnsupportedEncodingException; import org.apache.avro.io.BinaryData; /** A Utf8 string. Unlike {@link String}, instances are mutable. This is more * efficient than {@link String} when reading or writing a sequence of values, * as a single instance may be reused. */ public class Utf8 implements Comparable<Utf8>, CharSequence { private static final byte[] EMPTY = new byte[0]; private static final Charset UTF8 = Charset.forName("UTF-8"); private byte[] bytes = EMPTY; private int length; private String string; public Utf8() {} public Utf8(String string) { this.bytes = getBytesFor(string); this.length = bytes.length; this.string = string; } public Utf8(Utf8 other) { this.length = other.length; this.bytes = new byte[other.length]; System.arraycopy(other.bytes, 0, this.bytes, 0, this.length); this.string = other.string; } public Utf8(byte[] bytes) { this.bytes = bytes; this.length = bytes.length; } /** Return UTF-8 encoded bytes. * Only valid through {@link #getByteLength()}. */ public byte[] getBytes() { return bytes; } /** Return length in bytes. * @deprecated call {@link #getByteLength()} instead. */ public int getLength() { return length; } /** Return length in bytes. */ public int getByteLength() { return length; } /** Set length in bytes. Should called whenever byte content changes, even * if the length does not change, as this also clears the cached String. * @deprecated call {@link #setByteLength(int)} instead. */ public Utf8 setLength(int newLength) { return setByteLength(newLength); } /** Set length in bytes. Should called whenever byte content changes, even * if the length does not change, as this also clears the cached String. */ public Utf8 setByteLength(int newLength) { if (this.bytes.length < newLength) { byte[] newBytes = new byte[newLength]; System.arraycopy(bytes, 0, newBytes, 0, this.length); this.bytes = newBytes; } this.length = newLength; this.string = null; return this; } /** Set to the contents of a String. */ public Utf8 set(String string) { this.bytes = getBytesFor(string); this.length = bytes.length; this.string = string; return this; } private abstract static class Utf8Converter { public abstract String fromUtf8(byte[] bytes, int length); public abstract byte[] toUtf8(String str); } private static final Utf8Converter UTF8_CONVERTER = System.getProperty("java.version").startsWith("1.6.") ? new Utf8Converter() { // optimized for Java 6 public String fromUtf8(byte[] bytes, int length) { try { return new String(bytes, 0, length, "UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } public byte[] toUtf8(String str) { try { return str.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } } : new Utf8Converter() { // faster in Java 7 & 8 public String fromUtf8(byte[] bytes, int length) { return new String(bytes, 0, length, UTF8); } public byte[] toUtf8(String str) { return str.getBytes(UTF8); } }; @Override public String toString() { if (this.length == 0) return ""; if (this.string == null) { this.string = UTF8_CONVERTER.fromUtf8(bytes, length); } return this.string; } @Override public boolean equals(Object o) { if (o == this) return true; if (!(o instanceof Utf8)) return false; Utf8 that = (Utf8)o; if (!(this.length == that.length)) return false; byte[] thatBytes = that.bytes; for (int i = 0; i < this.length; i++) if (bytes[i] != thatBytes[i]) return false; return true; } @Override public int hashCode() { int hash = 0; for (int i = 0; i < this.length; i++) hash = hash*31 + bytes[i]; return hash; } @Override public int compareTo(Utf8 that) { return BinaryData.compareBytes(this.bytes, 0, this.length, that.bytes, 0, that.length); } // CharSequence implementation @Override public char charAt(int index) { return toString().charAt(index); } @Override public int length() { return toString().length(); } @Override public CharSequence subSequence(int start, int end) { return toString().subSequence(start, end); } /** Gets the UTF-8 bytes for a String */ public static final byte[] getBytesFor(String str) { return UTF8_CONVERTER.toUtf8(str); } }