// ================================================================================================= // Copyright 2011 Twitter, Inc. // ------------------------------------------------------------------------------------------------- // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this work except in compliance with the License. // You may obtain a copy of the License in the LICENSE file, or at: // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ================================================================================================= package com.twitter.common.text.token.attribute; import java.io.IOException; import java.io.NotSerializableException; import java.io.ObjectStreamException; import java.io.Serializable; import java.nio.CharBuffer; import com.google.common.base.Preconditions; import org.apache.lucene.util.AttributeImpl; /** * Implementation of {@code CharSequenceTermAttribute}. The implementation differs from Lucene's * {@code TermAttributeImpl}, which relies on an internal char[] termBuffer that can grow. * Extracting a token with {@code TermAttributeImpl} involves a copy into this buffer, and setting * the length of the term. In contrast, with this class, the client instead refers to * a span in the underlying {@code CharSequence} by start index (offset) and end index. * <p> * Note that this class explicitly suppresses the ability for instance to be serialized, inherited * via {@link AttributeImpl}. */ public class CharSequenceTermAttributeImpl extends AttributeImpl implements CharSequenceTermAttribute, Cloneable, Serializable { private static final long serialVersionUID = 0L; private CharSequence charSequence = ""; private int offset = 0; private int length = 0; private int hashCode = 0; @Override public CharSequence getTermCharSequence() { // CharBuffer.wrap for CharSequences takes start and end indices. return CharBuffer.wrap(charSequence, offset, offset + length); } @Override public String getTermString() { return charSequence.subSequence(offset, offset + length).toString(); } @Override public void setTermBuffer(CharSequence seq) { Preconditions.checkNotNull(seq); charSequence = seq; setOffset(0); setLength(seq.length()); } @Override public void setTermBuffer(CharSequence seq, int offset, int length) { charSequence = seq; setOffset(offset); setLength(length); } @Override public void clear() { setOffset(0); setLength(0); } /** * Passing a {@code CharSequenceTermAttribute} instead of a {@code TermAttribute} will * obviate the construction of an extra String. */ @Override public void copyTo(AttributeImpl target) { if (target instanceof CharSequenceTermAttribute) { CharSequenceTermAttribute attr = (CharSequenceTermAttribute) target; attr.setTermBuffer(charSequence, offset, length); } } @Override public boolean equals(Object other) { if (other == this) { return true; } if (other instanceof CharSequenceTermAttribute) { CharSequenceTermAttributeImpl otherImpl = (CharSequenceTermAttributeImpl) other; if (otherImpl.charSequence == charSequence && otherImpl.length == length && otherImpl.offset == offset) { return true; } if (otherImpl.length != length) { return false; } for (int i = 0; i < otherImpl.length; i++) { if (otherImpl.charSequence.charAt(otherImpl.offset + i) != charSequence.charAt(offset + i)) { return false; } } return true; } return false; } /** * This is largely based on {@link org.apache.lucene.util.ArrayUtil#hashCode(char[], int, int)}. */ @Override public int hashCode() { if (hashCode == 0) { for (int i = offset; i < offset + length; i++) { hashCode = hashCode * 31 + charSequence.charAt(i); } } return hashCode; } @Override public int getOffset() { return offset; } @Override public int getLength() { return length; } @Override public void setOffset(int offset) { if (offset < 0 || offset > charSequence.length()) { throw new IndexOutOfBoundsException("Offset " + offset + " must be >= 0 and < " + charSequence.length() + ", which is the length of the underlying CharSequence."); } this.offset = offset; this.hashCode = 0; } @Override public void setLength(int length) { if (length < 0 || length > charSequence.length()) { throw new IndexOutOfBoundsException("Length " + length + " must be >= 0 and <= " + charSequence.length() + ", which is the length of the underlying CharSequence."); } this.length = length; this.hashCode = 0; } @Override public CharSequence getCharSequence() { return charSequence; } @Override public void setCharSequence(CharSequence originalCharSequence) { charSequence = originalCharSequence; } // Explicitly suppress ability to serialize. private void writeObject(java.io.ObjectOutputStream out) throws IOException { throw new NotSerializableException(); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { throw new NotSerializableException(); } private void readObjectNoData() throws ObjectStreamException { throw new NotSerializableException(); } }