// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================
package com.twitter.common.text.token.attribute;
import java.io.IOException;
import java.io.NotSerializableException;
import java.io.ObjectStreamException;
import java.util.Collections;
import java.util.List;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import com.twitter.common.text.token.TokenGroupStream;
import com.twitter.common.text.token.TokenizedCharSequence;
import com.twitter.common.text.token.TokenizedCharSequenceStream;
/**
* Implementation of {@link TokenGroupAttribute}.
* <p>
* Note that this class explicitly suppresses the ability for instances to be serialized, which is
* inherited via {@link AttributeImpl}.
*/
public class TokenGroupAttributeImpl extends AttributeImpl implements TokenGroupAttribute {
  private static final long serialVersionUID = 0L;

  // Attribute classes handed to the TokenGroupStream when it is created.
  private ImmutableList<Class<? extends Attribute>> attributeClasses;
  // Captured states of the group; empty when the group is backed by a sequence instead.
  private List<State> states = Collections.emptyList();
  // Backing sequence; null when the group was set using states.
  private TokenizedCharSequence seq = null;

  // this is lazy-initialized and should not be cloned.
  private TokenGroupStream tokenGroupStream = null;

  /**
   * Resets the states, the backing sequence and the cached token group stream.
   * The attribute classes are left untouched.
   */
  @Override
  public void clear() {
    states = Collections.emptyList();
    seq = null;
    tokenGroupStream = null;
  }

  /**
   * Copies the attribute classes, states and sequence into {@code obj} when it is a
   * {@link TokenGroupAttributeImpl}; otherwise does nothing. The target's cached stream is
   * dropped so that it is rebuilt on demand.
   *
   * @param obj the attribute to copy into
   */
  @Override
  public void copyTo(AttributeImpl obj) {
    if (obj instanceof TokenGroupAttributeImpl) {
      TokenGroupAttributeImpl attr = (TokenGroupAttributeImpl) obj;
      attr.attributeClasses = this.attributeClasses;
      attr.states = this.states;
      attr.seq = this.seq;
      attr.tokenGroupStream = null;
    }
  }

  /**
   * Creates a copy of this attribute. Each state is cloned individually; the attribute classes
   * and the sequence are shared. The cached stream is not carried over.
   *
   * @return the new, independent attribute instance
   */
  @Override
  public AttributeImpl clone() {
    TokenGroupAttributeImpl clone = new TokenGroupAttributeImpl();
    // we don't need to clone attributeClasses because it's immutable.
    clone.attributeClasses = attributeClasses;
    // same here. TokenizedCharSequence is an immutable obj so no need to clone.
    clone.seq = seq;

    ImmutableList.Builder<State> builder = ImmutableList.builder();
    for (State state : states) {
      builder.add(state.clone());
    }
    clone.states = builder.build();
    clone.tokenGroupStream = null;

    return clone;
  }

  /**
   * Compares this attribute with another object.
   * <p>
   * Two instances are equal when either both have no sequence and their states are equal, or
   * both have a sequence and the sequences are equal. If only one of them has a sequence they
   * are not equal.
   *
   * @param obj the object to compare with; may be null or of any type
   * @return true if {@code obj} is an equal {@link TokenGroupAttributeImpl}, false otherwise
   */
  @Override
  public boolean equals(Object obj) {
    // Check the type first: without this guard a null or foreign argument would reach the
    // field accesses below and throw instead of returning false.
    if (!(obj instanceof TokenGroupAttributeImpl)) {
      return false;
    }
    TokenGroupAttributeImpl other = (TokenGroupAttributeImpl) obj;

    if (other.seq == null && seq == null) {
      // Neither side is sequence-backed: compare the captured states.
      return other.states.equals(states);
    }
    // Sequence-backed on both sides: the sequences decide. Mixed null/non-null is unequal.
    return other.seq != null && seq != null && other.seq.equals(seq);
  }

  /**
   * Returns the hash code of the sequence when one is set, otherwise that of the states,
   * mirroring the fields {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    return (seq == null ? states.hashCode() : seq.hashCode());
  }

  /**
   * Returns true when there are no states and the sequence is either absent or has no tokens.
   */
  @Override
  public boolean isEmpty() {
    return states.isEmpty() && (seq == null || seq.getTokens().isEmpty());
  }

  /**
   * Returns the number of states if any are present; otherwise the number of tokens in the
   * sequence, or zero when there is no sequence either.
   */
  @Override
  public int size() {
    return (!states.isEmpty() ? states.size() :
        (seq != null ? seq.getTokens().size() : states.size()));
  }

  /**
   * Sets the list of states for this group. Invalidates any previously set sequence.
   *
   * @param states the states to store; an immutable copy is kept
   */
  public void setStates(List<AttributeSource.State> states) {
    // A State contains clones of AttributeImpl, so we must make sure that
    // no AttributeImpl holds a circular reference back to itself.
    this.states = ImmutableList.copyOf(states);
    this.seq = null;
  }

  /**
   * Sets the attribute source for this group. Invalidates any previously set sequence.
   *
   * @param source the source whose attribute classes are recorded
   */
  public void setAttributeSource(AttributeSource source) {
    attributeClasses = ImmutableList.copyOf(source.getAttributeClassesIterator());
    this.seq = null;
  }

  /**
   * Sets the group token stream as a sequence. Constructs a stream from this sequence lazily.
   * Invalidates any information set from setStates or setAttributeSource.
   *
   * @param seq the sequence backing this group
   */
  public void setSequence(TokenizedCharSequence seq) {
    this.seq = seq;
    this.states = Collections.emptyList();
    this.attributeClasses = null;
  }

  /**
   * Returns the backing TokenizedCharSequence. Will be null if group was set using states,
   * and also after {@link #getTokenGroupStream()} has converted the sequence into states.
   */
  public TokenizedCharSequence getSequence() {
    return seq;
  }

  /**
   * Returns the token group stream for this group, creating it on first use.
   * <p>
   * If the group is backed by a sequence that has not been processed yet, the sequence is
   * first run through a {@link TokenizedCharSequenceStream} and every token's state is
   * captured. The results are stored through {@link #setAttributeSource} and
   * {@link #setStates}, both of which reset the sequence to null.
   *
   * @return the stream, with its states set to the current states of this group
   */
  @Override
  public TokenGroupStream getTokenGroupStream() {
    //Lazily process the sequence into a set of states, only do it when getTokenGroupStream is called
    if ((attributeClasses == null || states.isEmpty()) && seq != null) {
      TokenizedCharSequenceStream ret = new TokenizedCharSequenceStream();
      ret.reset(seq);

      //TODO(alewis) This could probably be lazier. Make a new extension of TokenGroupStream?
      ImmutableList.Builder<State> builder = ImmutableList.builder();
      while (ret.incrementToken()) {
        builder.add(ret.captureState());
      }

      setAttributeSource(ret);
      setStates(builder.build());
    }

    // lazy initialize tokenGroupStream
    if (tokenGroupStream == null) {
      tokenGroupStream = new TokenGroupStream(attributeClasses);
    }

    // The cached stream is reused across calls, so refresh its states every time.
    tokenGroupStream.setStates(states);
    return tokenGroupStream;
  }

  // Explicitly suppress ability to serialize.
  private void writeObject(java.io.ObjectOutputStream out) throws IOException {
    throw new NotSerializableException();
  }

  private void readObject(java.io.ObjectInputStream in)
      throws IOException, ClassNotFoundException {
    throw new NotSerializableException();
  }

  private void readObjectNoData() throws ObjectStreamException {
    throw new NotSerializableException();
  }
}