/*
* Copyright 2007 T-Rank AS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package no.trank.openpipe.solr.analysis;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import no.trank.openpipe.solr.analysis.io.Base64InputStream;
import static no.trank.openpipe.solr.util.IOUtil.readNibble;
import static no.trank.openpipe.solr.util.IOUtil.readUTF;
/**
* This class reads tokens encoded in the following binary form:
* <pre>
* token.startOffset = IOUtil.readNibble(in);
* token.endOffset = token.startOffset + IOUtil.readNibble(in);
* token.positionIncrement = IOUtil.readNibble(in);
* token.termText = IOUtil.readUTF(in);
* token.type = IOUtil.readUTF(in);
* </pre>
*
* @see no.trank.openpipe.solr.util.IOUtil#readNibble(InputStream)
* @see no.trank.openpipe.solr.util.IOUtil#readUTF(InputStream)
*
* @version $Revision$
*/
public class BinaryTokenDeserializer extends Tokenizer {
  /** Stream the serialized tokens are read from; set to <tt>null</tt> by {@link #close()}. */
  private InputStream in;

  /**
   * Creates a deserializer that reads tokens from the given reader.
   * <p>
   * A reader created by {@link #createDummyReader(InputStream)} is unwrapped and its stream is read directly. Any
   * other reader is wrapped in a {@link Base64InputStream}.
   *
   * @param input the reader supplying the serialized tokens.
   */
  public BinaryTokenDeserializer(Reader input) {
    if (input instanceof DummyReader) {
      in = ((DummyReader) input).getInputStream();
    } else {
      in = new Base64InputStream(input);
    }
  }

  /**
   * Creates a deserializer that reads tokens directly from the given stream.
   *
   * @param in the stream supplying the serialized tokens.
   */
  public BinaryTokenDeserializer(InputStream in) {
    this.in = in;
  }

  /**
   * Reads the next token from the stream.
   * <p>
   * The fields are read in this order: start offset, length, position increment, term text and type. The end
   * offset of the token is <tt>start offset + length</tt>.
   *
   * @return the next token, or <tt>null</tt> if this deserializer has no stream (for instance after
   * {@link #close()}), if any of the numeric fields is negative, or if the stream ends before a complete token
   * has been read.
   *
   * @throws IOException if reading from the stream fails for any reason other than reaching its end.
   */
  @Override
  public Token next() throws IOException {
    if (in == null) {
      // No stream to read from (closed, or none supplied): report end of stream rather than failing.
      return null;
    }
    try {
      // Each field is validated as soon as it is read. Checking only the computed end offset would let a
      // negative length slip through whenever the start offset is large enough to keep the sum non-negative.
      final int start = readNibble(in);
      if (start < 0) {
        return null;
      }
      final int length = readNibble(in);
      if (length < 0) {
        return null;
      }
      final int end = start + length;
      if (end < 0) {
        // The sum of two non-negative ints is only negative on overflow.
        return null;
      }
      final int posIncr = readNibble(in);
      if (posIncr < 0) {
        return null;
      }
      final String text = readUTF(in);
      final String type = readUTF(in);
      final Token token = new Token(text, start, end, type);
      token.setPositionIncrement(posIncr);
      return token;
    } catch (EOFException ignored) {
      // Running out of data is the normal way for the token stream to end.
      return null;
    }
  }

  /**
   * Closes the underlying stream, if any, and releases the reference to it. Calling this method more than once
   * has no further effect.
   *
   * @throws IOException if closing the underlying stream fails.
   */
  @Override
  public void close() throws IOException {
    if (in != null) {
      in.close();
      in = null;
    }
  }

  /**
   * Workaround for Solr versions prior to <tt>1.2</tt>: wraps a stream in a reader that
   * {@link #BinaryTokenDeserializer(Reader)} recognizes and unwraps again. Has not been tested.
   *
   * @param in the stream to wrap.
   *
   * @return a reader that only carries <tt>in</tt>; it cannot be read from.
   */
  public static Reader createDummyReader(InputStream in) {
    return new DummyReader(in);
  }

  /**
   * Reader that merely transports an {@link InputStream}. Reading from it is not supported.
   */
  private static class DummyReader extends Reader {
    private final InputStream inputStream;

    public DummyReader(InputStream inputStream) {
      this.inputStream = inputStream;
    }

    /**
     * @return the stream this reader was created with.
     */
    public InputStream getInputStream() {
      return inputStream;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
      throw new UnsupportedOperationException();
    }

    /**
     * Does nothing; the wrapped stream is left open.
     */
    @Override
    public void close() throws IOException {
    }
  }
}