/* * Copyright 2007 T-Rank AS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package no.trank.openpipe.solr.analysis; import java.io.IOException; import java.io.OutputStream; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import org.apache.lucene.analysis.Token; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import no.trank.openpipe.api.document.AnnotatedField; import no.trank.openpipe.solr.analysis.io.Base64OutputBuffer; import static no.trank.openpipe.solr.util.IOUtil.writeNibble; import static no.trank.openpipe.solr.util.IOUtil.writeUTF; /** * @version $Revision$ */ public class Base64TokenSerializer implements TokenSerializer { private static final Logger log = LoggerFactory.getLogger(Base64TokenSerializer.class); private Base64OutputBuffer out; private int maxBufferSize = 128 * 1024; private int compressionThreshold = 1024; private Deflater deflater; @Override public String serialize(AnnotatedField field) { final AnnotationTokenStream stream = new AnnotationTokenStream(field); if (out == null) { out = new Base64OutputBuffer(); } else { out.reset(); } try { try { final String value = field.getValue(); final boolean compress = value.length() > compressionThreshold; BinaryIO.writeHeader(out, compress); final OutputStream out = getOutputStream(compress); writeUTF(out, value); for (Token tok = stream.next(); tok != null; tok = stream.next()) { final int start = tok.startOffset(); writeNibble(out, start); writeNibble(out, tok.endOffset() - start); writeNibble(out, tok.getPositionIncrement()); writeUTF(out, tok.termText()); writeUTF(out, tok.type()); } out.close(); final String res = this.out.toString(); log.debug(res); return res; } catch (IOException e) { throw new RuntimeException(e); } } finally { out.trimToSize(maxBufferSize); } } private OutputStream getOutputStream(boolean compress) { if (compress) { if (deflater == null) { deflater = new Deflater(Deflater.BEST_COMPRESSION); } else { deflater.reset(); } return new DeflaterOutputStream(out, deflater); } return out; } public int getMaxBufferSize() { return maxBufferSize; } public void setMaxBufferSize(int maxBufferSize) { this.maxBufferSize = maxBufferSize; } public int getCompressionThreshold() { return compressionThreshold; } public void setCompressionThreshold(int compressionThreshold) { this.compressionThreshold = compressionThreshold; } @Override public void close() { if (deflater != null) { deflater.end(); deflater = null; } out = null; } }