/*
* Copyright 2007 T-Rank AS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package no.trank.openpipe.solr.schema;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.zip.InflaterInputStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.core.SolrException;
import static org.apache.solr.core.SolrException.ErrorCode.SERVER_ERROR;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import no.trank.openpipe.solr.analysis.BinaryIO;
import no.trank.openpipe.solr.analysis.BinaryTokenDeserializer;
import no.trank.openpipe.solr.analysis.io.Base64InputStream;
import no.trank.openpipe.solr.util.IOUtil;
/**
* A field type for pre-tokenized field-values stored on a binary base64-encoded form.
* <br/>
* Uses {@link Base64InputStream} for decoding the base64-encoded string.
* <br/>
* Uses {@link BinaryIO} to verify version of serialized data and to find compression settings for the data.
* <p>
* Reading a field-value, {@link #createField(SchemaField, String, float)}, does the following:
* <pre>
* InputStream in = new Base64InputStream(externalVal);
* if (BinaryIO.readHeaderIsCompressed(in)) {
* in = new InflaterInputStream(in);
* }
* String untokenizedValue = IOUtil.readUTF(in);
* </pre>
* The tokens are parsed as follows:
* <pre>
* Fieldable {
* ...
* public TokenStream tokenStreamValue() {
* return new BinaryTokenDeserializer(in);
* }
* </pre>
* Where <tt>in</tt> is the stream opened in {@link #createField(SchemaField, String, float)}.
* <p/>
*
* @see IOUtil#readUTF(InputStream)
* @see BinaryTokenDeserializer
*
* @version $Revision$
*/
public class Base64Type extends TextField {

  /**
   * Creates a field from a pre-tokenized field-value given as a binary base64-encoded string.
   * <p>
   * The value is decoded with a {@link Base64InputStream}; if {@link BinaryIO#readHeaderIsCompressed(InputStream)}
   * reports compression, the stream is wrapped in an {@link InflaterInputStream}. The untokenized value is then read
   * with {@link IOUtil#readUTF(InputStream)} and the remaining stream is handed to the returned field, which uses it
   * for {@link Fieldable#tokenStreamValue()} and {@link Fieldable#readerValue()}.
   *
   * @param field the field info as read from schema.
   * @param externalVal the base64-encoded string.
   * @param boost the boost of this field.
   *
   * @return a <tt>Fieldable</tt> as read from <tt>externalVal</tt>, as described {@linkplain Base64Type here}, or
   * <tt>null</tt> if <tt>externalVal</tt> is <tt>null</tt> or the field is neither indexed nor stored.
   *
   * @throws SolrException if the header or the untokenized value could not be read from <tt>externalVal</tt>.
   */
  @Override
  public Fieldable createField(SchemaField field, String externalVal, float boost) {
    if (externalVal == null) {
      return null;
    }
    if (!field.indexed() && !field.stored()) {
      log.finest("Ignoring unindexed/unstored field: " + field);
      return null;
    }
    InputStream in = new Base64InputStream(externalVal);
    try {
      if (BinaryIO.readHeaderIsCompressed(in)) {
        in = new InflaterInputStream(in);
      }
      // The stream is now positioned after the header; the untokenized value comes first.
      final String val = IOUtil.readUTF(in);
      // Ownership of the stream passes to the field, which reads the tokens from it later.
      final Fieldable f = new Base64Field(field.getName(), val, getFieldStore(field, val), getFieldIndex(field, val),
          getFieldTermVec(field, val), in);
      f.setOmitNorms(field.omitNorms());
      f.setBoost(boost);
      return f;
    } catch (IOException e) {
      // Nobody will consume the stream after a failure, so release it here. This matters when it has
      // already been wrapped in an InflaterInputStream, whose Inflater is otherwise only released by
      // finalization.
      closeQuietly(in);
      throw new SolrException(SERVER_ERROR, "Could not create field '" + field + "' from value '" + externalVal +
          "'", e, false);
    }
  }

  /**
   * Closes the given stream, ignoring any <tt>IOException</tt> thrown while closing.
   *
   * @param in the stream to close.
   */
  private static void closeQuietly(InputStream in) {
    try {
      in.close();
    } catch (IOException ignored) {
      // Deliberately ignored: the caller is already reporting the original failure.
    }
  }

  /**
   * A field holding the untokenized string value together with the stream the tokens are read from.
   */
  private static class Base64Field extends AbstractField {
    private final String val;
    private transient final InputStream in;

    /**
     * @param name the name of the field.
     * @param val the untokenized value, returned by {@link #stringValue()}.
     * @param store the store setting passed on to <tt>AbstractField</tt>.
     * @param index the index setting passed on to <tt>AbstractField</tt>.
     * @param termVector the term-vector setting passed on to <tt>AbstractField</tt>.
     * @param in the stream handed to {@link BinaryTokenDeserializer}.
     */
    private Base64Field(String name, String val, Field.Store store, Field.Index index, Field.TermVector termVector,
                        InputStream in) {
      super(name, store, index, termVector);
      this.val = val;
      this.in = in;
    }

    /**
     * @return the untokenized value given at construction.
     */
    @Override
    public String stringValue() {
      return val;
    }

    /**
     * @return the reader created by {@link BinaryTokenDeserializer#createDummyReader(InputStream)} for the stream
     * of this field.
     */
    @Override
    public Reader readerValue() {
      return BinaryTokenDeserializer.createDummyReader(in);
    }

    /**
     * @return always <tt>null</tt>; this field has no binary value.
     */
    @Override
    public byte[] binaryValue() {
      return null;
    }

    /**
     * Note that every call wraps the same underlying stream.
     *
     * @return a new {@link BinaryTokenDeserializer} reading from the stream of this field.
     */
    @Override
    public TokenStream tokenStreamValue() {
      return new BinaryTokenDeserializer(in);
    }
  }
}