package org.apache.solr.schema; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.lang.reflect.Constructor; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource.State; import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.response.TextResponseWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Pre-analyzed field type provides a way to index a serialized token stream, * optionally with an independent stored value of a field. */ public class PreAnalyzedField extends FieldType { private static final Logger LOG = LoggerFactory.getLogger(PreAnalyzedField.class); /** Init argument name. Value is a fully-qualified class name of the parser * that implements {@link PreAnalyzedParser}. */ public static final String PARSER_IMPL = "parserImpl"; private static final String DEFAULT_IMPL = JsonPreAnalyzedParser.class.getName(); private PreAnalyzedParser parser; @Override protected void init(IndexSchema schema, Map<String, String> args) { super.init(schema, args); String implName = args.get(PARSER_IMPL); if (implName == null) { parser = new JsonPreAnalyzedParser(); } else { try { Class<?> implClazz = Class.forName(implName); if (!PreAnalyzedParser.class.isAssignableFrom(implClazz)) { throw new Exception("must implement " + PreAnalyzedParser.class.getName()); } Constructor<?> c = implClazz.getConstructor(new Class<?>[0]); parser = (PreAnalyzedParser) c.newInstance(new Object[0]); } catch (Exception e) { LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName + "' (" + e.getMessage() + "), using default " + DEFAULT_IMPL); parser = new JsonPreAnalyzedParser(); } } } @Override public Analyzer getAnalyzer() { return new SolrAnalyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { return new TokenStreamComponents(new PreAnalyzedTokenizer(reader, parser)); } }; } @Override public Analyzer getQueryAnalyzer() { return getAnalyzer(); } @Override public IndexableField createField(SchemaField field, Object value, float boost) { IndexableField f = null; try { f = fromString(field, String.valueOf(value), boost); } catch (Exception e) { e.printStackTrace(); return null; } return f; } @Override public SortField getSortField(SchemaField field, boolean top) { return getStringSort(field, top); } @Override public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeStr(name, f.stringValue(), true); } /** Utility method to convert a field to a string that is parse-able by this * class. * @param f field to convert * @return string that is compatible with the serialization format * @throws IOException If there is a low-level I/O error. */ public String toFormattedString(Field f) throws IOException { return parser.toFormattedString(f); } /** * This is a simple holder of a stored part and the collected states (tokens with attributes). */ public static class ParseResult { public String str; public byte[] bin; public List<State> states = new LinkedList<State>(); } /** * Parse the input and return the stored part and the tokens with attributes. */ public static interface PreAnalyzedParser { /** * Parse input. * @param reader input to read from * @param parent parent who will own the resulting states (tokens with attributes) * @return parse result, with possibly null stored and/or states fields. * @throws IOException if a parsing error or IO error occurs */ public ParseResult parse(Reader reader, AttributeSource parent) throws IOException; /** * Format a field so that the resulting String is valid for parsing with {@link #parse(Reader, AttributeSource)}. * @param f field instance * @return formatted string * @throws IOException If there is a low-level I/O error. */ public String toFormattedString(Field f) throws IOException; } public IndexableField fromString(SchemaField field, String val, float boost) throws Exception { if (val == null || val.trim().length() == 0) { return null; } PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser); parse.reset(); // consume Field f = (Field)super.createField(field, val, boost); if (parse.getStringValue() != null) { f.setStringValue(parse.getStringValue()); } else if (parse.getBinaryValue() != null) { f.setBytesValue(parse.getBinaryValue()); } else { f.fieldType().setStored(false); } if (parse.hasTokenStream()) { f.fieldType().setIndexed(true); f.fieldType().setTokenized(true); f.setTokenStream(parse); } return f; } /** * Token stream that works from a list of saved states. */ private static class PreAnalyzedTokenizer extends Tokenizer { private final List<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>(); private Iterator<AttributeSource.State> it = null; private String stringValue = null; private byte[] binaryValue = null; private PreAnalyzedParser parser; private Reader lastReader; public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) { super(reader); this.parser = parser; } public boolean hasTokenStream() { return !cachedStates.isEmpty(); } public String getStringValue() { return stringValue; } public byte[] getBinaryValue() { return binaryValue; } public final boolean incrementToken() { // lazy init the iterator if (it == null) { it = cachedStates.iterator(); } if (!it.hasNext()) { return false; } AttributeSource.State state = (State) it.next(); restoreState(state.clone()); return true; } @Override public final void reset() throws IOException { // NOTE: this acts like rewind if you call it again if (input != lastReader) { lastReader = input; cachedStates.clear(); stringValue = null; binaryValue = null; ParseResult res = parser.parse(input, this); if (res != null) { stringValue = res.str; binaryValue = res.bin; if (res.states != null) { cachedStates.addAll(res.states); } } } it = cachedStates.iterator(); } @Override public void close() throws IOException { super.close(); lastReader = null; // just a ref, null for gc } } }