package org.apache.solr.schema; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.analysis.core.LowerCaseFilterFactory; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory; import org.apache.lucene.analysis.miscellaneous.TrimFilterFactory; import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.solr.analysis.DiagnoseFilterFactory; import org.apache.solr.common.SolrException; /** * StrField with configurable tokenizer chain. * * @author rchyla * */ public class AqpStrField extends StrField { public AqpStrField() { Map<String, String> replArgs = new HashMap<String, String>(); replArgs.put("pattern", "(doi:|arxiv:|\\-)"); List<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>(); filters.add(new DiagnoseFilterFactory(new HashMap<String, String>())); filters.add(new RemoveDuplicatesTokenFilterFactory(new HashMap<String, String>())); filters.add(new TrimFilterFactory(new HashMap<String, String>())); filters.add(new PatternReplaceFilterFactory(replArgs)); filters.add(new LowerCaseFilterFactory(new HashMap<String, String>())); filters.add(new ASCIIFoldingFilterFactory(new HashMap<String, String>())); TokenFilterFactory[] filterArr = new TokenFilterFactory[filters.size()]; filters.toArray(filterArr); } @Override public boolean isTokenized() { return true; } protected boolean supportsAnalyzers() { return true; } /** @Override public Analyzer getIndexAnalyzer() { return indexAnalyzer; } @Override public Analyzer getQueryAnalyzer() { return queryAnalyzer; } @Override public String toInternal(String val) { // - used in delete when a Term needs to be created. // - used by the default getTokenizer() and createField() return val; } **/ @Override public IndexableField createField(SchemaField field, Object value, float boost) { if (!field.indexed() && !field.stored()) { return null; } String val; try { val = toInternal(value.toString()); } catch (RuntimeException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error while creating field '" + field + "' from value '" + value + "'", e); } if (val==null) return null; org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType(); newType.setTokenized(true); // force it newType.setStored(field.stored()); newType.setOmitNorms(field.omitNorms()); newType.setIndexOptions(field.indexed() ? getIndexOptions(field, val) : IndexOptions.NONE); newType.setStoreTermVectors(field.storeTermVector()); newType.setStoreTermVectorOffsets(field.storeTermOffsets()); newType.setStoreTermVectorPositions(field.storeTermPositions()); newType.setStoreTermVectorPayloads(field.storeTermPayloads()); return createField(field.getName(), val, newType, boost); } /** @Override public List<IndexableField> createFields(SchemaField field, Object value, float boost) { //if (!field.multiValued()) // return super.createFields(field, value, boost); List<IndexableField> output = new ArrayList<IndexableField>(); for (String v: value.toString().split("\\s")) { IndexableField fval = createField(field, v, boost); if (field.hasDocValues()) { IndexableField docval; final BytesRef bytes = new BytesRef(v.toString()); if (field.multiValued()) { docval = new SortedSetDocValuesField(field.getName(), bytes); } else { docval = new SortedDocValuesField(field.getName(), bytes); } // Only create a list of we have 2 values... if (fval != null) { List<IndexableField> fields = new ArrayList<>(2); fields.add(fval); fields.add(docval); return fields; } fval = docval; } output.add(fval); } return output; } **/ }