/** * Copyright 2014 National University of Ireland, Galway. * * This file is part of the SIREn project. Project and contact information: * * https://github.com/rdelbru/SIREn * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sindice.siren.analysis; import java.io.Reader; import java.util.Map.Entry; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.util.Version; import org.sindice.siren.analysis.filter.DatatypeAnalyzerFilter; import org.sindice.siren.analysis.filter.PositionAttributeFilter; import org.sindice.siren.analysis.filter.SirenPayloadFilter; import org.sindice.siren.util.JSONDatatype; import org.sindice.siren.util.XSDDatatype; /** * The JsonAnalyzer is especially designed to process JSON data. */ public class JsonAnalyzer extends Analyzer { private Analyzer valueAnalyzer; private Analyzer fieldAnalyzer; private final Version matchVersion; private final CharArrayMap<Analyzer> regAnalyzers; /** * Create a {@link JsonAnalyzer} with the specified {@link Analyzer}s for * field names and values. * <p> * The default analyzer for field names will be associated with the datatype * {@link JSONDatatype#JSON_FIELD}. The default analyzer for values will be * associated with the datatype {@link XSDDatatype#XSD_STRING}. * * @param fieldAnalyzer Default {@link Analyzer} for the field names * @param valueAnalyzer Default {@link Analyzer} for the values */ public JsonAnalyzer(final Version version, final Analyzer fieldAnalyzer, final Analyzer valueAnalyzer) { matchVersion = version; this.valueAnalyzer = valueAnalyzer; this.fieldAnalyzer = fieldAnalyzer; regAnalyzers = new CharArrayMap<Analyzer>(version, 64, false); } public void setValueAnalyzer(final Analyzer analyzer) { valueAnalyzer = analyzer; } public void setFieldAnalyzer(final Analyzer analyzer) { fieldAnalyzer = analyzer; } /** * Assign an {@link Analyzer} to be used with that key. That analyzer is used * to process tokens generated by the {@link JsonTokenizer}. * * @param datatype The datatype key * @param a the associated {@link Analyzer} */ public void registerDatatype(final char[] datatype, final Analyzer a) { if (!regAnalyzers.containsKey(datatype)) { regAnalyzers.put(datatype, a); } } /** * Remove all registered Datatype {@link Analyzer}s. */ public void clearDatatypes() { regAnalyzers.clear(); } @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final JsonTokenizer source = new JsonTokenizer(reader); final DatatypeAnalyzerFilter tt = new DatatypeAnalyzerFilter(matchVersion, source, fieldAnalyzer, valueAnalyzer); for (final Entry<Object, Analyzer> e : regAnalyzers.entrySet()) { tt.register((char[]) e.getKey(), e.getValue()); } TokenStream sink = new PositionAttributeFilter(tt); sink = new SirenPayloadFilter(sink); return new TokenStreamComponents(source, sink); } }