/* * Copyright (C) 2014 Jörg Prante * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program; if not, see http://www.gnu.org/licenses * or write to the Free Software Foundation, Inc., 51 Franklin Street, * Fifth Floor, Boston, MA 02110-1301 USA. * * The interactive user interfaces in modified source and object code * versions of this program must display Appropriate Legal Notices, * as required under Section 5 of the GNU Affero General Public License. * */ package org.xbib.elasticsearch.index.analysis.icu; import com.ibm.icu.text.Collator; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.collation.ICUCollationAttributeFactory; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.analysis.AbstractTokenizerFactory; import org.elasticsearch.index.settings.IndexSettings; import java.io.Reader; public class IcuCollationTokenizerFactory extends AbstractTokenizerFactory { private final ICUCollationAttributeFactory factory; private final int bufferSize; @Inject public IcuCollationTokenizerFactory(Index index, @IndexSettings Settings indexSettings, Environment environment, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); Collator collator = IcuCollationKeyAnalyzerProvider.createCollator(environment, settings); this.factory = new ICUCollationAttributeFactory(collator); this.bufferSize = settings.getAsInt("buffer_size", KeywordTokenizer.DEFAULT_BUFFER_SIZE); } @Override public Tokenizer create(Reader reader) { return new KeywordTokenizer(factory, reader, bufferSize); } }