package com.code972.elasticsearch.plugins.rest.action; import com.code972.elasticsearch.HebrewAnalysisPlugin; import com.code972.hebmorph.WordType; import com.code972.hebmorph.datastructures.DictHebMorph; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.hebrew.HebrewAnalyzer; import org.apache.lucene.analysis.hebrew.HebrewQueryLightAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.client.Client; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.rest.*; import java.io.IOException; import java.util.ArrayList; import java.util.List; import static org.elasticsearch.rest.RestRequest.Method.GET; /** * REST endpoint for getting lemmas for a given word */ public class RestHebrewAnalyzerCheckWordAction extends BaseRestHandler { @Inject public RestHebrewAnalyzerCheckWordAction(Settings settings, RestController controller) { super(settings); controller.registerHandler(GET, "/_hebrew/check-word/{word}", this); } @Override protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient nodeClient) throws IOException { final String word = request.param("word"); final boolean tolerate = request.paramAsBoolean("tolerate", true); return channel -> { final DictHebMorph dict; if ((dict = HebrewAnalysisPlugin.getDictionary()) == null) throw new IllegalStateException("Dictionary was not initialized"); final XContentBuilder builder = channel.newBuilder().startObject(); final WordType wordType = HebrewAnalyzer.isRecognizedWord(word, tolerate, dict); builder.field("word", word); builder.field("wordType", wordType); if (wordType != WordType.UNRECOGNIZED && wordType != WordType.NON_HEBREW) { builder.startArray("lemmas"); for (String lemma : getLemmas(word, dict)) { builder.value(lemma); } builder.endArray(); } builder.endObject(); channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); }; } private List<String> getLemmas(final String word, final DictHebMorph dict) throws IOException { final List<String> ret = new ArrayList<>(); final Analyzer a = new HebrewQueryLightAnalyzer(dict); final TokenStream ts = a.tokenStream("foo", word); ts.reset(); while (ts.incrementToken()) { CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class); ret.add(new String(cta.buffer(), 0, cta.length())); } ts.close(); return ret; } }