/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.ingest; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ingest.IngestAnalysisService.AnalysisServiceHolder; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * Processor that splits fields content into tokens using specified analyzer. * New field value will be an array containing all generated tokens. * Throws exception if the field is null or a type other than string. */ public class AnalyzerProcessor extends AbstractProcessor { public static final String TYPE = "analyzer"; private final String targetField; private final String field; private final String analyzer; private final IngestAnalysisService ingestAnalysisService; AnalyzerProcessor(String tag, String field, String targetField, String analyzer, IngestAnalysisService ingestAnalysisService) { super(tag); this.field = field; this.targetField = targetField; this.analyzer = analyzer; this.ingestAnalysisService = ingestAnalysisService; } String getField() { return field; } String getTargetField() { return targetField; } String getAnalyzer() { return analyzer; } @Override public void execute(IngestDocument document) { Object oldVal = document.getFieldValue(field, Object.class); if (oldVal == null) { throw new IllegalArgumentException("field [" + field + "] is null, cannot be analyzed."); } List<String> tokenList = new ArrayList<>(); if (oldVal instanceof String) { analyze(tokenList, (String) oldVal); } else if (oldVal instanceof ArrayList) { for (Object obj : (ArrayList) oldVal) { analyze(tokenList, obj.toString()); } } else { throw new IllegalArgumentException("field [" + field + "] has type [" + oldVal.getClass().getName() + "] and cannot be analyzed"); } document.setFieldValue(targetField, tokenList); } private void analyze(List<String> tokenList, String val) { AnalysisServiceHolder analysisServiceHolder = ingestAnalysisService.acquireAnalysisServiceHolder(); try { try (TokenStream stream = analysisServiceHolder.tokenStream(analyzer, field, val)) { stream.reset(); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); while (stream.incrementToken()) { tokenList.add(term.toString()); } stream.end(); } catch (IOException e) { throw new ElasticsearchException("failed to analyze field [" + field + "]", e); } } finally { analysisServiceHolder.release(); } } @Override public String getType() { return TYPE; } public static class Factory implements Processor.Factory { private final IngestAnalysisService ingestAnalysisService; public Factory(IngestAnalysisService ingestAnalysisService) { this.ingestAnalysisService = ingestAnalysisService; } @Override public AnalyzerProcessor create(Map<String, Processor.Factory> registry, String processorTag, Map<String, Object> config) throws Exception { String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field); String analyzer = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "analyzer"); AnalysisServiceHolder analysisServiceHolder = ingestAnalysisService.acquireAnalysisServiceHolder(); try { if (analysisServiceHolder.hasAnalyzer(analyzer) == false) { throw new IllegalArgumentException("Unknown analyzer [" + analyzer + "]"); } return new AnalyzerProcessor(processorTag, field, targetField, analyzer, ingestAnalysisService); } finally { analysisServiceHolder.release(); } } } }