/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.script.pmml; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.plugin.TokenPlugin; import org.elasticsearch.script.AbstractSearchScript; import org.elasticsearch.script.ExecutableScript; import org.elasticsearch.script.NativeScriptFactory; import org.elasticsearch.ml.modelinput.DataSource; import org.elasticsearch.ml.modelinput.EsDataSource; import org.elasticsearch.ml.modelinput.VectorRangesToVector; import org.elasticsearch.ml.modelinput.VectorRangesToVectorJSON; import org.elasticsearch.search.lookup.LeafDocLookup; import org.elasticsearch.search.lookup.LeafIndexLookup; import java.util.Map; /** * Can read json def and return sparse vectors with tfs. */ public class VectorScriptFactory implements NativeScriptFactory { public static final String NAME = "doc_to_vector"; public VectorScriptFactory() { } @Override public ExecutableScript newScript(@Nullable Map<String, Object> params) { if (params == null || params.containsKey("spec") == false) { throw new IllegalArgumentException("the spec parameter is required"); } Map<String, Object> spec = XContentMapValues.nodeMapValue(params.get("spec"), "spec"); // TODO: Add caching mechanism VectorRangesToVector features = new VectorRangesToVectorJSON(spec); return new VectorizerScript(features); } @Override public boolean needsScores() { // TODO: can we reliably know if a vectorizer script does not make use of _score return false; } @Override public String getName() { return NAME; } public static class VectorizerScript extends AbstractSearchScript { private final VectorRangesToVector features; private DataSource dataSource; /** * Factory that is registered in * {@link TokenPlugin#onModule(org.elasticsearch.script.ScriptModule)} * method when the plugin is loaded. */ private VectorizerScript(VectorRangesToVector features) { this.features = features; dataSource = new EsDataSource() { @Override protected LeafDocLookup getDocLookup() { return doc(); } @Override protected LeafIndexLookup getLeafIndexLookup() { return indexLookup(); } }; } @Override public Object run() { return features.convert(dataSource).getAsMap(); } } }