/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.senseidb.search.req.mapred;

import java.io.Serializable;
import java.util.List;

import org.json.JSONObject;

/**
 * Contract for a custom map/reduce job that runs over Sensei segment data.
 *
 * <p>An implementation reads the segment data, produces intermediate (map) results, and
 * aggregates those results on the partition, node and cluster level. This is intended to be
 * much simpler than implementing a dedicated facet handler, and it also makes it possible to
 * enhance the bobo/Sensei merging logic.
 *
 * <p>The interface itself is {@link Serializable}, and both result type parameters are bounded
 * by {@link Serializable}.
 *
 * @param <MapResult> type of the intermediate result produced by {@code map} and merged by
 *     {@code combine}
 * @param <ReduceResult> type of the final result produced by {@code reduce} and rendered by
 *     {@code render}
 */
public interface SenseiMapReduce<MapResult extends Serializable, ReduceResult extends Serializable>
    extends Serializable {

  /**
   * Initializes the map/reduce job from the request parameters.
   *
   * <p>Given a request fragment such as
   * <pre>
   * "mapReduce":{"function":"com.senseidb.search.req.mapred.functions.MaxMapReduce","parameters":{"column":"groupid"}}
   * </pre>
   * the argument corresponds to the {@code parameters} object of that JSON request.
   *
   * @param params the {@code parameters} object taken from the JSON request
   */
  void init(JSONObject params);

  /**
   * The map function: produces an intermediate result for a batch of documents.
   *
   * <p>Only the entries of {@code docIds} at array indexes {@code 0} up to (but not including)
   * {@code docIdCount} are valid; every entry at an index {@code >= docIdCount} should be
   * ignored. The uid of a document can be obtained with {@code uids[docId]}.
   *
   * @param docIds array holding the document ids to process
   * @param docIdCount number of valid entries at the start of {@code docIds}
   * @param uids lookup array from document id to document uid
   * @param accessor used to get the field values
   * @param facetCountsAccessor accessor for facet counts
   *     (NOTE(review): exact semantics are defined by {@code FacetCountAccessor} - confirm there)
   * @return arbitrary map function result
   */
  MapResult map(
      int[] docIds,
      int docIdCount,
      long[] uids,
      FieldAccessor accessor,
      FacetCountAccessor facetCountsAccessor);

  /**
   * Merges map result objects in order to reduce memory and serialization costs.
   *
   * <p>If an implementation does not actually merge the map results, there is a high chance of
   * running out of memory when a significant number of documents is indexed.
   *
   * @param mapResults the map results to merge
   * @param combinerStage the stage at which the combiner is being invoked
   *     (NOTE(review): presumably one of the partition/node/cluster aggregation levels -
   *     confirm against {@code CombinerStage})
   * @return the merged map results
   */
  List<MapResult> combine(List<MapResult> mapResults, CombinerStage combinerStage);

  /**
   * Reduces the merged map results into the final result.
   *
   * @param combineResults the merged map results
   * @return the result of the reduction
   */
  ReduceResult reduce(List<MapResult> combineResults);

  /**
   * Converts the result of the reduce function into a {@link JSONObject}, so that it can be
   * sent back to the client.
   *
   * @param reduceResult the value produced by {@code reduce}
   * @return the JSON representation of {@code reduceResult}
   */
  JSONObject render(ReduceResult reduceResult);

  /**
   * Returns the columns associated with this job.
   *
   * <p>NOTE(review): presumably the names of the columns the job reads through the field
   * accessor - confirm against the callers of this method.
   *
   * @return array of column names
   */
  String[] getColumns();
}