SenseiMapReduce.java example

Explorer
sensei-master
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.  
 */
package com.senseidb.search.req.mapred;

import java.io.Serializable;
import java.util.List;

import org.json.JSONObject;

/**
 * Contract for a custom map-reduce job that runs over Sensei segment data.
 *
 * <p>By implementing this interface, one can access the Sensei segment data, produce
 * intermediate results, and aggregate those results on the partition, node and cluster
 * level. This is much simpler than implementing your own facet handler. It also allows
 * enhancing the bobo/Sensei merging logic.
 *
 * @param <MapResult> type of the intermediate results returned by {@link #map} and passed
 *     through {@link #combine}
 * @param <ReduceResult> type of the final result returned by {@link #reduce} and consumed by
 *     {@link #render}
 */
public interface SenseiMapReduce<MapResult extends Serializable, ReduceResult extends Serializable>
    extends Serializable {

  /**
   * Initializes the map-reduce job.
   *
   * <p>Example request fragment:
   * <pre>{@code
   * "mapReduce":{"function":"com.senseidb.search.req.mapred.functions.MaxMapReduce","parameters":{"column":"groupid"}}
   * }</pre>
   *
   * @param params corresponds to the {@code parameters} object in the JSON request
   */
  void init(JSONObject params);

  /**
   * The map function.
   *
   * <p>Only the entries of {@code docIds} at indexes {@code 0 .. docIdCount - 1} are valid;
   * every entry at an index {@code >= docIdCount} should be ignored. The uid of a document
   * can be obtained with {@code uids[docId]}.
   *
   * @param docIds array holding the document ids to process
   * @param docIdCount number of valid entries at the start of {@code docIds}
   * @param uids document uids, indexed by document id
   * @param accessor is used to get the fields' values
   * @param facetCountsAccessor presumably gives access to facet counts (TODO confirm against
   *     {@code FacetCountAccessor})
   * @return arbitrary map function results
   */
  MapResult map(
      int[] docIds,
      int docIdCount,
      long[] uids,
      FieldAccessor accessor,
      FacetCountAccessor facetCountsAccessor);

  /**
   * Merges map result objects to reduce memory and serialization costs.
   *
   * <p>If an implementation does not actually merge the map results, there is a high chance
   * of running out of memory when a significant number of documents is indexed.
   *
   * @param mapResults the map results to merge
   * @param combinerStage the stage at which this combine call happens (presumably one of the
   *     partition/node aggregation levels; TODO confirm against {@code CombinerStage})
   * @return the merged map results
   */
  List<MapResult> combine(List<MapResult> mapResults, CombinerStage combinerStage);

  /**
   * Reduces the merged map results.
   *
   * @param combineResults the map results, as merged by {@link #combine}
   * @return the result of the reduction
   */
  ReduceResult reduce(List<MapResult> combineResults);

  /**
   * Converts the result of the reduce function into a {@code JSONObject}, so that it can be
   * sent back to the client.
   *
   * @param reduceResult the value returned by {@link #reduce}
   * @return the JSON representation of {@code reduceResult}
   */
  JSONObject render(ReduceResult reduceResult);

  /**
   * Returns the column names associated with this job.
   *
   * <p>NOTE(review): presumably these are the columns the job reads through the
   * {@code FieldAccessor}; confirm against the callers and existing implementations.
   *
   * @return an array of column names
   */
  String[] getColumns();
}