/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.outpipe; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import com.bizosys.hsearch.index.TermList; import com.bizosys.hsearch.query.DocWeight; import com.bizosys.hsearch.query.HQuery; import com.bizosys.hsearch.query.QueryContext; import com.bizosys.hsearch.query.QueryPlanner; import com.bizosys.hsearch.query.QueryResult; import com.bizosys.hsearch.query.QueryTerm; import com.bizosys.hsearch.util.ObjectFactory; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.pipes.PipeOut; /** * Static ranking is done based on term weight * @author karan * */ public class ComputeStaticRanking implements PipeOut{ Map<String, DocWeight> docWeightMap = null; int highGradesLimit = 1000; /** * DEfault Constructor follows with a init */ public ComputeStaticRanking() { } /** * Subsequent constructor.. Initializes with default settings * @param dynamicRanked */ public ComputeStaticRanking(int dynamicRanked) { this.highGradesLimit = dynamicRanked; } public void visit(Object objQuery, boolean multiWriter) throws ApplicationFault, SystemFault { OutpipeLog.l.debug("ComputeStaticRank ENTER"); HQuery query = (HQuery) objQuery; QueryContext ctx = query.ctx; QueryPlanner planner = query.planner; QueryResult result = query.result; Collection<DocWeight> highGrades = computeWeight(ctx, planner); if ( OutpipeLog.l.isDebugEnabled()) { if ( null == highGrades) OutpipeLog.l.debug("ComputeStaticRank NONE"); else OutpipeLog.l.debug("ComputeStaticRank TOTAL = " + highGrades.size()); } gradeBasedSorting(result, highGrades); highGrades.clear(); highGrades = null; } /** * Compute the static weight * @param ctx * @param planner * @return */ private Collection<DocWeight> computeWeight(QueryContext ctx, QueryPlanner planner) { float thisWt = -1; String mappedDocId = null; String idPrefix = ""; this.docWeightMap = ObjectFactory.getInstance().getDocWeightMap(); for ( List<QueryTerm> qts : planner.sequences) { if ( null == qts) continue; for ( QueryTerm qt : qts) { if ( null == qt) continue; Map<Long, TermList> founded = qt.foundIds; if ( null == founded) continue; for ( Long bucket: founded.keySet()) { TermList tl = founded.get(bucket); if ( null == tl) continue; idPrefix = bucket.toString() + "_"; int bytePos = -1; for ( short docPos : tl.docPos ) { if ( -1 == docPos) continue; bytePos++; thisWt = tl.termWeight[bytePos]; if ( thisWt < 0 ) thisWt = 0; if ( qt.preciousNess > 0) thisWt = thisWt * qt.preciousNess; thisWt = thisWt * ctx.boostTermWeight; mappedDocId = idPrefix + docPos; if ( docWeightMap.containsKey(mappedDocId) ) { docWeightMap.get(mappedDocId).add(thisWt); } else { docWeightMap.put(mappedDocId, new DocWeight(bucket, docPos, thisWt) ); } } tl.cleanup(); } founded.clear(); } } planner.sequences.clear(); return docWeightMap.values(); } /** * Deduct Maximum and minimum range of the document weights * @param values Document Weight Collection * @return min,max values as array */ private float[] getMinMaxScore(Collection<DocWeight> values) { float max = -999999.00F; float min = 999999.00F; for (DocWeight weight : values) { if ( weight.wt > max) max = weight.wt; if ( min > weight.wt) min = weight.wt; } return new float[] { min, max }; } /** * Grade to 0-10 based on weight ranges computed based on max and min value * @param values DocWeight collections * @param minLimit Maximum Weight * @param maxLimit Minimum Weight * @return */ private Collection<DocWeight> keepHighGrades( Collection<DocWeight> values, float minLimit, float maxLimit) { if ( OutpipeLog.l.isInfoEnabled()) OutpipeLog.l.info( "Static Ranking Range Min/Max: " + minLimit + "/" + maxLimit ); if ( minLimit == maxLimit) return values; float diff = (maxLimit - minLimit) / 10; float[] gradesRanges = new float[] {minLimit, minLimit + diff, minLimit + 2 * diff, minLimit + 3 * diff, minLimit + 4 * diff, minLimit + 5 * diff, minLimit + 6 * diff, minLimit + 7 * diff, minLimit + 8 * diff, minLimit + 9 * diff }; int[] gradesTotals = new int[]{0,0,0,0,0,0,0,0,0,0}; for (DocWeight weight : values) { if (weight.wt <= gradesRanges[1]) gradesTotals[0]++; else if (weight.wt <= gradesRanges[2]) gradesTotals[1]++; else if (weight.wt <= gradesRanges[3]) gradesTotals[2]++; else if (weight.wt <= gradesRanges[4]) gradesTotals[3]++; else if (weight.wt <= gradesRanges[5]) gradesTotals[4]++; else if (weight.wt <= gradesRanges[6]) gradesTotals[5]++; else if (weight.wt <= gradesRanges[7]) gradesTotals[6]++; else if (weight.wt <= gradesRanges[8]) gradesTotals[7]++; else if (weight.wt <= gradesRanges[9]) gradesTotals[8]++; else gradesTotals[9]++; } int total = 0; int cutoffIndex; for (cutoffIndex=9; cutoffIndex>-1; cutoffIndex-- ) { total = total + gradesTotals[cutoffIndex]; if ( total > highGradesLimit) break; } if ( cutoffIndex == -1) return values; //Included All //Remove all the low grades Iterator<DocWeight> valuesI = values.iterator(); int valuesT = values.size(); for ( int j=0; j<valuesT; j++ ) { DocWeight weight = valuesI.next(); if ( weight.wt <= gradesRanges[cutoffIndex]) valuesI.remove(); valuesT--; j--; } return values; } private void gradeBasedSorting(QueryResult result, Collection<DocWeight> highGrades) throws SystemFault { float[] minMax = null; while ( true) { minMax = getMinMaxScore(highGrades); if ( minMax[0] == minMax[1]) break; int existingT = highGrades.size(); highGrades = keepHighGrades(highGrades,minMax[0],minMax[1]); int newT = highGrades.size(); if ( OutpipeLog.l.isDebugEnabled()) OutpipeLog.l.debug( "ComputeStaticRank Gradation :" + existingT + "/" + newT); if ( existingT == newT) break; } result.sortedStaticWeights = highGrades.toArray(); if ( minMax[0] != minMax[1] ) DocWeight.sort(result.sortedStaticWeights); } public boolean commit() throws ApplicationFault, SystemFault { ObjectFactory.getInstance().putDocWeightMap(docWeightMap); return true; } public PipeOut getInstance() { return new ComputeStaticRanking(this.highGradesLimit); } public void init(Configuration conf) throws ApplicationFault, SystemFault { this.highGradesLimit = conf.getInt("meta.fetch.limit", 100) * 2; } public void commit(boolean multiWriter) throws ApplicationFault, SystemFault { } public String getName() { return "ComputeStaticRanking"; } }