/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.outpipe; import java.util.List; import com.bizosys.hsearch.query.HQuery; import com.bizosys.hsearch.query.QueryContext; import com.bizosys.hsearch.query.QueryPlanner; import com.bizosys.hsearch.query.QueryTerm; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.pipes.PipeOut; /** * Preciousness is computed after finding detail about the word from * the dictionary. Dictionary maintains number of documents who have * the word in their term vector. More documents containing a word, * the lesser precious it it. * @author karan * */ public class ComputePreciousness implements PipeOut{ public ComputePreciousness() { } public void visit(Object objQuery, boolean multiWriter) throws ApplicationFault, SystemFault { HQuery query = (HQuery) objQuery; @SuppressWarnings("unused") QueryContext ctx = query.ctx; QueryPlanner planner = query.planner; /** * Go through the list to find which one maximim occuring * Compute based on that from 0-1 scale the preciousness */ int maxOccurance1 = computeMaximimOccurance(planner.mustTerms); int maxOccurance2 = computeMaximimOccurance(planner.optionalTerms); int maxOccurance = ( maxOccurance1 > maxOccurance2) ? maxOccurance1 : maxOccurance2; /** if ( 0 == maxOccurance) { OutpipeLog.l.info("Word not recognized " + ctx.queryString); throw new ApplicationFault("Word not Recognized : " + ctx.queryString); } */ computePreciousness(planner.mustTerms, maxOccurance); computePreciousness(planner.optionalTerms, maxOccurance); } /** * Compute the maximum occurance instance. * @param queryWordL * @return * @throws ApplicationFault */ private int computeMaximimOccurance(List<QueryTerm> queryWordL) throws ApplicationFault { int maxOccurance = 0; if ( null == queryWordL) return 0; for (QueryTerm term : queryWordL) { if ( null == term.foundTerm) continue; if ( term.foundTerm.frequency > maxOccurance) maxOccurance = term.foundTerm.frequency; } return maxOccurance; } /** * 1 is most previous and 0 is least precious * Less found terms are more precious in nature * @param queryWordL * @param maxOccurance * @throws ApplicationFault */ private void computePreciousness(List<QueryTerm> queryWordL, int maxOccurance) throws ApplicationFault { if ( null == queryWordL) return; for (QueryTerm term : queryWordL) { if ( null == term.foundTerm) continue; if ( 0 == maxOccurance ) maxOccurance = 1; term.preciousNess = 1 - ( term.foundTerm.frequency / maxOccurance); if ( 0 == term.preciousNess) term.preciousNess = 0.01f; } } public void commit(boolean multiWriter) throws ApplicationFault, SystemFault { } public PipeOut getInstance() { return this; } public void init(Configuration conf) throws ApplicationFault, SystemFault { } public String getName() { return "ComputePreciousness"; } }