/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.outpipe;
import java.util.Date;
import com.bizosys.hsearch.index.DocMeta;
import com.bizosys.hsearch.query.DocMetaWeight;
import com.bizosys.hsearch.query.HQuery;
import com.bizosys.hsearch.query.QueryContext;
import com.bizosys.hsearch.query.QueryPlanner;
import com.bizosys.hsearch.query.QueryResult;
import com.bizosys.hsearch.query.QueryTerm;
import com.bizosys.hsearch.util.IpUtil;
import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.conf.Configuration;
import com.bizosys.oneline.pipes.PipeOut;
/**
* Ranks documents based on term and meta information.
* The following criteria are used to compute the dynamic ranking:
* <ul>
* <li>Freshness</li>
* <li>IP proximity</li>
* <li>Sightings of the query words in the social text</li>
* <li>Sightings of the query words in the tags</li>
* </ul>
* @author karan
*
*/
public class ComputeDynamicRanking implements PipeOut {

    private static final boolean DEBUG_ENABLED = OutpipeLog.l.isDebugEnabled();

    /** Number of leading documents whose score breakdown is written to the debug log. */
    private static final int DEBUG_LOG_LIMIT = 10;

    /** Upper bound of the unboosted freshness and IP proximity scales. */
    private static final int MAX_SCORE = 100;

    /** Document age, in milliseconds, that costs one freshness point (roughly 13.5 days). */
    private static final long MILLIS_PER_FRESHNESS_POINT = 1170000000L;

    /** log10 of the house distance at which the IP proximity score reaches zero. */
    private static final int IP_DISTANCE_LOG_SPAN = 9;

    /** The raw 0..100 IP proximity is divided by this value before boosting. */
    private static final int IP_SCORE_DIVISOR = 3;

    public ComputeDynamicRanking() {
    }

    /**
     * Recomputes the weight of every entry in <code>query.result.sortedDynamicWeights</code>
     * and re-sorts that collection.
     * <p>
     * For each document the new weight is
     * <code>(weight * boostDocumentWeight + termWeight + freshness + ipProximity
     * + social + tags) * 100</code>, truncated to an int. Nothing happens when
     * there are no dynamic weights to rank.
     *
     * @param objQuery the query being processed; must be an {@link HQuery}
     * @param multiWriter not used by this pipe
     */
    public void visit(Object objQuery, boolean multiWriter) throws ApplicationFault, SystemFault {
        HQuery query = (HQuery) objQuery;
        QueryResult result = query.result;
        if (null == result.sortedDynamicWeights) {
            return;
        }

        QueryContext ctx = query.ctx;
        QueryPlanner plan = query.planner;

        // A house of 0 means "requester IP unknown"; IP proximity is then skipped.
        int ipHouse = (null == ctx.ipAddress) ? 0 : IpUtil.computeHouse(ctx.ipAddress);

        // The log buffer is only needed (and only allocated) when debugging.
        StringBuilder log = null;
        int loggedDocs = 0;
        if (DEBUG_ENABLED) {
            log = new StringBuilder();
            log.append("wtTerm|wtFreshness|wtIpProxim|wtSocial|wtTags|meta.weight\n");
        }

        for (Object metaO : result.sortedDynamicWeights) {
            DocMetaWeight meta = (DocMetaWeight) metaO;

            int wtFreshness = this.scoreFreshness(meta, ctx);
            int wtIpProxim = (0 != ipHouse) ? this.scoreIpProximity(meta, ipHouse, ctx) : 0;
            int wtSocial = this.scoreSocialText(meta, plan, ctx);
            int wtTags = this.scoreTags(meta, plan, ctx);

            // Only the first few documents are traced; meta.weight is logged
            // before it is overwritten below.
            if (DEBUG_ENABLED && loggedDocs < DEBUG_LOG_LIMIT) {
                loggedDocs++;
                log.append(meta.serialId).append(">").append(meta.termWeight).
                    append("|").append(wtFreshness).append("|").append(wtIpProxim).
                    append("|").append(wtSocial).append("|").append(wtTags).append("|").
                    append(meta.weight).append('\n');
            }

            float finalWeight = meta.weight * ctx.boostDocumentWeight;
            finalWeight = finalWeight + meta.termWeight; // Term weight is already boosted during static ranking
            finalWeight = finalWeight + wtFreshness;
            finalWeight = finalWeight + wtIpProxim;
            finalWeight = finalWeight + wtSocial;
            finalWeight = finalWeight + wtTags;
            meta.weight = (int) (finalWeight * 100);
        }

        if (DEBUG_ENABLED) {
            OutpipeLog.l.debug("ComputeDynamicRanking > " + log.toString());
        }
        DocMetaWeight.sort(result.sortedDynamicWeights);
    }

    /**
     * Scores how recent a document is. The modification date is used, falling
     * back to the creation date. A document dated "now" scores 100 and loses
     * one point per {@link #MILLIS_PER_FRESHNESS_POINT} of age, never going
     * below zero.
     *
     * @param meta the document meta information
     * @param ctx supplies <code>boostFreshness</code>
     * @return the freshness score multiplied by the freshness boost, or 0 when
     *         the document carries no date
     */
    private int scoreFreshness(DocMeta meta, QueryContext ctx) {
        Date referenceDate = meta.modifiedOn;
        if (null == referenceDate) {
            referenceDate = meta.createdOn;
        }
        if (null == referenceDate) {
            return 0;
        }

        double ageMillis = System.currentTimeMillis() - referenceDate.getTime();
        // Plain narrowing cast; same result as the deprecated new Double(x).intValue().
        int score = (int) (MAX_SCORE - (ageMillis / MILLIS_PER_FRESHNESS_POINT));
        if (score < 0) {
            score = 0;
        }
        return (score * ctx.boostFreshness);
    }

    /**
     * Scores how close the document's IP house is to the requester's IP house.
     * The raw score falls logarithmically from 100 (identical or adjacent
     * houses) to 0 (houses 10^9 or more apart); it is then divided by
     * {@link #IP_SCORE_DIVISOR} and multiplied by the IP proximity boost.
     *
     * @param meta the document meta information
     * @param ipHouse the house computed from the requester's IP address
     * @param ctx supplies <code>boostIpProximity</code>
     * @return the boosted IP proximity score, never negative for a non-negative boost
     */
    private int scoreIpProximity(DocMeta meta, int ipHouse, QueryContext ctx) {
        // Widen to long so neither the subtraction nor the absolute value can overflow.
        long houseDistance = Math.abs((long) meta.ipHouse - (long) ipHouse);

        int proximity;
        if (houseDistance == 0) {
            // Complete match: log10(0) is undefined, so award the top score
            // directly. (Previously this case was truncated to 0 by the division.)
            proximity = MAX_SCORE;
        } else {
            proximity = (int) (MAX_SCORE
                    - (Math.log10(houseDistance) / IP_DISTANCE_LOG_SPAN * MAX_SCORE));
            // Beyond 10^9 the formula goes negative; that means "no proximity",
            // not a score that grows again with distance.
            if (proximity < 0) {
                proximity = 0;
            }
        }

        proximity = proximity / IP_SCORE_DIVISOR;
        return (proximity * ctx.boostIpProximity);
    }

    /**
     * Counts the query terms (must and optional) whose original lower-case
     * word occurs in the document's social text. Each sighting contributes
     * one point, and the total is multiplied by <code>ctx.boostChoices</code>.
     *
     * @param meta the document meta information
     * @param planner supplies the must and optional query terms
     * @param ctx supplies <code>boostChoices</code>
     * @return the boosted number of sightings, or 0 when there is no social text
     */
    private int scoreSocialText(DocMeta meta, QueryPlanner planner, QueryContext ctx) {
        if (null == meta.socialText) {
            return 0;
        }

        int socialRanking = 0;
        if (null != planner.mustTerms) {
            for (QueryTerm term : planner.mustTerms) {
                if (meta.socialText.indexOf(term.wordOrigLower) >= 0) {
                    socialRanking++;
                }
            }
        }
        if (null != planner.optionalTerms) {
            for (QueryTerm term : planner.optionalTerms) {
                if (meta.socialText.indexOf(term.wordOrigLower) >= 0) {
                    socialRanking++;
                }
            }
        }
        return (socialRanking * ctx.boostChoices);
    }

    /**
     * Counts the query terms (must and optional) whose original lower-case
     * word occurs in the document's tags. Each sighting contributes one point.
     * <p>
     * NOTE(review): unlike the other scores, no boost from <code>ctx</code> is
     * applied here; confirm whether that is intended.
     *
     * @param meta the document meta information
     * @param planner supplies the must and optional query terms
     * @param ctx currently unused
     * @return the number of sightings, or 0 when the document has no tags
     */
    private int scoreTags(DocMeta meta, QueryPlanner planner, QueryContext ctx) {
        if (null == meta.tags) {
            return 0;
        }

        int tagRanking = 0;
        if (null != planner.mustTerms) {
            for (QueryTerm term : planner.mustTerms) {
                if (meta.tags.indexOf(term.wordOrigLower) >= 0) {
                    tagRanking++;
                }
            }
        }
        if (null != planner.optionalTerms) {
            for (QueryTerm term : planner.optionalTerms) {
                if (meta.tags.indexOf(term.wordOrigLower) >= 0) {
                    tagRanking++;
                }
            }
        }
        return tagRanking;
    }

    /** Nothing to commit; ranking is applied entirely inside {@link #visit}. */
    public void commit(boolean multiWriter) throws ApplicationFault, SystemFault {
    }

    /** Returns this same instance; the pipe keeps no per-query fields. */
    public PipeOut getInstance() {
        return this;
    }

    /** No configuration is read by this pipe. */
    public void init(Configuration conf) throws ApplicationFault, SystemFault {
    }

    /** @return the fixed pipe name, <code>"ComputeDynamicRanking"</code> */
    public String getName() {
        return "ComputeDynamicRanking";
    }
}