/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.cascade.model.potential;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.index.ProximityPostingsReader;
import ivory.smrf.model.builder.ExpressionGenerator;
import ivory.smrf.model.potential.QueryPotential;
import ivory.smrf.model.score.ScoringFunction;
import java.util.Arrays;
/**
 * A {@link QueryPotential} that, in addition to producing a score, remembers two
 * pieces of per-document state from the most recent {@link #computePotential()}
 * call: the term positions (only when this potential has exactly one term node)
 * and the document length. It also exposes a handful of read-only accessors over
 * the inherited postings reader, term evidence and scoring function.
 *
 * @author Lidan Wang
 */
public class CascadeQueryPotential extends QueryPotential {

  /**
   * Copy of the positions reported by the postings reader for the document scored
   * last; {@code null} when that document had no posting or when this potential
   * does not have exactly one term node.
   */
  private int[] positions;

  /**
   * Length of the document scored last, or {@code -1} when that document had no
   * posting in this potential's postings list.
   */
  private int documentLength;

  /** Creates an unconfigured potential. */
  public CascadeQueryPotential() {
  }

  /**
   * Creates a potential by delegating all three arguments to the superclass
   * constructor.
   *
   * @param env retrieval environment, later used to look up document lengths
   * @param generator expression generator handed to the superclass
   * @param scoringFunction scoring function used by {@link #computePotential()}
   */
  public CascadeQueryPotential(RetrievalEnvironment env, ExpressionGenerator generator,
      ScoringFunction scoringFunction) {
    super(env, generator, scoringFunction);
  }

  /**
   * @return the value of {@code termEvidence.getCf()} (presumably the collection
   *         frequency of this potential's term or expression)
   */
  public long termCollectionCF() {
    return termEvidence.getCf();
  }

  /**
   * @return the value of {@code termEvidence.getDf()} (presumably the document
   *         frequency of this potential's term or expression)
   */
  public int termCollectionDF() {
    return termEvidence.getDf();
  }

  /**
   * @return the docno the postings reader is currently positioned on
   */
  public int getDocno() {
    return postingsReader.getDocno();
  }

  /**
   * Scores the document currently referenced by {@code docNode}.
   *
   * <p>When there is no postings reader, {@code DEFAULT_SCORE} is returned and no
   * state is touched. Otherwise the reader is advanced until it reaches or passes
   * the target docno (or runs out of postings), the cached positions and document
   * length are refreshed, and the scoring function is applied to the term
   * frequency (0 when the document has no posting) and the document length.
   *
   * @return the score from the scoring function, or {@code DEFAULT_SCORE} when no
   *         postings are associated with this potential
   */
  @Override
  public float computePotential() {
    // No postings at all for this potential: fall back to the default score.
    if (postingsReader == null) {
      return DEFAULT_SCORE;
    }

    // Move the reader forward. Invariant: curPosting always refers to the next
    // posting that has not been scored yet.
    while (!endOfList && postingsReader.getDocno() < docNode.getDocno()) {
      endOfList = !postingsReader.nextPosting(curPosting);
    }

    final int docLen = env.getDocumentLength(docNode.getDocno());

    // Clear the per-document cache before deciding whether this document matches.
    documentLength = -1;
    positions = null;

    int tf = 0;
    final boolean readerOnTargetDoc = docNode.getDocno() == postingsReader.getDocno();
    if (readerOnTargetDoc) {
      documentLength = docLen;

      // Positions are only defined for term features, i.e. a single term node.
      if (termNodes.size() == 1) {
        int[] termPositions = postingsReader.getPositions();
        positions = Arrays.copyOf(termPositions, termPositions.length);
      }

      // tf may still be 0 for a matching document, e.g. when two terms both
      // occur but not within the window size.
      tf = postingsReader.getTf();
    }

    final float score = scoringFunction.getScore(tf, docLen);
    lastScoredDocno = docNode.getDocno();
    return score;
  }

  /**
   * Resets the postings reader and clears the end-of-list flag and the last
   * scored docno. If the reset throws for any reason (including a missing
   * reader), a message is printed to standard output and the JVM exits with
   * status {@code -1}.
   */
  public void resetPostingsListReader() {
    try {
      postingsReader.reset();
    } catch (Exception e) {
      // Any failure here is treated as fatal for the whole process.
      System.out.println("Postings for this query doesn't exist!");
      System.exit(-1);
    }

    lastScoredDocno = -1;
    endOfList = false;
  }

  /**
   * @return the number of postings reported by the postings reader
   */
  public int getNumberOfPostings() {
    return postingsReader.getNumberOfPostings();
  }

  /**
   * @return the window size of the postings reader when it is a
   *         {@link ProximityPostingsReader}; {@code -1} otherwise
   */
  public int getWindowSize() {
    return (postingsReader instanceof ProximityPostingsReader)
        ? ((ProximityPostingsReader) postingsReader).getWindowSize()
        : -1;
  }

  /**
   * Derives a short name from the scoring function's string representation.
   *
   * @return {@code "dirichlet"} if the representation contains "Dirichlet",
   *         otherwise {@code "bm25"} if it contains "BM25", otherwise
   *         {@code null}
   */
  public String getScoringFunctionName() {
    if (scoringFunction.toString().contains("Dirichlet")) {
      return "dirichlet";
    }
    if (scoringFunction.toString().contains("BM25")) {
      return "bm25";
    }
    return null;
  }

  /**
   * @return the scoring function used by this potential
   */
  public ScoringFunction getScoringFunction() {
    return scoringFunction;
  }

  /**
   * @return the positions cached by the last {@link #computePotential()} call
   *         (the internal array itself, not a fresh copy); may be {@code null}
   */
  public int[] getPositions() {
    return positions;
  }

  /**
   * @return the document length cached by the last {@link #computePotential()}
   *         call, or {@code -1} if that document had no posting
   */
  public int getDocLen() {
    return documentLength;
  }
}