/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.cascade.model;
import ivory.cascade.model.potential.CascadeQueryPotential;
import ivory.smrf.model.Clique;
import ivory.smrf.model.GraphNode;
import ivory.smrf.model.Parameter;
import ivory.smrf.model.potential.PotentialFunction;
import ivory.smrf.model.score.ScoringFunction;
import java.util.List;
/**
* @author Lidan Wang
*/
public class CascadeClique extends Clique {
// For estimating cascade cost
static float term_unit_cost = 1;
static float ordered_unit_cost = 20;
static float unordered_unit_cost = 20;
// Cascade stage
private int cascadeStage;
private String pruningFunction = "";
private float pruningParameter = -1;
public float cost;
private String[] singleTerms;
public CascadeClique(List<GraphNode> nodes, PotentialFunction f, Parameter weight,
int cascadeStage, String pruner_and_params) {
this(nodes, f, weight, 1.0f, null, true, cascadeStage, pruner_and_params);
}
public CascadeClique(List<GraphNode> nodes, PotentialFunction f, Parameter param,
float importance, Type type, boolean docDependent, int cascadeStage, String pruner_and_params) {
super(nodes, f, param, importance, type, docDependent);
String concept = getConcept();
String[] t = concept.trim().toLowerCase().split("\\s+");
singleTerms = new String[t.length];
for (int i = 0; i < t.length; i++) {
singleTerms[i] = t[i];
}
this.cascadeStage = cascadeStage;
if (pruner_and_params.indexOf("null") == -1) {
String[] tokens = pruner_and_params.trim().split("\\s+");
pruningFunction = tokens[0];
pruningParameter = (float) (Double.parseDouble(tokens[1]));
}
}
// If it's a term, then return positions at the current document
// not supported if it's term proximity feature!
public int[] getPositions() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getPositions();
}
public int getDocLen() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getDocLen();
}
// reset postings readers
public void resetPostingsListReader() {
PotentialFunction potential = getPotentialFunction();
((CascadeQueryPotential) potential).resetPostingsListReader();
}
public String getPruningFunction() {
return pruningFunction;
}
public float getPruningParameter() {
return pruningParameter;
}
public void setPruningFunction(String pruner) {
this.pruningFunction = pruner;
}
public void setPruningParametes(float pruner_param) {
pruningParameter = pruner_param;
}
public int getCascadeStage() {
return cascadeStage;
}
public void setCascadeStage(int cs) {
cascadeStage = cs;
}
// Collection CF of this term/bigram
public long termCollectionCF() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).termCollectionCF();
}
public int termCollectionDF() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).termCollectionDF();
}
public void setType(Type type) {
super.setType(type);
if (type == Clique.Type.Term) {
cost = term_unit_cost;
} else if (type == Clique.Type.Unordered) {
cost = unordered_unit_cost;
} else if (type == Clique.Type.Ordered) {
cost = ordered_unit_cost;
} else {
throw new RuntimeException("Invalid type " + type);
}
}
public int getDocno() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getDocno();
}
public int getNumberOfPostings() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getNumberOfPostings();
}
public int getWindowSize() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getWindowSize();
}
public String getScoringFunctionName() { // dirichlet, bm25
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getScoringFunctionName();
}
public ScoringFunction getScoringFunction() {
PotentialFunction potential = getPotentialFunction();
return ((CascadeQueryPotential) potential).getScoringFunction();
}
public String[] getSingleTerms() {
return singleTerms;
}
public String getParamID() { // termWt, orderedWt, unorderedWt
return getParameter().getName();
}
public String toString() {
StringBuilder s = new StringBuilder();
s.append("<clique type=\"").append(getType()).append("\">");
s.append("<terms>").append(getConcept()).append("</terms>");
s.append("<terms>")
.append(getConcept())
.append("</terms>")
.append(" wgts " + getWeight())
.append(" pruner_and_param " + getPruningFunction() + " " + getPruningParameter())
.append("cascadeStage " + getCascadeStage())
.append(" unit_cost " + cost)
.append(" cliqueType " + getType());
s.append("</clique>");
return s.toString();
}
}