package experiments.collective.entdoccentric.query;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.Version;
import experiments.collective.entdoccentric.LTR.LTRBooleanQuery;
import experiments.collective.entdoccentric.LTR.LearnToRankClause;
import experiments.collective.entdoccentric.LTR.LearnToRankFuzzyQuery;
import experiments.collective.entdoccentric.LTR.LearnToRankQuery;
import experiments.collective.entdoccentric.LTR.LearnToRankTermQuery;
import experiments.collective.entdoccentric.StandardQueryDataObject.EntityObject;
import experiments.collective.entdoccentric.dpo.EntityToDisambiguate;
/**
 * Sets up the learn-to-rank features for the document centric approach.
 * <p>
 * Usage: call {@link #setMainQuery(LearnToRankQuery)} first, then
 * {@link #setSubQueries(EntityObject)}, which builds one sub query per active
 * feature, adds it to the main query and assigns the feature's weight.
 * <p>
 * Features 1-4 are active. Features 5-8 are implemented but currently not
 * registered (see the disabled lines in {@link #setSubQueries(EntityObject)}).
 */
public class LearnToRankFeatureSetupDocumentCentric implements
        LearnToRankFeatureSetup {

    /** Index field queried by the title based features (1, 3, 5, 7). */
    private static final String TITLE_FIELD = "title";

    /** Index field queried by the abstract based features (2, 4, 6, 8). */
    private static final String ABSTRACT_FIELD = "abstract";

    // Weights of the active features.
    // Alternative weight set kept from an earlier configuration:
    // 0.018896f, 0.0301477f, 0.0951799f, 0.285818f
    private static final float FEATURE1_WEIGHT = 0.0056836f;
    private static final float FEATURE2_WEIGHT = 0.0305069f;
    private static final float FEATURE3_WEIGHT = 0.117543f;
    private static final float FEATURE4_WEIGHT = 0.365259f;

    /** Clauses registered so far on the current main query. */
    private List<LearnToRankClause> features;

    /** Main query that receives the feature sub queries. */
    private LearnToRankQuery query;

    /**
     * Only referenced by the disabled stemming helper at the end of this
     * class; nothing in the active code reads or closes it.
     */
    private final Analyzer analyzer;

    /**
     * Creates the feature setup and instantiates the analyzer.
     */
    public LearnToRankFeatureSetupDocumentCentric() {
        this.analyzer = new PositionalPorterStopAnalyzer(Version.LUCENE_41);
    }

    /**
     * Sets the main query that subsequent sub queries are added to and starts
     * a fresh list of registered feature clauses.
     *
     * @param query
     *            main query receiving the feature clauses
     */
    public void setMainQuery(LearnToRankQuery query) {
        this.features = new LinkedList<LearnToRankClause>();
        this.query = query;
    }

    /**
     * Builds features 1-4 for the given data object, adds each of them to the
     * main query and assigns its weight.
     * <p>
     * The weight is set directly on the clause returned by the main query, so
     * repeated invocations weight the newly added clauses rather than the
     * first four entries of the feature list.
     *
     * @param dataObject
     *            provides the surface form ({@code getText()}) and its context
     *            ({@code getContext()})
     * @throws IllegalStateException
     *             if {@link #setMainQuery(LearnToRankQuery)} has not been
     *             called with a non-null query before
     */
    public void setSubQueries(EntityObject dataObject) {
        if (this.query == null || this.features == null) {
            throw new IllegalStateException(
                    "setMainQuery must be called before setSubQueries");
        }
        // NOTE(review): the meaning of the boolean flag is defined by
        // LearnToRankQuery.add and is not visible here; the values are kept
        // exactly as before (true for features 1-2, false for 3-4).
        registerFeature(createFeature1(dataObject), "Feature1", true,
                FEATURE1_WEIGHT);
        registerFeature(createFeature2(dataObject), "Feature2", true,
                FEATURE2_WEIGHT);
        registerFeature(createFeature3(dataObject), "Feature3", false,
                FEATURE3_WEIGHT);
        registerFeature(createFeature4(dataObject), "Feature4", false,
                FEATURE4_WEIGHT);
        // Disabled features:
        // features.add(this.query.add(createFeature5(dataObject), "Feature5"));
        // features.add(this.query.add(createFeature6(dataObject), "Feature6"));
        // features.add(this.query.add(createFeature7(dataObject), "Feature7"));
        // features.add(this.query.add(createFeature8(dataObject), "Feature8"));
    }

    /**
     * Adds one feature query to the main query, weights the resulting clause
     * and remembers it in the feature list.
     */
    private void registerFeature(Query featureQuery, String name,
            boolean flag, float weight) {
        LearnToRankClause clause = this.query.add(featureQuery, name, flag);
        clause.setWeight(weight);
        this.features.add(clause);
    }

    /**
     * Feature 1: cos(Lucene-Score) * sim(t_d, q)
     *
     * @param dataObject
     *            source of the surface form
     * @return term query for the surface form on the title field
     */
    private Query createFeature1(EntityObject dataObject) {
        return buildTermQuery(TITLE_FIELD, dataObject.getText());
    }

    /**
     * Feature 2: cos(Lucene-Score) * sim(a_d, q)
     *
     * @param dataObject
     *            source of the surface form
     * @return term query for the surface form on the abstract field
     */
    private Query createFeature2(EntityObject dataObject) {
        return buildTermQuery(ABSTRACT_FIELD, dataObject.getText());
    }

    /**
     * Feature 3: cos(Lucene-Score) * sim(t_d, q_c)
     *
     * @param dataObject
     *            source of the context
     * @return boolean query with one optional term query per context token on
     *         the title field
     */
    private Query createFeature3(EntityObject dataObject) {
        return buildContextTermQuery(TITLE_FIELD, dataObject.getContext());
    }

    /**
     * Feature 4: cos(Lucene-Score) * sim(a_d, q_c)
     *
     * @param dataObject
     *            source of the context
     * @return boolean query with one optional term query per context token on
     *         the abstract field
     */
    private Query createFeature4(EntityObject dataObject) {
        return buildContextTermQuery(ABSTRACT_FIELD, dataObject.getContext());
    }

    /**
     * Feature 5: cos(BM25) * sim(t_d, q)
     *
     * @param dataObject
     *            source of the surface form
     * @return fuzzy query for the surface form on the title field
     */
    private Query createFeature5(EntityObject dataObject) {
        return buildFuzzyQuery(TITLE_FIELD, dataObject.getText());
    }

    /**
     * Feature 6: cos(Bm25) * sim(a_d, q)
     *
     * @param dataObject
     *            source of the surface form
     * @return fuzzy query for the surface form on the abstract field
     */
    private Query createFeature6(EntityObject dataObject) {
        return buildFuzzyQuery(ABSTRACT_FIELD, dataObject.getText());
    }

    /**
     * Feature 7: cos(BM25) * sim(t_d, q_c)
     *
     * @param dataObject
     *            source of the context
     * @return boolean query with one optional fuzzy query per context token
     *         on the title field
     */
    private Query createFeature7(EntityObject dataObject) {
        return buildContextFuzzyQuery(TITLE_FIELD, dataObject.getContext());
    }

    /**
     * Feature 8: cos(BM25) * sim(a_d, q_c)
     *
     * @param dataObject
     *            source of the context
     * @return boolean query with one optional fuzzy query per context token
     *         on the abstract field
     */
    private Query createFeature8(EntityObject dataObject) {
        return buildContextFuzzyQuery(ABSTRACT_FIELD, dataObject.getContext());
    }

    /**
     * Builds a single term query for {@code text} on {@code field} using a
     * new {@link DefaultSimilarity}.
     */
    private Query buildTermQuery(String field, String text) {
        DefaultSimilarity similarity = new DefaultSimilarity();
        LearnToRankTermQuery termQuery = new LearnToRankTermQuery(new Term(
                field, text), similarity);
        return termQuery;
    }

    /**
     * Builds a single fuzzy query for {@code text} on {@code field} using a
     * new {@link BM25Similarity}.
     */
    private Query buildFuzzyQuery(String field, String text) {
        BM25Similarity similarity = new BM25Similarity();
        LearnToRankFuzzyQuery fuzzyQuery = new LearnToRankFuzzyQuery(new Term(
                field, text), similarity);
        return fuzzyQuery;
    }

    /**
     * Builds a boolean query holding one optional ({@code SHOULD}) term query
     * per token of {@code context}; all term queries share one
     * {@link DefaultSimilarity} instance.
     */
    private Query buildContextTermQuery(String field, String context) {
        // NOTE(review): splitting on a single blank yields empty tokens for
        // consecutive blanks and they are still turned into terms; left
        // unchanged so that scoring stays as it was.
        String[] tokens = context.split(" ");
        LTRBooleanQuery booleanQuery = new LTRBooleanQuery();
        DefaultSimilarity similarity = new DefaultSimilarity();
        for (String token : tokens) {
            LearnToRankTermQuery termQuery = new LearnToRankTermQuery(
                    new Term(field, token), similarity);
            booleanQuery.add(termQuery, Occur.SHOULD);
        }
        return booleanQuery;
    }

    /**
     * Builds a boolean query holding one optional ({@code SHOULD}) fuzzy
     * query per token of {@code context}; all fuzzy queries share one
     * {@link BM25Similarity} instance.
     */
    private Query buildContextFuzzyQuery(String field, String context) {
        // Same single-blank tokenisation as buildContextTermQuery.
        String[] tokens = context.split(" ");
        LTRBooleanQuery booleanQuery = new LTRBooleanQuery();
        BM25Similarity similarity = new BM25Similarity();
        for (String token : tokens) {
            LearnToRankFuzzyQuery fuzzyQuery = new LearnToRankFuzzyQuery(
                    new Term(field, token), similarity);
            booleanQuery.add(fuzzyQuery, Occur.SHOULD);
        }
        return booleanQuery;
    }

    /**
     * Not implemented for this feature setup; the call has no effect.
     *
     * @param task
     *            ignored
     */
    @Override
    public void setSubQueries(EntityToDisambiguate task) {
        // TODO Auto-generated method stub
    }

    // Disabled stemming helper and analyzer cleanup, kept for reference:
    //
    // private String usePorterStemmer(String input) {
    // String nextToken = "";
    // try {
    // TokenStream source = analyzer.tokenStream(null, new StringReader(
    // input));
    // CharTermAttribute termAtt = source
    // .addAttribute(CharTermAttribute.class);
    // source.reset();
    // if (source.incrementToken()) {
    // nextToken = termAtt.toString();
    // }
    // } catch (IOException e) {
    // e.printStackTrace();
    // }
    // return nextToken;
    // }
    //
    // public void reset() {
    // analyzer.close();
    // }
}