package ivory.ffg.feature;
import ivory.ffg.score.ScoringFunction;
import ivory.ffg.stats.GlobalStats;
/**
* Implementation of a phrase feature (unordered-window, sequential-dependence model)
*
* @author Nima Asadi
*/
public class UnorderedWindowSequentialDependenceFeature implements Feature {
private int window;
private ScoringFunction scoringFunction;
/**
* @param window Window size
*/
public UnorderedWindowSequentialDependenceFeature(int window) {
this.window = window * 2;
}
@Override public void initialize(ScoringFunction scoringFunction) {
this.scoringFunction = scoringFunction;
}
@Override public float computeScoreWithSlidingWindow(int[] document, int[] query, int[] hashedQuery, GlobalStats stats) {
if(query.length == 1) {
return 0f;
}
int[] tf = countTerms(document, hashedQuery, window);
float score = 0;
for(int i = 0; i < tf.length; i++) {
score += scoringFunction.computePhraseScore(document.length, tf[i], stats);
}
return score;
}
@Override public float computeScoreWithMiniIndexes(int[][] positions, int[] query, int dl, GlobalStats stats) {
if(query.length == 1) {
return 0f;
}
int[] tf = countTerms(positions, window);
float score = 0;
for(int i = 0; i < tf.length; i++) {
score += scoringFunction.computePhraseScore(dl, tf[i], stats);
}
return score;
}
public static int[] countTerms(int[][] positions, int window) {
int[] tf = new int[positions.length - 1];
for(int i = 0; i < positions.length - 1; i++) {
int[] p = positions[i];
int[] pn = positions[i + 1];
for(int j = 0; j < p.length; j++) {
for(int k = 0; k < pn.length; k++) {
if(pn[k] > p[j] && (pn[k] - p[j] + 1) <= window) {
tf[i]++;
break;
} else if(pn[k] < p[j] && (p[j] - pn[k] + 1) <= window) {
if(j > 0) {
if(p[j - 1] < pn[k]) {
tf[i]++;
}
} else {
tf[i]++;
}
}
}
}
}
return tf;
}
public static int[] countTerms(int[] document, int[] query, int window) {
int[] tf = new int[query.length - 1];
for(int i = 0; i < document.length; i++) {
if(document[i] != query[0]) {
continue;
}
for(int j = i + 1; j < i + window && j < document.length; j++) {
if(document[j] == query[1]) {
tf[0]++;
break;
}
}
}
for(int q = 1; q < query.length - 1; q++) {
for(int i = 0; i < document.length; i++) {
if(document[i] != query[q]) {
continue;
}
for(int j = i + 1; j < i + window && j < document.length; j++) {
if(document[j] == query[q + 1]) {
tf[q]++;
break;
}
}
for(int j = i + 1; j < i + window && j < document.length; j++) {
if(document[j] == query[q - 1]) {
tf[q - 1]++;
break;
}
}
}
}
int e = query.length - 1;
for(int i = 0; i < document.length; i++) {
if(document[i] != query[e]) {
continue;
}
for(int j = i + 1; j < i + window && j < document.length; j++) {
if(document[j] == query[e - 1]) {
tf[e - 1]++;
break;
}
}
}
return tf;
}
@Override public String toString() {
return "featureClass=\"" + UnorderedWindowSequentialDependenceFeature.class.getName() +
"\" width=\"" + window + "\" " +
scoringFunction.toString();
}
}