package project.utils.collocation.impl;
import java.awt.Point;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import project.utils.collocation.CollocationCumulator;
import project.utils.collocation.Matrix;
import project.utils.collocation.WordStatistics;
/**
 * Accumulates collocation statistics from several matrices into one matrix,
 * and reports the accumulated entries ordered by number of occurrences.
 *
 * <p>Cells are addressed by their {@code (x, y)} coordinates. When a cell is
 * added for coordinates that are already present, the incoming offsets are
 * merged into the existing {@link WordStatistics}; otherwise the incoming
 * object itself is stored (no copy is made, so it is shared with the caller's
 * matrix).
 */
public class CollocationCumulator_Impl implements CollocationCumulator {

    /**
     * Orders statistics by number of occurrences, largest first. Written as an
     * explicit three-way comparison because subtracting the two counts can
     * overflow and produce a wrong sign.
     */
    private static final Comparator<WordStatistics> BY_OCCURRENCES_DESC =
            new Comparator<WordStatistics>() {
                public int compare(WordStatistics o1, WordStatistics o2) {
                    int first = o1.getNumOccurences();
                    int second = o2.getNumOccurences();
                    if (second < first) {
                        return -1;
                    }
                    return (second == first) ? 0 : 1;
                }
            };

    /** The accumulated statistics; mutated by every public method except the getter. */
    private final Matrix<WordStatistics> matrix;

    /** Creates a cumulator with an empty matrix. */
    public CollocationCumulator_Impl() {
        matrix = new Matrix<WordStatistics>();
    }

    /**
     * Merges every cell of {@code collocation} into the accumulated matrix.
     *
     * @param collocation the matrix whose cells are merged in; must not be {@code null}
     */
    public void addCollocation(Matrix<WordStatistics> collocation) {
        for (Point pt : collocation.getValues().keySet()) {
            WordStatistics toInsert = collocation.getValue(pt.x, pt.y);
            WordStatistics currentStats = matrix.getValue(pt.x, pt.y);

            if (currentStats == null) {
                // Nothing stored at these coordinates yet: store the incoming object as-is.
                matrix.setValue(pt.x, pt.y, toInsert);
                continue;
            }

            // Snapshot the count before the loop. If toInsert and currentStats are the
            // same object (e.g. a matrix merged with itself), re-reading the count on
            // every iteration could keep moving the loop bound.
            // NOTE(review): this assumes addOffset changes the occurrence count - confirm.
            int occurrences = toInsert.getNumOccurences();
            for (int j = 0; j < occurrences; j++) {
                currentStats.addOffset(toInsert.getOffset());
            }
            currentStats.updateStats();
            matrix.setValue(pt.x, pt.y, currentStats);
        }
    }

    /** Removes every accumulated entry. */
    public void clearCollocations() {
        this.matrix.getValues().clear();
    }

    /**
     * Returns all accumulated statistics, most frequent first.
     *
     * @return a new list; see {@link #getCollocations(int)}
     */
    public List<WordStatistics> getCollocations() {
        return this.getCollocations(-1);
    }

    /**
     * Returns the accumulated statistics ordered by number of occurrences,
     * largest first, optionally cropped to the first {@code limit} entries.
     *
     * <p>Side effects: entries whose occurrence count is below 1 are removed
     * from the accumulated matrix, and progress messages are written to
     * {@code System.out}.
     *
     * @param limit maximum number of entries to return; a value of 0 or less
     *              means no limit
     * @return a new, modifiable list that is independent of the matrix
     */
    public List<WordStatistics> getCollocations(int limit) {
        System.out.println ("getCollocations");
        System.out.println ("\t> removing 1 occurence collocations");

        // Only entries with a count below 1 are dropped; entries with exactly one
        // occurrence are kept.
        // NOTE(review): the message above suggests single-occurrence entries were
        // meant to be dropped as well - confirm the intended threshold.
        Iterator<WordStatistics> it = matrix.getValues().values().iterator();
        while (it.hasNext()) {
            if (it.next().getNumOccurences() < 1) {
                it.remove();
            }
        }

        System.out.println ("\t> done");
        int size = matrix.getValues().size();
        System.out.println ("Remaining " + size + " collocations");

        // Copy once, sort once, crop once. The sort is stable, so entries with equal
        // counts keep the matrix iteration order.
        List<WordStatistics> sorted =
                new ArrayList<WordStatistics>(matrix.getValues().values());
        Collections.sort(sorted, BY_OCCURRENCES_DESC);

        if (limit > 0 && sorted.size() > limit) {
            sorted.subList(limit, sorted.size()).clear();
        }
        return sorted;
    }

    /**
     * Returns the accumulated matrix itself (not a copy), so changes made
     * through the returned object affect this cumulator.
     *
     * @return the internal matrix
     */
    public Matrix<WordStatistics> getCollocationMatrix() {
        return matrix;
    }
}