/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package de.tudarmstadt.ukp.dkpro.core.decompounding.ranking;
import java.util.List;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import de.tudarmstadt.ukp.dkpro.core.decompounding.splitter.DecompoundedWord;
import de.tudarmstadt.ukp.dkpro.core.decompounding.splitter.Fragment;
import de.tudarmstadt.ukp.dkpro.core.decompounding.trie.ValueNode;
import de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.Finder;
/**
* Frequency based ranking algorithm. See doc folder for more informations.
*
*/
public class FrequencyGeometricMeanRanker
extends AbstractRanker
implements RankerList
{
/**
* Empty constructor
*
* Use {@link #setFinder(Finder)} before using this class
*/
public FrequencyGeometricMeanRanker() {
}
public FrequencyGeometricMeanRanker(Finder aFinder)
{
super(aFinder);
}
@Override
public DecompoundedWord highestRank(List<DecompoundedWord> aSplits)
{
return rank(aSplits).get(0);
}
@Override
public List<DecompoundedWord> rank(List<DecompoundedWord> aSplits)
{
for (DecompoundedWord split : aSplits) {
split.setWeight(calcRank(split));
}
return filterAndSort(aSplits);
}
/**
* Calculates the weight for a split
*/
private double calcRank(DecompoundedWord aSplit)
{
SummaryStatistics stats = new SummaryStatistics();
for (Fragment elem : aSplit.getSplits()) {
stats.addValue(freq(elem).doubleValue());
}
return stats.getGeometricMean();
}
/**
* Searches a a path throw the tree
*/
@Override
public DecompoundedWord highestRank(ValueNode<DecompoundedWord> aParent,
List<DecompoundedWord> aPath)
{
if (aPath != null) {
aPath.add(aParent.getValue());
}
List<DecompoundedWord> children = aParent.getChildrenValues();
if (children.size() == 0) {
return aParent.getValue();
}
children.add(aParent.getValue());
List<DecompoundedWord> result = rank(children);
DecompoundedWord best = result.get(0);
if (best.equals(aParent.getValue())) {
// None of the childs get a better score than the parent
return aParent.getValue();
}
else {
// Find the child node that ranked best and recurse
for (ValueNode<DecompoundedWord> split : aParent.getChildren()) {
if (best.equals(split.getValue())) {
return highestRank(split, aPath);
}
}
}
return null;
}
}