/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package de.tudarmstadt.ukp.dkpro.core.decompounding.ranking;
import java.util.Collections;
import java.util.List;
import de.tudarmstadt.ukp.dkpro.core.decompounding.splitter.DecompoundedWord;
import de.tudarmstadt.ukp.dkpro.core.decompounding.splitter.Fragment;
import de.tudarmstadt.ukp.dkpro.core.decompounding.trie.ValueNode;
import de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.Finder;
/**
 * Probability based ranking method.
 * <p>
 * The weight of a split is the sum, over all of its fragments, of the negative natural logarithm
 * of the fragment's frequency (as reported by {@code freq}) divided by the unigram count of the
 * configured {@link Finder}.
 */
public class CompoundProbabilityRanker
    extends AbstractRanker
    implements RankerList
{
    /**
     * Empty constructor
     *
     * Use {@link #setFinder(Finder)} before using this class
     */
    public CompoundProbabilityRanker()
    {
    }

    /**
     * Constructor
     *
     * @param aFinder
     *            a finder.
     */
    public CompoundProbabilityRanker(Finder aFinder)
    {
        super(aFinder);
    }

    /**
     * Returns the first element of the list produced by {@link #rank(List)}.
     *
     * @param aSplits
     *            the candidate splits; their weights are overwritten as a side effect.
     * @return the first split of the ranked list.
     * @throws IndexOutOfBoundsException
     *             if the ranked list is empty.
     */
    @Override
    public DecompoundedWord highestRank(List<DecompoundedWord> aSplits)
    {
        return rank(aSplits).get(0);
    }

    /**
     * Assigns a weight to every given split and returns the splits ordered by rank.
     * <p>
     * Each split's weight is set to its summed negative log probability. The splits are then
     * passed through the inherited {@code filterAndSort} and the resulting list is sorted in the
     * reverse of the natural ordering of {@link DecompoundedWord}.
     *
     * @param aSplits
     *            the candidate splits; their weights are overwritten as a side effect.
     * @return the list returned by {@code filterAndSort}, re-sorted in reverse natural order.
     */
    @Override
    public List<DecompoundedWord> rank(List<DecompoundedWord> aSplits)
    {
        for (DecompoundedWord candidate : aSplits) {
            candidate.setWeight(calcRank(candidate));
        }

        List<DecompoundedWord> ranked = filterAndSort(aSplits);
        Collections.sort(ranked, Collections.reverseOrder());
        return ranked;
    }

    /**
     * Calculates the weight for a split.
     * <p>
     * The weight is the sum of {@code -ln(freq(fragment) / unigramCount)} over all fragments of
     * the split. With a positive unigram count, a fragment whose frequency is zero contributes
     * positive infinity, because {@code Math.log(0.0)} is negative infinity.
     *
     * @param aSplit
     *            the split to score.
     * @return the summed negative log probability, narrowed to {@code float}.
     */
    private float calcRank(DecompoundedWord aSplit)
    {
        // Accumulate in double precision. Summing into a float with "+=" would silently narrow
        // the running total on every iteration and compound the rounding error per fragment.
        double negativeLogProbability = 0.0;

        for (Fragment fragment : aSplit.getSplits()) {
            double probability = freq(fragment).doubleValue()
                    / getFinder().getUnigramCount().doubleValue();
            negativeLogProbability -= Math.log(probability);
        }

        // Narrow exactly once, at the end.
        return (float) negativeLogProbability;
    }

    /**
     * Searches a path through the tree.
     * <p>
     * The value of {@code aParent} is ranked together with the values of its direct children.
     * If the parent's value comes out first, or if the parent has no children, the parent's
     * value is returned. Otherwise the search continues recursively in the first child whose
     * value equals the best ranked one.
     *
     * @param aParent
     *            the node at which the search starts.
     * @param aPath
     *            if not {@code null}, the value of every visited node is appended to this list.
     * @return the value of the node at which the search stopped, or {@code null} if the best
     *         ranked value equals neither the parent's value nor any child's value.
     */
    @Override
    public DecompoundedWord highestRank(ValueNode<DecompoundedWord> aParent,
            List<DecompoundedWord> aPath)
    {
        DecompoundedWord parentValue = aParent.getValue();

        if (aPath != null) {
            aPath.add(parentValue);
        }

        List<DecompoundedWord> candidates = aParent.getChildrenValues();
        if (candidates.isEmpty()) {
            // Leaf node: nothing to compare against.
            return parentValue;
        }

        // The parent competes against its own children.
        // NOTE(review): this appends to the list returned by getChildrenValues(); it assumes
        // that list is a modifiable copy and not the node's internal state - confirm.
        candidates.add(parentValue);
        DecompoundedWord best = rank(candidates).get(0);

        if (best.equals(parentValue)) {
            // None of the children get a better score than the parent
            return parentValue;
        }

        // Find the child node that ranked best and recurse
        for (ValueNode<DecompoundedWord> child : aParent.getChildren()) {
            if (best.equals(child.getValue())) {
                return highestRank(child, aPath);
            }
        }

        // Only reached when the best ranked value matches no child.
        return null;
    }
}