/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package de.tudarmstadt.ukp.dkpro.core.decompounding.splitter;
import java.util.Locale;

import de.drni.bananasplit.BananaSplit;
import de.drni.bananasplit.Compound;
import de.drni.bananasplit.affix.Affix;
import de.drni.bananasplit.simpledict.SimpleDictEntry;
import de.drni.bananasplit.simpledict.SimpleDictionaryInterface;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.Dictionary;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.LinkingMorphemes;
import de.tudarmstadt.ukp.dkpro.core.decompounding.trie.ValueNode;
/**
 * Wrapper for the banana splitter algorithm.
 * <p>
 * Adapts {@link BananaSplit} to the {@link SplitterAlgorithm} interface: the word is split
 * recursively and every intermediate split is stored as a node of a {@link DecompoundingTree}.
 * A dictionary must be supplied through {@link #setDictionary(Dictionary)} before
 * {@link #split(String)} can produce any split.
 */
public class BananaSplitterAlgorithm
    implements SplitterAlgorithm
{
    /**
     * Exposes a {@link Dictionary} through the dictionary interface expected by
     * {@link BananaSplit}.
     */
    private static class DictionaryWrapper
        implements SimpleDictionaryInterface
    {
        private final Dictionary dict;

        /**
         * @param aDict
         *            the dictionary all lookups are delegated to.
         */
        public DictionaryWrapper(Dictionary aDict)
        {
            dict = aDict;
        }

        /**
         * Looks up a word exactly as given.
         *
         * @param aWord
         *            the word to look up.
         * @return an entry for the word tagged {@code "UNKNOWN"} if the dictionary contains it,
         *         {@code null} otherwise.
         */
        @Override
        public SimpleDictEntry findWord(String aWord)
        {
            if (dict.contains(aWord)) {
                return new SimpleDictEntry(aWord, "UNKNOWN");
            }
            return null;
        }

        /**
         * Looks up the lower-cased form of a word.
         *
         * @param aWord
         *            the word to look up.
         * @return the result of {@link #findWord(String)} for the lower-cased word.
         */
        @Override
        public SimpleDictEntry findWordNoCase(String aWord)
        {
            // Locale.ROOT keeps the case mapping independent of the JVM default locale
            // (e.g. under a Turkish default locale "I" would otherwise become a dotless "ı"
            // and the lookup would miss).
            return findWord(aWord.toLowerCase(Locale.ROOT));
        }
    }

    /** The wrapped splitter; only assigned in {@link #setDictionary(Dictionary)}. */
    private BananaSplit splitter;

    /** Recursion limit for {@link #bananaSplit(ValueNode, int)}; unlimited by default. */
    private int maxTreeDepth = Integer.MAX_VALUE;

    /**
     * Builds the decompounding tree for a word.
     *
     * @param aWord
     *            the word to split.
     * @return the tree rooted at the unsplit word, with one child node per split found.
     */
    @Override
    public DecompoundingTree split(String aWord)
    {
        DecompoundingTree tree = new DecompoundingTree(aWord);
        // Flag the first fragment of the root so that the recursion attempts to split it.
        tree.getRoot().getValue().getSplits().get(0).setSplitAgain(true);
        bananaSplit(tree.getRoot(), 0);
        return tree;
    }

    /**
     * Recursively creates the split tree.
     * <p>
     * Every fragment of the parent that is flagged for re-splitting is passed to
     * {@link #makeSplit(String)}. A result with more than one fragment becomes a child node
     * that is processed recursively; a result with exactly one fragment becomes a leaf child,
     * provided it differs from the parent's value.
     *
     * @param aParent
     *            The parent node
     * @param aDepth
     *            the current depth.
     */
    protected void bananaSplit(ValueNode<DecompoundedWord> aParent, int aDepth)
    {
        if (aDepth > maxTreeDepth) {
            return;
        }

        DecompoundedWord parentWord = aParent.getValue();
        for (int i = 0; i < parentWord.getSplits().size(); i++) {
            Fragment element = parentWord.getSplits().get(i);
            if (!element.shouldSplitAgain()) {
                continue;
            }

            DecompoundedWord result = makeSplit(element.getWord());
            if (result == null) {
                // The splitter found nothing for this fragment.
                continue;
            }

            DecompoundedWord copy = parentWord.createCopy();
            int parts = result.getSplits().size();
            if (parts > 1) {
                // Both halves of the new split may be compounds themselves.
                result.getSplits().get(0).setSplitAgain(true);
                result.getSplits().get(1).setSplitAgain(true);
                copy.replaceSplitElement(i, result);
                ValueNode<DecompoundedWord> child = new ValueNode<DecompoundedWord>(copy);
                aParent.addChild(child);
                bananaSplit(child, aDepth + 1);
            }
            else if (parts == 1 && !result.equals(parentWord)) {
                // Single fragment: keep it as a leaf, nothing is flagged for re-splitting.
                copy.replaceSplitElement(i, result);
                ValueNode<DecompoundedWord> child = new ValueNode<DecompoundedWord>(copy);
                aParent.addChild(child);
            }
        }
    }

    /**
     * Split a word with the banana splitter.
     *
     * @param aWord
     *            The word to split
     * @return the split word, or {@code null} if the splitter threw an exception or returned a
     *         non-zero result code.
     */
    protected DecompoundedWord makeSplit(String aWord)
    {
        int resultValue;
        try {
            resultValue = splitter.splitCompound(aWord);
        }
        catch (Exception ignored) {
            // Any failure of the splitter is treated as "no split". This also covers the
            // NullPointerException raised when no dictionary has been set yet.
            return null;
        }

        if (resultValue != 0) {
            // A non-zero result code is treated as "no split".
            return null;
        }

        return compoundToSplit(splitter.getCompound());
    }

    /**
     * Converts the banana split compound to a split.
     * <p>
     * The compound is rendered as {@code left(add)+right(add)} and parsed with
     * {@link DecompoundedWord#createFromString(String)}. For each atom, as many trailing
     * characters as the affix's "del" string is long are cut off, and the affix's "add" string
     * is appended in parentheses when it is not empty. The left part and the {@code +} are
     * omitted when the compound has no left atom.
     *
     * @param aCompound
     *            the compound.
     * @return the split word.
     */
    protected DecompoundedWord compoundToSplit(Compound aCompound)
    {
        StringBuilder encoded = new StringBuilder();

        String left = aCompound.getLeftAtom();
        if (left != null) {
            Affix bounding = aCompound.getBoundingSuffix();
            encoded.append(left.substring(0, left.length() - bounding.getDel().length()));
            if (bounding.getAdd().length() > 0) {
                encoded.append("(").append(bounding.getAdd()).append(")");
            }
            encoded.append("+");
        }

        Affix suffix = aCompound.getInflectionSuffix();
        String right = aCompound.getRightAtom();
        encoded.append(right.substring(0, right.length() - suffix.getDel().length()));
        if (suffix.getAdd().length() > 0) {
            encoded.append("(").append(suffix.getAdd()).append(")");
        }

        return DecompoundedWord.createFromString(encoded.toString());
    }

    /**
     * Creates a new banana splitter backed by the given dictionary.
     *
     * @param aDict
     *            the dictionary used for word lookups.
     */
    @Override
    public void setDictionary(Dictionary aDict)
    {
        splitter = new BananaSplit(new DictionaryWrapper(aDict));
    }

    /**
     * Ignored; this implementation does nothing with the linking morphemes.
     *
     * @param aMorphemes
     *            unused.
     */
    @Override
    public void setLinkingMorphemes(LinkingMorphemes aMorphemes)
    {
        // Not needed for this algorithm
    }

    /**
     * Sets the depth limit checked at the start of every recursion step.
     *
     * @param aDepth
     *            recursion stops once the current depth is greater than this value.
     */
    @Override
    public void setMaximalTreeDepth(int aDepth)
    {
        maxTreeDepth = aDepth;
    }
}