/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package de.tudarmstadt.ukp.dkpro.core.decompounding.splitter;
import java.util.ArrayList;
import java.util.List;
/**
* A data container for a split of a word. This container stores on split of all possible splits.
*
*/
public class DecompoundedWord
implements Comparable<DecompoundedWord>
{
private List<Fragment> splits = new ArrayList<Fragment>();
private double weight;
private int splitPos = -1;
/**
* Create a split from a string
*
* The string has the structure: word1(morpheme)+word2(morpheme)+...+word3
*
* For example: "Aktion(s)+plan" or "Verbraucher+zahlen"
*
* @param aSplit
* an string-encoded split
* @return the split word.
*/
public static DecompoundedWord createFromString(String aSplit)
{
DecompoundedWord s = new DecompoundedWord();
String[] elems = aSplit.split("\\+");
for (String string : elems) {
s.appendSplitElement(Fragment.createFromString(string));
}
return s;
}
@Override
public int hashCode()
{
final int prime = 31;
int result = 1;
result = prime * result + ((splits == null) ? 0 : splits.hashCode());
return result;
}
@Override
public boolean equals(Object obj)
{
if (obj == null) {
return false;
}
return toString().equals(obj.toString());
}
@Override
public String toString()
{
StringBuilder s = new StringBuilder();
for (int i = 0; i < splits.size(); i++) {
s.append(splits.get(i).toString());
if (i < splits.size() - 1) {
s.append('+');
}
}
return s.toString();
}
/**
* Adds a split element at the end
*
* @param aSplit
* a split.
*/
public void appendSplitElement(Fragment aSplit)
{
splits.add(aSplit);
}
/**
* Adds a split element to the beginning
*
* @param aSplit
* a split.
*/
public void prependSplitElement(Fragment aSplit)
{
splits.add(0, aSplit);
}
/**
* Returns all split elements
*
* @return all split elements
*/
public List<Fragment> getSplits()
{
return splits;
}
/**
* Set all split elements
*
* @param aSplits
* all split elements.
*/
public void setSplits(List<Fragment> aSplits)
{
splits = aSplits;
}
/**
* Adds a list of split elements.
*
* @param aSplits
* list of fragments.
*/
public void addAll(List<Fragment> aSplits)
{
splits.addAll(aSplits);
}
/**
* Replace one split element with a split. That means all split elements will be inserted at the
* position of the split element
*
* @param aIndex
* the index.
* @param aSplit
* the split.
*/
public void replaceSplitElement(int aIndex, DecompoundedWord aSplit)
{
splits.remove(aIndex);
for (int j = 0; j < aSplit.getSplits().size(); j++) {
Fragment e = aSplit.getSplits().get(j);
splits.add(aIndex + j, e);
}
}
/**
* Replaces a split element with another one
*
* @param aIndex
* the index.
* @param aSplitElement
* the fragement.
*/
public void replaceSplitElement(int aIndex, Fragment aSplitElement)
{
splits.set(aIndex, aSplitElement);
}
/**
* Similar to the equals method, but combines morpheme and word
*
* @param aOtherSplit
* a split.
* @return if they are equal without morpheme
*/
public boolean equalWithoutMorpheme(DecompoundedWord aOtherSplit)
{
return toStringWithoutMorpheme().equals(aOtherSplit.toStringWithoutMorpheme());
}
/**
* Similar to the toString method, but combines morpheme and word
*/
private String toStringWithoutMorpheme()
{
StringBuilder s = new StringBuilder();
for (int i = 0; i < splits.size(); i++) {
s.append(splits.get(i).toStringWithoutMorpheme());
if (i < splits.size() - 1) {
s.append('+');
}
}
return s.toString();
}
/**
* Returns the complete word without + or ()
*
* @return the complete word without + or ()
*/
public String getWord()
{
StringBuilder word = new StringBuilder();
for (Fragment e : getSplits()) {
word.append(e.getWord());
if (e.hasMorpheme()) {
word.append(e.getMorpheme());
}
}
return word.toString();
}
/**
* Creates a copy of this element.
*
* @return copy of this element.
*/
public DecompoundedWord createCopy()
{
DecompoundedWord s = DecompoundedWord.createFromString(toString());
s.setSplitPos(getSplitPos());
return s;
}
/**
* Returns the ranked weight of the split.
*
* @return the ranked weight of the split.
*/
public double getWeight()
{
return weight;
}
/**
* Sets a rank weight for the split.
*
* @param aWeight
* rank weight for the split.
*/
public void setWeight(double aWeight)
{
weight = aWeight;
}
@Override
public int compareTo(DecompoundedWord aOtherSplit)
{
if (getWeight() < aOtherSplit.getWeight()) {
return 1;
}
else if (getWeight() == aOtherSplit.getWeight()) {
return 0;
}
else {
return -1;
}
}
public void setSplitPos(int aSplitPos)
{
if (splitPos != -1) {
throw new IllegalStateException("Oops.");
}
splitPos = aSplitPos;
}
public int getSplitPos()
{
return splitPos;
}
/**
*
* Checks if this instance is a compounding word.
*
* @return true if this instance is a decompounded word
*
* */
public boolean isCompound()
{
return splits.size() != 1;
}
/**
*
* Checks if last fragment has a linking morpheme.
*
* @return true if this instance does not have a linking morpheme in the last fragment
*
* */
public boolean hasLastFragmentMorpheme()
{
return splits.get(splits.size() - 1).hasMorpheme();
}
}