package hu.u_szeged.kpe.candidates;
import java.io.Serializable;
import java.util.Comparator;
import java.util.Locale;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.NormalizerAnnotator.NormalizerAnnotation;
/**
 * Orders {@link CoreLabel} tokens by a textual key and, when the keys are equal, by a shortened
 * lower-cased form of their tag.
 *
 * <p>Two orderings are offered:
 * <ul>
 * <li>{@link #compare(CoreLabel, CoreLabel)} keys on the string stored under
 * {@link NormalizerAnnotation} (the normalized lemma).</li>
 * <li>{@link #compareForNGramEquality(CoreLabel, CoreLabel)} keys on the word form with its first
 * character lower-cased.</li>
 * </ul>
 *
 * <p>Both orderings dereference the tag of each label, so a label without a tag causes a
 * {@link NullPointerException}.
 */
public class CoreLabelComparator implements Comparator<CoreLabel>, Serializable {

  /** Serialization version of this stateless comparator. */
  private static final long serialVersionUID = 1L;

  /** Number of leading tag characters that take part in the tie-break. */
  private static final int SHORT_TAG_LENGTH = 2;

  /**
   * Compares two labels by their {@link NormalizerAnnotation} string, breaking ties with the
   * shortened tag.
   *
   * @param labelA the first label; its tag must not be {@code null}
   * @param labelB the second label; its tag must not be {@code null}
   * @return a negative, zero or positive value as {@code labelA} sorts before, equal to or after
   *         {@code labelB}
   */
  @Override
  public int compare(CoreLabel labelA, CoreLabel labelB) {
    String baseOfComparisonA = labelA.getString(NormalizerAnnotation.class);
    String baseOfComparisonB = labelB.getString(NormalizerAnnotation.class);
    return compareKeysThenTags(baseOfComparisonA, baseOfComparisonB, labelA, labelB);
  }

  /**
   * Compares two labels by their word form, ignoring the case of the first character for words
   * longer than one character, and breaking ties with the shortened tag.
   *
   * @param labelA the first label; its word and tag must not be {@code null}
   * @param labelB the second label; its word and tag must not be {@code null}
   * @return a negative, zero or positive value as {@code labelA} sorts before, equal to or after
   *         {@code labelB}
   */
  public int compareForNGramEquality(CoreLabel labelA, CoreLabel labelB) {
    String baseOfComparisonA = decapitalize(labelA.word());
    String baseOfComparisonB = decapitalize(labelB.word());
    return compareKeysThenTags(baseOfComparisonA, baseOfComparisonB, labelA, labelB);
  }

  /** Compares the primary keys and falls back to the shortened tags when the keys are equal. */
  private static int compareKeysThenTags(String keyA, String keyB, CoreLabel labelA, CoreLabel labelB) {
    int comparison = keyA.compareTo(keyB);
    // Both tags are resolved up front, so a label without a tag fails no matter how the keys compare.
    String shortTagA = shortTag(labelA);
    String shortTagB = shortTag(labelB);
    return comparison != 0 ? comparison : shortTagA.compareTo(shortTagB);
  }

  /** Returns the lower-cased tag of the label, cut to at most {@link #SHORT_TAG_LENGTH} characters. */
  private static String shortTag(CoreLabel label) {
    String tag = label.tag();
    // Locale.ROOT keeps the ordering independent of the JVM default locale; with a Turkish default
    // locale a plain toLowerCase() would turn 'I' into dotless 'ı' and change the sort order.
    return tag.toLowerCase(Locale.ROOT).substring(0, Math.min(tag.length(), SHORT_TAG_LENGTH));
  }

  /** Lower-cases the first character of words that are longer than one character. */
  private static String decapitalize(String word) {
    // NOTE(review): single-character words are left untouched, so "A" and "a" stay distinct here.
    // This mirrors the original guard; confirm whether that is intended before relaxing it.
    if (word.length() > 1) {
      return Character.toLowerCase(word.charAt(0)) + word.substring(1);
    }
    return word;
  }
}