package edu.berkeley.cs.nlp.ocular.eval;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import edu.berkeley.cs.nlp.ocular.data.textreader.Charset;
import edu.berkeley.cs.nlp.ocular.util.Tuple2;
/**
* @author Taylor Berg-Kirkpatrick (tberg@eecs.berkeley.edu)
*/
public class Form implements Comparable<Form> {
private final List<Glyph> glyphs;
public Form(List<Glyph> glyphs) {
this.glyphs = glyphs;
}
public static Form charsAsGlyphs(String str) {
return charsAsGlyphs(str, true);
}
/**
*
* @param str
* @param charIncludesDiacritic If false, the diacritic will be scored separately from the base character.
* @return
*/
public static Form charsAsGlyphs(String str, boolean charIncludesDiacritic) {
List<Glyph> glyphs = new ArrayList<Glyph>();
for (String c : Charset.readNormalizeCharacters(str)) {
if (charIncludesDiacritic) {
glyphs.add(new Glyph(c));
}
else {
Tuple2<String,List<String>> letterAndNormalDiacritics = Charset.normalizeCharSeparateDiacritics(c);
Collections.sort(letterAndNormalDiacritics._2);
for (String diacritic : letterAndNormalDiacritics._2) {
glyphs.add(new Glyph(diacritic));
}
glyphs.add(new Glyph(letterAndNormalDiacritics._1));
}
}
return new Form(glyphs);
}
public static Form wordsAsGlyphs(List<String> words) {
List<Glyph> glyphs = new ArrayList<Glyph>();
for (int i = 0; i < words.size(); i++) {
glyphs.add(new Glyph(words.get(i)));
}
return new Form(glyphs);
}
public Form substring(int start) {
return substring(start, length());
}
public Form substring(int start, int end) {
return new Form(glyphs.subList(start, end));
}
public int length() {
return glyphs.size();
}
public Glyph charAt(int index) {
return glyphs.get(index);
}
public Form append(Form other) {
List<Glyph> newGlyphs = new ArrayList<Glyph>();
newGlyphs.addAll(this.glyphs);
newGlyphs.addAll(other.glyphs);
return new Form(newGlyphs);
}
@Override
public boolean equals(Object other) {
if (other == null || !(other instanceof Form)) {
return false;
}
return this.glyphs.equals(((Form)other).glyphs);
}
@Override
public int hashCode() {
return this.glyphs.hashCode();
}
@Override
public String toString() {
String ret = "";
for (Glyph glyph : glyphs) {
ret += glyph.toString();
}
return ret;
}
public String toStringWithSpaces() {
String ret = "";
for (Glyph glyph : glyphs) {
ret += glyph.toString() + " ";
}
return ret;
}
@Override
public int compareTo(Form o) {
return compareCollections(this.glyphs, o.glyphs);
}
public static <T extends Comparable<T>> int compareCollections(Iterable<T> col1, Iterable<T> col2) {
Iterator<T> first = col1.iterator();
Iterator<T> second = col2.iterator();
while (first.hasNext() && second.hasNext()) {
int result = first.next().compareTo(second.next());
if (result != 0) {
return result;
}
}
if (!first.hasNext() && !second.hasNext()) {
return 0;
}
// Longer one comes second
return (first.hasNext() ? 1 : -1);
}
}