package edu.berkeley.cs.nlp.ocular.data.textreader; import java.util.ArrayList; import java.util.List; /** * @author Dan Garrette (dhgarrette@gmail.com) */ public class ConvertLongSTextReader implements TextReader { private TextReader delegate; public ConvertLongSTextReader(TextReader delegate) { this.delegate = delegate; } public List<String> readCharacters(String line) { List<String> chars = new ArrayList<String>(); for (String c : delegate.readCharacters(line)) { chars.add(c); } /* * Replace 's' characters with 'long-s' characters. */ // for every letter except the last (since the last letter can // never be a long-s since it can never be followed by a letter for (int t = 0; t < chars.size() - 1; t++) { if (chars.get(t).equals("s")) { String next = chars.get(t + 1); String nextWithoutDiacritics = Charset.removeAnyDiacriticFromChar(next); if (nextWithoutDiacritics.length() != 1) { if (!nextWithoutDiacritics.equals("\\\\")) { throw new AssertionError("expected nextWithoutDiacritics [" + nextWithoutDiacritics + "] length() == 1"); } } char nextWithoutDiacriticsChar = nextWithoutDiacritics.charAt(0); if (t > 0 && chars.get(t - 1).equals(Charset.LONG_S) && nextWithoutDiacriticsChar == 'i') { // "ſsi": do nothing } else if (Character.isAlphabetic(nextWithoutDiacriticsChar)) { chars.set(t, Charset.LONG_S); } } } return chars; } public String toString() { return "ConvertLongSTextReader(" + delegate + ")"; } }