package edu.berkeley.cs.nlp.ocular.data.textreader;
import java.util.ArrayList;
import java.util.List;
/**
 * A {@link TextReader} that turns raw lines of text into lists of character
 * strings. Before splitting, each line gets a few plain-text normalizations:
 * backslashes are optionally doubled, the two-character quote sequences
 * {@code ``} and {@code ''} become a double-quote character, and tabs become
 * single spaces. The splitting itself is delegated to
 * {@code Charset.readNormalizeCharacters}.
 *
 * @author Dan Garrette (dhgarrette@gmail.com)
 */
public class BasicTextReader implements TextReader {

	/** When false, every backslash in the input is doubled before splitting. */
	private final boolean treatBackslashAsEscape;

	/**
	 * @param treatBackslashAsEscape if {@code true}, backslashes in the input
	 *        are passed through untouched; if {@code false}, each backslash is
	 *        doubled before the line is handed to the character splitter.
	 */
	public BasicTextReader(boolean treatBackslashAsEscape) {
		this.treatBackslashAsEscape = treatBackslashAsEscape;
	}

	/**
	 * Creates a reader that leaves backslashes untouched (equivalent to
	 * {@code new BasicTextReader(true)}).
	 */
	public BasicTextReader() {
		this(true);
	}

	/**
	 * Reads each line with {@link #readCharacters(String)}.
	 *
	 * @param lines the lines to read
	 * @return one character list per input line, in the same order
	 */
	public List<List<String>> readCharacters(List<String> lines) {
		List<List<String>> characterLines = new ArrayList<List<String>>();
		for (String l : lines) {
			characterLines.add(readCharacters(l));
		}
		return characterLines;
	}

	/**
	 * Normalizes a single line and splits it into characters.
	 *
	 * @param line the raw text of one line
	 * @return a new list holding the elements produced by
	 *         {@code Charset.readNormalizeCharacters} for the normalized line
	 */
	public List<String> readCharacters(String line) {
		if (!treatBackslashAsEscape) {
			// Double every backslash; presumably the splitter then reads the
			// pair as one literal backslash rather than an escape -- TODO confirm
			// against Charset.readNormalizeCharacters.
			line = line.replace("\\", "\\\\");
		}
		// Collapse two-character quote sequences into a plain double quote.
		line = line.replace("``", "\"");
		line = line.replace("''", "\"");
		// Each tab becomes exactly one space.
		line = line.replace("\t", " ");

		// Split characters and convert to diacritic-normalized forms.
		List<String> normalizedChars = new ArrayList<String>();
		for (String c : Charset.readNormalizeCharacters(line)) {
			normalizedChars.add(c);
		}
		return normalizedChars;
	}

	/**
	 * @return {@code "BasicTextReader(<flag>)"}, where {@code <flag>} is the
	 *         backslash-escape setting
	 */
	@Override
	public String toString() {
		return "BasicTextReader(" + treatBackslashAsEscape + ")";
	}
}