// Copyright 2014 Thomas Müller // This file is part of HMMLA, which is licensed under GPLv3. package hmmla.io; import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.Writer; import java.util.List; import java.util.zip.GZIPOutputStream; public class PosWriter { Writer writer_; String subtoken_delimiter_ = "\t"; String token_delimiter_ = "\n"; String sentence_delimiter_ = "\n"; public PosWriter(String filename) { try { if (filename.endsWith(".gz")) { writer_ = new BufferedWriter(new OutputStreamWriter( new GZIPOutputStream(new FileOutputStream(filename)))); } else { writer_ = new BufferedWriter(new FileWriter(filename)); } } catch (IOException e) { throw new RuntimeException(e); } } public PosWriter(Writer writer) { writer_ = writer; } public PosWriter(PrintStream printStream) { writer_ = new OutputStreamWriter(printStream); } public void write(Sentence sentence) { try { for (int index = 0; index < sentence.size(); index += 1) { Token token = sentence.get(index); writer_.write(Integer.toString(index)); writer_.write(subtoken_delimiter_); writer_.write(token.getWordForm()); if (token.getTag() != null) { writer_.write(subtoken_delimiter_); writer_.write(token.getTag()); } writer_.write(token_delimiter_); } writer_.write(sentence_delimiter_); } catch (IOException e) { throw new RuntimeException(e); } } public void write(Iterable<Sentence> sentences) { for (Sentence sentence : sentences) { write(sentence); } } public void close() { try { writer_.close(); } catch (IOException e) { throw new RuntimeException(e); } } public void write(Sentence sentence, List<String> tags) { sentence = new Sentence(sentence); sentence.setTags(tags); write(sentence); } }