// Copyright 2014 Thomas Müller // This file is part of HMMLA, which is licensed under GPLv3. package hmmla.io; import hmmla.util.LineIterator; import hmmla.util.Mapping; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.NoSuchElementException; public class PosReader implements Iterable<Sentence> { private Mapping map_; private PosFileOptions options_; public PosReader(String option_string) { this(option_string, null); } public PosReader(String option_string, Mapping map) { this(new PosFileOptions(option_string), map); } public PosReader(PosFileOptions options, Mapping map) { options_ = options; map_ = map; } public Iterator<Sentence> iterator() { return new Iterator<Sentence>() { int number_ = 0; LineIterator lineIterator = new LineIterator(options_.getFilename()); @Override public Sentence next() { int form_index_ = options_.getFormIndex(); int tag_index_ = options_.getTagIndex(); if (!hasNext()) { throw new NoSuchElementException(); } List<String> tags = new LinkedList<String>(); List<String> words = new LinkedList<String>(); while (lineIterator.hasNext()) { List<String> tokens = lineIterator.next(); if (tokens.isEmpty()) { break; } if (form_index_ < 0 || form_index_ >= tokens.size()) { RuntimeException e = new RuntimeException("form_index out of range: " + form_index_ + " : " + tokens); e.initCause(new Throwable("form_index")); throw e; } if (tag_index_ >= tokens.size()) { RuntimeException e = new RuntimeException("tag_index out of range: " + tag_index_ + " : " + tokens); e.initCause(new Throwable("tag_index")); throw e; } String word = tokens.get(form_index_); words.add(word); String tag = null; if (tag_index_ >= 0) { tag = tokens.get(tag_index_); if (map_ != null) { tag = map_.get(tag); } } tags.add(tag); } number_++; return new Sentence(words, tags); } @Override public boolean hasNext() { int limit = options_.getLimit(); if (limit >= 0 && number_ > limit) { return false; } return lineIterator.hasNext(); } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }