/**
* Copyright (c) 2009, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.reader;
import clear.dep.ITree;
import clear.util.IOUtil;
import java.io.BufferedReader;
import java.io.IOException;
/**
 * Abstract reader that groups the lines of a text file into trees.
 *
 * <p>Consecutive lines for which {@link #isSkip(String)} is false are converted
 * to nodes through {@link #toNode(String, int)} and added to a tree; lines for
 * which {@link #isSkip(String)} is true separate one tree from the next.
 * Subclasses decide how a line becomes a node and what {@link #nextTree()}
 * returns.
 *
 * @param <NodeType> type of the nodes created by {@link #toNode(String, int)}
 * @param <TreeType> type of the trees returned by {@link #nextTree()}
 * @author Jinho D. Choi <b>Last update:</b> 6/26/2010
 */
public abstract class AbstractReader<NodeType, TreeType> {
    /**
     * Flag for Chinese
     */
    public static final String LANG_CH = "ch";
    /**
     * Flag for English
     */
    public static final String LANG_EN = "en";
    /**
     * Flag for Hindi
     */
    public static final String LANG_HI = "hi";
    /**
     * Flag for Czech
     */
    public static final String LANG_CZ = "cz";
    /**
     * Flag for Korean
     */
    public static final String LANG_KR = "kr";
    /**
     * Flag for raw format
     */
    public static final String FORMAT_RAW = "raw";
    /**
     * Flag for tok format
     */
    public static final String FORMAT_TOK = "tok";
    /**
     * Flag for part-of-speech format
     */
    public static final String FORMAT_POS = "pos";
    /**
     * Flag for dependency format
     */
    public static final String FORMAT_DEP = "dep";
    /**
     * Flag for semantic-role labeling format
     */
    public static final String FORMAT_SRL = "srl";
    /**
     * Flag for CoNLL-X format
     */
    public static final String FORMAT_CONLLX = "conllx";
    /**
     * Flag for dependency-verbnet format
     */
    public static final String FORMAT_DEPV = "depv";
    /**
     * Delimiter between fields
     */
    public static final String FIELD_DELIM = "\t";
    /**
     * Value for empty fields
     */
    public static final String EMPTY_FIELD = "_";
    /**
     * File reader; assigned by {@link #open(String)}.
     */
    protected BufferedReader f_in;
    /**
     * Language code; defaults to {@link #LANG_EN}.
     */
    protected String s_language = LANG_EN;

    /**
     * The constructor calls {@link AbstractReader#open(String)}.
     *
     * <p>NOTE(review): {@code open} is public and non-final, so an override in a
     * subclass runs before that subclass's own fields are initialized.
     *
     * @param filename name of the file to read
     */
    public AbstractReader(String filename) {
        open(filename);
    }

    /**
     * Opens the abstract reader for {@code filename}.
     *
     * <p>If a reader is already attached to this instance it is closed first, so
     * that re-opening does not leak the previous file handle.
     *
     * @param filename name of the file to read
     */
    public void open(String filename) {
        if (f_in != null) {
            // Closing an already-closed reader is harmless, so this is safe even
            // after appendNextTree() has closed the reader at end of input.
            close();
        }
        f_in = IOUtil.createBufferedFileReader(filename);
    }

    /**
     * Closes the abstract reader.
     *
     * <p>Does nothing when no reader is attached. An {@link IOException} raised
     * while closing is reported through {@code printStackTrace()} and not
     * propagated.
     */
    public void close() {
        if (f_in == null) {
            return;
        }
        try {
            f_in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Sets the language code stored in {@link #s_language}.
     *
     * @param language language code, e.g. one of the {@code LANG_*} constants
     */
    public void setLanguage(String language) {
        s_language = language;
    }

    /**
     * Tells whether a line is a separator rather than node content.
     *
     * @param line line read from the input; must not be null
     * @return true if {@code line} is empty after trimming whitespace
     */
    protected boolean isSkip(String line) {
        return line.trim().isEmpty();
    }

    /**
     * Appends the next tree to {@code tree}. {@code tree} may or may not
     * already contain nodes; IDs of the appended nodes start at
     * {@code tree.size()}.
     *
     * <p>When the end of the input is reached before any node line is found,
     * the reader is closed.
     *
     * @param tree tree receiving the nodes
     * @return true if the next tree exists
     * @throws IOException if reading from the underlying reader fails
     */
    protected boolean appendNextTree(ITree<NodeType> tree) throws IOException {
        // skip the separator lines preceding the tree
        String line;
        while ((line = f_in.readLine()) != null) {
            if (!isSkip(line)) {
                break;
            }
        }

        // end of the input: no more trees
        if (line == null) {
            close();
            return false;
        }

        // add nodes until the next separator line or the end of the input
        int id = tree.size();
        tree.add(toNode(line, id++));
        while ((line = f_in.readLine()) != null) {
            if (isSkip(line)) {
                return true;
            }
            tree.add(toNode(line, id++));
        }
        return true;
    }

    /**
     * Returns the next tree; the exact contract, including what is returned
     * when no tree is left, is defined by the implementing subclass.
     *
     * @return the next tree
     */
    public abstract TreeType nextTree();

    /**
     * Converts one input line to a node.
     *
     * @param line string of values
     * @param id token ID of the node
     * @return node containing values from {@code line}.
     */
    protected abstract NodeType toNode(String line, int id);
}