/**
 * Copyright (c) 2007, Regents of the University of Colorado All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
 * with the distribution. Neither the name of the University of Colorado at
 * Boulder nor the names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package clear.propbank;

import clear.util.IOUtil;
import java.util.Scanner;
import java.util.StringTokenizer;

/**
 * Propbank reader.
 *
 * <p>Reads a Propbank file line by line; each non-blank line is split on
 * {@code PBLib.FIELD_DELIM} and converted into one {@link PBInstance}.
 *
 * @author Jinho D. Choi <b>Last update:</b> 02/15/2010
 */
public class PBReader {
    /**
     * Index of the first argument field on a line. Fields 0-5 populate the
     * instance itself; field 6 is not read by this class (presumably a column
     * the reader does not need - confirm against the Propbank file format).
     */
    private static final int FIRST_ARG_FIELD = 7;

    /**
     * Scanner to read Propbank files
     */
    private Scanner f_prop;

    /**
     * Set once the scanner has been closed, so later calls to
     * {@link #nextInstance()} never touch the closed scanner.
     */
    private boolean f_closed = false;

    /**
     * Initializes the Propbank reader.
     *
     * @param propFile name of the Propbank file
     */
    public PBReader(String propFile) {
        f_prop = IOUtil.createFileScanner(propFile);
    }

    /**
     * Returns the next Propbank instance. If there is none, return null.
     *
     * <p>Reading stops, and the underlying scanner is closed, when the input
     * has no more lines or when a line that is empty after trimming is read.
     * Every call made after that point returns {@code null} as well.
     *
     * <p>A line that does not have the expected fields is not handled
     * specially: it surfaces as the unchecked exception raised by the failing
     * operation (for example {@link NumberFormatException} from
     * {@link Integer#parseInt(String)} or an index-out-of-bounds exception).
     *
     * @return the parsed instance, or {@code null} when the input is exhausted
     */
    public PBInstance nextInstance() {
        // A closed Scanner throws IllegalStateException from hasNextLine(),
        // so exhaustion must be remembered rather than re-probed.
        if (f_closed) {
            return null;
        }

        if (!f_prop.hasNextLine()) {
            closeScanner();
            return null;
        }

        String line = f_prop.nextLine().trim();

        // A blank line is treated as the end of the input, not skipped.
        if (line.isEmpty()) {
            closeScanner();
            return null;
        }

        String[] str = line.split(PBLib.FIELD_DELIM);
        PBInstance instance = new PBInstance();

        instance.treePath = str[0];
        instance.treeIndex = Integer.parseInt(str[1]);
        instance.predicateId = Integer.parseInt(str[2]);
        instance.annotator = str[3];
        instance.type = str[4];
        instance.rolesetId = str[5];

        for (int i = FIRST_ARG_FIELD; i < str.length; i++) {
            instance.addArg(parseArg(str[i], instance.predicateId));
        }

        return instance;
    }

    /** Closes the scanner and records that the reader is exhausted. */
    private void closeScanner() {
        f_prop.close();
        f_closed = true;
    }

    /**
     * Parses one argument field into a {@link PBArg}.
     *
     * <p>The field is cut at the first occurrence of
     * {@code PBLib.PROP_LABEL_DELIM}: the text before it is the location list,
     * the text after it is the label.
     *
     * @param sarg        the raw argument field
     * @param predicateId predicate id passed through to the {@link PBArg}
     * @return the argument holding every distinct location found in the field
     */
    private PBArg parseArg(String sarg, int predicateId) {
        int idx = sarg.indexOf(PBLib.PROP_LABEL_DELIM);
        String label = sarg.substring(idx + 1);
        String locs = sarg.substring(0, idx);

        PBArg pbArg = new PBArg(label, predicateId);

        // returnDelims = true: the operator characters come back as tokens of
        // their own, interleaved with the location tokens.
        StringTokenizer tok = new StringTokenizer(locs, PBLib.PROP_ARG_OP, true);
        String argType = "";

        while (tok.hasMoreTokens()) {
            String next = tok.nextToken();

            if (next.length() == 1) {
                // Single-character token: remembered as the type of the
                // locations that follow it.
                argType = next;
            } else {
                String[] loc = next.split(PBLib.PROP_LOC_DELIM);
                int terminalId = Integer.parseInt(loc[0]);
                int height = Integer.parseInt(loc[1]);

                // A location already present in the argument is silently
                // skipped rather than added a second time.
                if (!pbArg.containsLoc(terminalId, height)) {
                    pbArg.addLoc(new PBLoc(argType, terminalId, height));
                }
            }
        }

        return pbArg;
    }
}