///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2007 University of Texas at Austin and (C) 2005
// University of Pennsylvania and Copyright (C) 2002, 2003 University
// of Massachusetts Amherst, Department of Computer Science.
//
// This software is licensed under the terms of the Common Public
// License, Version 1.0 or (at your option) any subsequent version.
//
// The license is approved by the Open Source Initiative, and is
// available from their website at http://www.opensource.org.
///////////////////////////////////////////////////////////////////////////////
package mstparser.io;
import java.io.IOException;
import java.text.DecimalFormat;
import mstparser.DependencyInstance;
/**
* A writer to create files in CONLL format.
*
* <p> Created: Sat Nov 10 15:25:10 2001 </p>
*
* @author Jason Baldridge
* @version $Id: CONLLWriter.java 94 2007-01-17 17:05:12Z jasonbaldridge $
* @see mstparser.io.DependencyWriter
*/
public class CONLLWriter extends DependencyWriter {
public CONLLWriter(boolean labeled) {
this.labeled = labeled;
}
@Override
public void write(DependencyInstance instance) throws IOException {
DecimalFormat df = null;
if (instance.confidenceScores != null) {
df = new DecimalFormat();
df.setMaximumFractionDigits(3);
}
int numInd = 0;
String tmp;
for (int i = 0; i < instance.length(); i++) {
// Id
writer.write(Integer.toString(i + 1));
writer.write('\t');
// word form
tmp = instance.forms[i];
if (tmp.equals("<num>") && instance.numbers.size() > 0) {
tmp = instance.numbers.get(numInd);
numInd++;
}
writer.write(tmp);
writer.write('\t');
// lemm
if (instance.lemmas != null) {
tmp = instance.lemmas[i+1];
if (tmp.equals("<num>") && instance.numbers.size() > 0) {
tmp = instance.numbers.get(numInd);
numInd++;
}
}
writer.write(tmp);
writer.write('\t');
// cpostags
writer.write(instance.cpostags[i]);
writer.write('\t');
// postags
if (instance.postags != null) {
writer.write(instance.postags[i + 1]);
}
else {
writer.write(instance.cpostags[i]);
}
writer.write('\t');
// feats
StringBuilder feats = new StringBuilder();
if (instance.feats != null) {
for(int j = 0; j < instance.feats[i+1].length; j++) {
if (j!= 0) {
feats.append("|");
}
feats.append(instance.feats[i+1][j]);
}
if (feats.length() == 0) {
feats = new StringBuilder("_");
}
}
else {
feats = new StringBuilder("_");
}
writer.write(feats.toString());
writer.write('\t');
// afm 03-07-08
if (instance.stacked)
{
// predicted head
writer.write(Integer.toString(instance.heads_pred[i]));
writer.write('\t');
// predicted deprel
writer.write(instance.deprels_pred[i]);
writer.write('\t');
}
// head
writer.write(Integer.toString(instance.heads[i]));
writer.write('\t');
// deprel
writer.write(instance.deprels[i]);
writer.write('\t');
// phead and pdeprel
writer.write("_\t_");
// confidence scores
if (instance.confidenceScores != null) {
writer.write('\t');
writer.write(df.format(instance.confidenceScores[i]));
}
writer.newLine();
}
writer.newLine();
}
}