package com.cognitionis.nlp_files;
import java.io.*;
import java.util.regex.*;
/**
*
* @author Héctor Llorens
* @since 2011
*/
public class TreebankFile extends NLPFile {
public TreebankFile(String filename) {
super(filename);
}
@Override
public Boolean isWellFormatted() {
int par_level = 0;
try {
if (super.getFile()==null) {
throw new Exception("No file loaded in NLPFile object");
}
BufferedReader reader = new BufferedReader(new FileReader(this.f));
try {
String line = null;
int linen = 0;
Pattern p = Pattern.compile("[\\(\\)]");
while ((line = reader.readLine()) != null) {
linen++; //System.getProperty("line.separator")
if (line.matches("\\s*[^\\(\\s].*")) {
throw new Exception("Treebank format error: line " + linen + " not begining with \\s*(");
}
Matcher m = p.matcher(line);
while (m.find()) {
if (m.group().equals("(")) {
par_level++;
} else {
par_level--;
if (par_level < 0) {
throw new Exception("Treebank format error: par_level lower than 0");
}
}
}
//System.out.println(linen+": "+line+" - parlevel="+ par_level);
}
} finally {
reader.close();
}
if (par_level != 0) {
throw new Exception("Treebank format error: positive unbalancement, par_level=" + par_level);
}
} catch (Exception e) {
System.err.println("Errors found ("+this.getClass().getSimpleName()+"):\n\t" + e.toString() + "\n");
if(System.getProperty("DEBUG")!=null && System.getProperty("DEBUG").equalsIgnoreCase("true")){e.printStackTrace(System.err);}
return false;
}
return true;
}
public String toPlain(String filename){
// one token, one space, one token, one space... (end of sentence -> \n)
return this.getFile().toString();
}
}