package experiments;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
/**
 * Computes per-sentence "right arc" statistics over a corpus file.
 *
 * <p>The corpus is read line by line. Every non-empty line is split on tab
 * characters; column 1 is parsed as the token id and column 7 as the id of
 * the token's parent. An empty line closes the current sentence.
 *
 * <p>For each sentence two lists are collected:
 * <ul>
 * <li><b>right arcs</b>: the ids of the tokens whose id is greater than the
 * id of their parent (each id at most once);</li>
 * <li><b>right parents</b>: the distinct, non-zero parent ids of those
 * tokens.</li>
 * </ul>
 * The proportion of a sentence is {@code #rightParents / #rightArcs}, or
 * {@code 0.0} when the sentence has no right parents.
 *
 * @author miguel
 */
public class Hypothesis1 {

    /** 1-based index of the tab-separated column holding the token id. */
    private static final int ID_COLUMN = 1;

    /** 1-based index of the tab-separated column holding the parent id. */
    private static final int HEAD_COLUMN = 7;

    /** Path of the corpus file read by {@link #generateStatistics()}. */
    private String corpus;

    /** When {@code true}, statistics are also printed to standard output. */
    private boolean outputEnabled = true;

    // The collections start out empty (instead of null) so that the getters
    // fail with an index error rather than a NullPointerException when no
    // corpus has been processed yet.

    /** Right parents of every sentence, in corpus order. */
    private ArrayList<ArrayList<String>> rightParentsCollection = new ArrayList<ArrayList<String>>();

    /** Right arcs of every sentence, in corpus order. */
    private ArrayList<ArrayList<String>> rightArcsCollection = new ArrayList<ArrayList<String>>();

    /** Proportion of every sentence, in corpus order. */
    private ArrayList<Double> proportionsCollection = new ArrayList<Double>();

    /**
     * Mean of the per-sentence proportions, as computed by the last
     * successful {@link #generateStatistics()} call. Kept public for
     * backward compatibility; prefer {@link #getTotalProportion()}.
     */
    public double totalProportions;

    /** Creates an instance with no corpus set and console output enabled. */
    public Hypothesis1() {
    }

    /**
     * Returns the right parents of a sentence rendered as a list string,
     * for example {@code [3, 5]}.
     *
     * @param index 0-based sentence index
     * @return the string form of the sentence's right-parent list
     * @throws IndexOutOfBoundsException if no such sentence was recorded
     */
    public String getRightParents(int index) {
        return rightParentsCollection.get(index).toString();
    }

    /**
     * Returns how many distinct right parents a sentence has.
     *
     * @param index 0-based sentence index
     * @return the number of right parents of the sentence
     * @throws IndexOutOfBoundsException if no such sentence was recorded
     */
    public int getRightParentsCount(int index) {
        return rightParentsCollection.get(index).size();
    }

    /**
     * Returns the right arcs of a sentence rendered as a list string,
     * for example {@code [3, 4, 5]}.
     *
     * @param index 0-based sentence index
     * @return the string form of the sentence's right-arc list
     * @throws IndexOutOfBoundsException if no such sentence was recorded
     */
    public String getRightArcs(int index) {
        return rightArcsCollection.get(index).toString();
    }

    /**
     * Returns how many right arcs a sentence has.
     *
     * @param index 0-based sentence index
     * @return the number of right arcs of the sentence
     * @throws IndexOutOfBoundsException if no such sentence was recorded
     */
    public int getRightArcsCount(int index) {
        return rightArcsCollection.get(index).size();
    }

    /**
     * Returns the proportion {@code #rightParents / #rightArcs} of a sentence.
     *
     * @param index 0-based sentence index
     * @return the proportion, {@code 0.0} if the sentence has no right parents
     * @throws IndexOutOfBoundsException if no such sentence was recorded
     */
    public double getProportion(int index) {
        return proportionsCollection.get(index);
    }

    /**
     * Returns the mean of the per-sentence proportions.
     *
     * @return the mean proportion; {@code 0.0} if the corpus held no sentence
     */
    public double getTotalProportion() {
        return this.totalProportions;
    }

    /**
     * Sets the path of the corpus file to analyse.
     *
     * @param corpus path of the corpus file
     */
    public void setCorpus(String corpus) {
        this.corpus = corpus;
    }

    /**
     * Reads the corpus and rebuilds the per-sentence right-arc and
     * right-parent lists, the per-sentence proportions and their mean.
     *
     * <p>Every empty line closes a sentence, even when no token line preceded
     * it; a final sentence that is not followed by an empty line is recorded
     * as well. If the corpus cannot be opened the stack trace is printed and
     * previously computed results are left untouched. If reading fails part
     * way through, the stack trace is printed and the mean is not updated.
     *
     * @throws NumberFormatException if column 1 or column 7 of a line with at
     *         least seven columns is not a valid integer
     */
    public void generateStatistics() {
        BufferedReader br;
        try {
            br = new BufferedReader(new FileReader(corpus));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        }
        try {
            rightParentsCollection = new ArrayList<ArrayList<String>>();
            rightArcsCollection = new ArrayList<ArrayList<String>>();
            proportionsCollection = new ArrayList<Double>();

            ArrayList<String> rightParents = new ArrayList<String>();
            ArrayList<String> rightArcs = new ArrayList<String>();
            // True while token lines have been read since the last sentence
            // boundary, i.e. there is a sentence that still has to be recorded.
            boolean pending = false;

            // readLine() returning null is the reliable end-of-stream signal;
            // ready() only tells whether a read would block.
            String line;
            while ((line = br.readLine()) != null) {
                if (!line.isEmpty()) {
                    pending = true;
                    collectRightArc(line, rightParents, rightArcs);
                } else {
                    pending = false;
                    recordSentence(rightParents, rightArcs);
                    rightParents = new ArrayList<String>();
                    rightArcs = new ArrayList<String>();
                }
            }
            if (pending) {
                recordSentence(rightParents, rightArcs);
            }
            summarize();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, also when parsing throws.
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Convenience entry point: analyses a corpus and returns the populated
     * instance.
     *
     * @param corpusPath path of the corpus file
     * @param outputEnale whether the statistics are printed to standard output
     * @return the instance holding the computed statistics
     */
    public static Hypothesis1 Run(String corpusPath, boolean outputEnale) {
        Hypothesis1 h1 = new Hypothesis1();
        h1.outputEnabled = outputEnale;
        h1.setCorpus(corpusPath);
        h1.generateStatistics();
        return h1;
    }

    /**
     * Parses one token line and, if the token id is greater than its parent
     * id, adds the token to the right arcs and its non-zero parent to the
     * right parents. Lines with fewer than seven columns are ignored.
     */
    private static void collectRightArc(String line, ArrayList<String> rightParents,
            ArrayList<String> rightArcs) {
        StringTokenizer st = new StringTokenizer(line, "\t");
        String id = "";
        for (int column = 1; st.hasMoreTokens(); column++) {
            String tok = st.nextToken();
            if (column == ID_COLUMN) {
                id = tok;
            } else if (column == HEAD_COLUMN) {
                int head = Integer.parseInt(tok);
                int tokenId = Integer.parseInt(id);
                if (tokenId > head) {
                    // Parent id 0 is never counted as a right parent.
                    String headKey = Integer.toString(head);
                    if (head != 0 && !rightParents.contains(headKey)) {
                        rightParents.add(headKey);
                    }
                    String idKey = Integer.toString(tokenId);
                    if (!rightArcs.contains(idKey)) {
                        rightArcs.add(idKey);
                    }
                }
                // Columns after the parent column are not used.
                return;
            }
        }
    }

    /**
     * Stores the lists of a finished sentence together with its proportion
     * and, if output is enabled, prints them.
     */
    private void recordSentence(ArrayList<String> rightParents, ArrayList<String> rightArcs) {
        double proportion = 0.0;
        // A right parent is only ever added together with a right arc, so
        // the divisor cannot be zero here.
        if (rightParents.size() > 0) {
            proportion = (double) rightParents.size() / (double) rightArcs.size();
        }
        proportionsCollection.add(proportion);
        if (this.outputEnabled) {
            System.out.println("#rightParents (" + rightParents.size() + ")/#rightArcs (" + rightArcs.size() + ")=" + proportion);
            System.out.println(rightArcs);
            System.out.println(rightParents);
        }
        rightParentsCollection.add(rightParents);
        rightArcsCollection.add(rightArcs);
    }

    /**
     * Computes the mean proportion over all recorded sentences and, if output
     * is enabled, prints it together with the number of sentences whose
     * proportion is exactly 1.
     */
    private void summarize() {
        int exactStructures = 0;
        double sum = 0.0;
        for (double proportion : proportionsCollection) {
            if (proportion == 1.0) {
                exactStructures++;
            }
            sum += proportion;
        }
        int sentences = proportionsCollection.size();
        // An empty corpus yields 0.0 instead of the NaN produced by 0.0 / 0.0.
        this.totalProportions = sentences == 0 ? 0.0 : sum / sentences;
        if (this.outputEnabled) {
            System.out.println("Exact Structures 2:" + exactStructures);
            System.out.println("totalProp/numbSenteces=" + this.totalProportions);
        }
    }
}