package ir.ac.iust.nlp.dependencyparser.utility.parsing;
import java.io.*;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
/**
 * Settings for a {@link MaltSettings} run that additionally prepares and merges
 * "augmented" CoNLL files.
 *
 * <p>For the {@code Train} chart, {@link #preProcess()} splits {@code Input} into
 * {@link #AugmentNParts} train/test folds ({@code _train<i>.conll} /
 * {@code _test<i>.conll}) under {@code WorkingDirectory}, and
 * {@link #postProcess()} merges each {@code _test<i>.conll} with the matching
 * {@code _parse<i>.conll} into {@code train_pred.conll}, which is then handed to
 * {@code copyToDestination} together with {@link #AugmentedTrainFile}.
 * For the {@code Parse} chart, {@link #postProcess()} merges {@code Gold} and
 * {@code Output} into {@code test_pred.conll} and hands it to
 * {@code copyToDestination} with {@code Output} as the target.
 *
 * <p>NOTE(review): paths are built by plain concatenation, so
 * {@code WorkingDirectory} is presumably expected to end with a path separator
 * - confirm against callers.
 *
 * @author Mojtaba Khallash
 */
public class MaltStackSettings extends MaltSettings {

    /** Stacking level. NOTE(review): original comment only says "0" - meaning to be confirmed. */
    public int Level;

    /** Number of folds the training input is split into (defaults to 5). */
    public int AugmentNParts = 5;

    /** Second argument passed to {@code copyToDestination} for the merged training predictions. */
    public String AugmentedTrainFile;

    /** Creates settings with default values. */
    public MaltStackSettings() {}

    /**
     * Copy constructor.
     *
     * @param settings the settings to copy; the superclass part is copied by
     *                 {@code super(settings)}
     */
    public MaltStackSettings(MaltStackSettings settings) {
        super(settings);
        this.Level = settings.Level;
        this.AugmentNParts = settings.AugmentNParts;
        this.AugmentedTrainFile = settings.AugmentedTrainFile;
    }

    /**
     * For the {@code Train} chart, splits {@code Input} into
     * {@link #AugmentNParts} folds inside {@code WorkingDirectory}.
     * Does nothing for any other chart.
     *
     * @throws IOException if the input cannot be read or a fold file cannot be written
     */
    @Override
    public void preProcess() throws IOException {
        switch (Chart) {
            case Train:
                createAugmentedParts(AugmentNParts, WorkingDirectory, Input);
                break;
            default:
                break;
        }
    }

    // <editor-fold defaultstate="collapsed" desc="Create Augmented Parts">
    /**
     * Splits the sentences of {@code inputFile} into {@code parts} folds.
     * For fold {@code j}, {@code _test<j>.conll} receives the j-th contiguous
     * slice of sentences and {@code _train<j>.conll} receives all the others.
     * Any sentences left over by the integer division go to the last fold.
     *
     * <p>Fold files are overwritten, so a re-run on the same working directory
     * does not duplicate sentences.
     *
     * @param parts      number of folds; must be positive
     * @param workingDir prefix prepended to the fold file names
     * @param inputFile  CoNLL file to split (read as UTF-8)
     * @throws IllegalArgumentException if {@code parts} is not positive
     * @throws IOException              on any read or write failure
     */
    private void createAugmentedParts(int parts, String workingDir, String inputFile)
            throws IOException {
        if (parts <= 0) {
            throw new IllegalArgumentException(
                    "Number of augment parts must be positive, got " + parts);
        }

        //==== Read input file ====//
        HashMap<Integer, String> sentences = readSentences(inputFile);
        int numInstances = sentences.size();
        // The last partition becomes bigger
        int numInstancesPerPart = numInstances / parts;

        Writer[] trainParts = new Writer[parts];
        Writer[] testParts = new Writer[parts];
        IOException failure = null;
        try {
            //==== Create N writer for train and test ====//
            for (int i = 0; i < parts; i++) {
                trainParts[i] = openUtf8Writer(workingDir + "_train" + i + ".conll", false);
                testParts[i] = openUtf8Writer(workingDir + "_test" + i + ".conll", false);
            }

            //==== Write data in separate files ====//
            for (int i = 0; i < numInstances; i++) {
                String sen = sentences.get(i + 1);
                // Each sentence is test data for exactly one fold; the remainder
                // (and everything, when there are fewer sentences than folds)
                // belongs to the last fold.
                int testFold = (numInstancesPerPart == 0)
                        ? parts - 1
                        : Math.min(i / numInstancesPerPart, parts - 1);
                for (int j = 0; j < parts; j++) {
                    Writer target = (j == testFold) ? testParts[j] : trainParts[j];
                    target.write(sen + "\n");
                }
            }
        } catch (IOException e) {
            failure = e;
        } finally {
            //==== Close writer stream ====//
            failure = closeAll(trainParts, failure);
            failure = closeAll(testParts, failure);
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Reads a CoNLL file and groups its lines into sentences separated by
     * blank lines.
     *
     * @param path file to read (decoded as UTF-8, matching the writers of this class)
     * @return map from 1-based sentence index to the sentence text, each token
     *         line terminated by {@code "\n"}
     * @throws IOException if the file cannot be read
     */
    private HashMap<Integer, String> readSentences(String path) throws IOException {
        HashMap<Integer, String> sentences = new HashMap<>();
        try (BufferedReader reader = openUtf8Reader(path)) {
            StringBuilder tokens = new StringBuilder();
            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() != 0) {
                    tokens.append(line).append("\n");
                } else if (tokens.length() > 0) {
                    count++;
                    sentences.put(count, tokens.toString());
                    tokens = new StringBuilder();
                }
            }
            // Keep the last sentence even when the file lacks a trailing blank line.
            if (tokens.length() > 0) {
                count++;
                sentences.put(count, tokens.toString());
            }
        }
        return sentences;
    }
    // </editor-fold>

    /**
     * Builds the merged prediction file for the current chart and passes it to
     * {@code copyToDestination}.
     *
     * <ul>
     *   <li>{@code Train}: merges every {@code _test<i>.conll} /
     *       {@code _parse<i>.conll} pair into {@code train_pred.conll}.</li>
     *   <li>{@code Parse}: merges {@code Gold} and {@code Output} into
     *       {@code test_pred.conll}.</li>
     * </ul>
     * A prediction file left over from an earlier run is deleted first because
     * the merge step appends.
     *
     * @throws IOException if a stale file cannot be deleted or a merge fails
     */
    @Override
    public void postProcess() throws IOException {
        switch (Chart) {
            case Train: {
                String trainFile = WorkingDirectory + "train_pred.conll";
                deleteIfPresent(new File(trainFile));
                for (int i = 0; i < AugmentNParts; i++) {
                    // merge to train_pred
                    String test = WorkingDirectory + "_test" + i + ".conll";
                    String parse = WorkingDirectory + "_parse" + i + ".conll";
                    createPredFile(test, parse, trainFile);
                }
                copyToDestination(trainFile, AugmentedTrainFile);
                break;
            }
            case Parse: {
                // merge to test_pred
                String testFile = WorkingDirectory + "test_pred.conll";
                deleteIfPresent(new File(testFile));
                createPredFile(Gold, Output, testFile);
                copyToDestination(testFile, Output);
                break;
            }
            default:
                break;
        }
    }

    // <editor-fold defaultstate="collapsed" desc="Create Pred File">
    /**
     * Appends to {@code destination} one line per line of {@code gold}: the
     * corresponding line of {@code parse}, in which - for non-blank gold lines -
     * every occurrence of {@code "_\t_"} is replaced by the gold line's 7th and
     * 8th tab-separated columns followed by {@code "\t_\t_"}.
     *
     * @param gold        file supplying columns 7 and 8 (read as UTF-8)
     * @param parse       file whose lines are rewritten; must have at least as
     *                    many lines as {@code gold}
     * @param destination file to append to (written as UTF-8)
     * @throws IOException if a file cannot be opened, read or written, if
     *                     {@code parse} is shorter than {@code gold}, or if a
     *                     non-blank gold line has fewer than 8 columns
     */
    private void createPredFile(String gold, String parse, String destination)
            throws IOException {
        try (BufferedReader goldReader = openUtf8Reader(gold);
             BufferedReader parseReader = openUtf8Reader(parse);
             Writer pred = openUtf8Writer(destination, true)) {
            int lineNumber = 0;
            String goldLine;
            while ((goldLine = goldReader.readLine()) != null) {
                lineNumber++;
                String parseLine = parseReader.readLine();
                if (parseLine == null) {
                    throw new IOException("Parse file '" + parse + "' ends before gold file '"
                            + gold + "' (line " + lineNumber + ")");
                }
                if (goldLine.trim().length() != 0) {
                    String[] vals = goldLine.split("\t");
                    if (vals.length < 8) {
                        throw new IOException("Gold file '" + gold + "' line " + lineNumber
                                + " has " + vals.length + " columns, expected at least 8");
                    }
                    parseLine = parseLine.replace("_\t_", vals[6] + "\t" + vals[7] + "\t_\t_");
                }
                pred.write(parseLine + "\n");
            }
        }
    }
    // </editor-fold>

    /** Opens {@code path} for buffered UTF-8 reading. */
    private static BufferedReader openUtf8Reader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(
                new FileInputStream(path), "UTF-8"));
    }

    /** Opens {@code path} for buffered UTF-8 writing, appending when {@code append} is true. */
    private static Writer openUtf8Writer(String path, boolean append) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(path, append), "UTF-8"));
    }

    /**
     * Closes every non-null writer, collecting failures instead of stopping at
     * the first one.
     *
     * @param writers writers to close; entries may be null if opening failed part-way
     * @param failure an earlier failure, or null
     * @return {@code failure} with close errors attached as suppressed, or the
     *         first close error if {@code failure} was null, or null if nothing failed
     */
    private static IOException closeAll(Writer[] writers, IOException failure) {
        for (Writer writer : writers) {
            if (writer == null) {
                continue;
            }
            try {
                writer.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        return failure;
    }

    /** Deletes {@code file} if it exists; a failed deletion is reported, not ignored. */
    private static void deleteIfPresent(File file) throws IOException {
        if (file.exists()) {
            FileUtils.forceDelete(file);
        }
    }
}