/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.experiment;
import java.io.BufferedReader;
import java.io.File;
import java.io.PrintStream;
import java.util.List;
import edu.emory.clir.clearnlp.bin.NLPDecode;
import edu.emory.clir.clearnlp.component.utils.NLPMode;
import edu.emory.clir.clearnlp.util.BinUtils;
import edu.emory.clir.clearnlp.util.FileUtils;
import edu.emory.clir.clearnlp.util.IOUtils;
import edu.emory.clir.clearnlp.util.Joiner;
import edu.emory.clir.clearnlp.util.Splitter;
import edu.emory.clir.clearnlp.util.constant.StringConst;
/**
 * Command-line tool that decodes a set of input files and then merges every decoded
 * output file back into its input file, line by line, while counting how often the
 * decoded columns agree with the columns already present in the input.
 * <p>
 * For each input file {@code F} the decoded file {@code F + PERIOD + outputExt} is read in
 * lockstep with {@code F}; the merged lines are written to {@code F + PERIOD + mode} and
 * the decoded file is deleted once the merge of that file has completed.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class NLPMerge extends NLPDecode
{
	public NLPMerge() {}

	/**
	 * Parses the command-line arguments, decodes all matching input files, and merges
	 * the decoded output into the input files.
	 * Exceptions raised while merging are printed to standard error and not rethrown.
	 *
	 * @param args the command-line arguments, bound to this object's option fields by
	 *             {@code BinUtils.initArgs}.
	 */
	public NLPMerge(String[] args)
	{
		BinUtils.initArgs(args, this);
		NLPMode mode = NLPMode.valueOf(s_mode);
		List<String> inputFiles = FileUtils.getFileList(s_inputPath, s_inputExt, false);
		decode(inputFiles, s_outputExt, s_configurationFile, n_threads, mode);

		try
		{
			merge(inputFiles, s_outputExt, mode);
		}
		catch (Exception e) {e.printStackTrace();}
	}

	/**
	 * Merges the decoded output of every input file into that input file and logs the
	 * accumulated agreement counts.
	 * One log line is written per counter in the form {@code "pct (matches/total)"}; when
	 * no token was evaluated the percentage is reported as {@code 0.00} rather than NaN.
	 *
	 * @param inputFiles paths of the input files; each one must have a decoded companion
	 *                   file named {@code inputFile + PERIOD + outputExt}.
	 * @param outputExt  the extension of the decoded files.
	 * @param mode       the mode to merge and evaluate; only {@code morph} and {@code dep}
	 *                   are supported.
	 * @throws IllegalArgumentException if {@code mode} is neither {@code morph} nor {@code dep}.
	 * @throws IllegalStateException    if a decoded file ends before its input file while
	 *                                  tokens remain to be merged.
	 * @throws Exception                if any file cannot be opened, read, or closed.
	 */
	public void merge(List<String> inputFiles, String outputExt, NLPMode mode) throws Exception
	{
		int[] eval = getEval(mode);

		for (String inputFile : inputFiles)
			mergeFile(inputFile, outputExt, mode, eval);

		int total = eval[0];

		for (int i=1; i<eval.length; i++)
		{
			// Guard against 0/0, which would otherwise be logged as NaN.
			double accuracy = (total > 0) ? 100d * eval[i] / total : 0d;
			BinUtils.LOG.info(String.format("%5.2f (%d/%d)\n", accuracy, eval[i], total));
		}
	}

	/**
	 * Merges one decoded file into its input file and updates the counters in {@code eval}.
	 * All three streams are closed even when merging fails; the decoded file is deleted
	 * only after the merge of this file has completed without an exception.
	 */
	private void mergeFile(String inputFile, String outputExt, NLPMode mode, int[] eval) throws Exception
	{
		File file = new File(inputFile + StringConst.PERIOD + outputExt);
		BufferedReader ing = IOUtils.createBufferedReader(inputFile);

		try
		{
			BufferedReader ins = IOUtils.createBufferedReader(file);

			try
			{
				PrintStream out = IOUtils.createBufferedPrintStream(inputFile + StringConst.PERIOD + mode);

				try
				{
					int lineNumber = 0;
					String line;

					while ((line = ing.readLine()) != null)
					{
						lineNumber++;
						List<String> tg = Splitter.splitTabsToList(line);
						// Always consume one decoded line per input line to keep both files aligned.
						String decoded = ins.readLine();

						// Rows with a single field are copied through without merging.
						if (tg.size() > 1)
						{
							if (decoded == null)
								throw new IllegalStateException("Decoded file "+file+" ended before line "+lineNumber+" of "+inputFile);

							String[] ts = Splitter.splitTabs(decoded);

							switch (mode)
							{
							case morph: evaluatePOS(tg, ts, eval); break;
							case dep  : evaluateDEP(tg, ts, eval); break;
							default   : break;	// unreachable: getEval() rejects every other mode
							}
						}

						out.println(Joiner.join(tg, StringConst.TAB));
					}
				}
				finally
				{
					out.close();
				}
			}
			finally
			{
				ins.close();
			}
		}
		finally
		{
			ing.close();
		}

		file.delete();
	}

	/**
	 * Allocates the counter array for the given mode: index 0 holds the number of
	 * evaluated tokens and every further index holds one match counter
	 * (one for {@code morph}, two for {@code dep}).
	 *
	 * @throws IllegalArgumentException if the mode is neither {@code morph} nor {@code dep}.
	 */
	private int[] getEval(NLPMode mode)
	{
		switch (mode)
		{
		case morph: return new int[2];
		case dep  : return new int[3];
		default   : throw new IllegalArgumentException("Invalid mode: "+mode);
		}
	}

	/**
	 * Inserts the decoded lemma, POS tag, and feats fields into {@code tg} and counts the
	 * token as a match when columns 4 and 5 of the merged row are equal.
	 * After the insertions those two columns are adjacent; presumably column 4 is the POS
	 * tag already present in the input and column 5 the decoded one - confirm against the
	 * input format.
	 */
	private void evaluatePOS(List<String> tg, String[] ts, int[] eval)
	{
		tg.add(3, ts[1]);	// lemma
		tg.add(5, ts[2]);	// pos tag
		tg.add(7, ts[3]);	// feats

		eval[0]++;
		if (tg.get(4).equals(tg.get(5))) eval[1]++;
	}

	/**
	 * Inserts decoded fields 5 and 6 into {@code tg} at columns 9 and 11 and updates the
	 * counters: {@code eval[1]} is incremented when columns 8 and 9 agree, and
	 * {@code eval[2]} when columns 10 and 11 agree as well.
	 * Presumably these are the dependency head and label columns - confirm against the
	 * input format.
	 */
	private void evaluateDEP(List<String> tg, String[] ts, int[] eval)
	{
		tg.add(9 , ts[5]);
		tg.add(11, ts[6]);

		// Every token is counted; a punctuation-only filter on column 2 was disabled here:
		// if (!StringUtils.containsPunctuationOnly(tg.get(2)))
		eval[0]++;

		if (tg.get(8).equals(tg.get(9)))
		{
			eval[1]++;
			if (tg.get(10).equals(tg.get(11))) eval[2]++;
		}
	}

	/**
	 * Command-line entry point; all work is done by {@link #NLPMerge(String[])}.
	 */
	static public void main(String[] args)
	{
		new NLPMerge(args);
	}
}