/**
* Copyright (c) 2010, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.engine;
import clear.morph.MorphEnAnalyzer;
import clear.reader.AbstractReader;
import clear.util.IOUtil;
import java.io.BufferedReader;
import java.io.PrintStream;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
* Runs a morphological analyzer.
*
* @author Jinho D. Choi <b>Last update:</b> 11/4/2010
*/
public class MorphAnalyze {
@Option(name = "-i", usage = "input file", required = true, metaVar = "REQUIRED")
String inputFile;
@Option(name = "-o", usage = "output file", required = true, metaVar = "REQUIRED")
String outputFile;
@Option(name = "-d", usage = "dictionary jar-file", required = true, metaVar = "REQUIRED")
String dictFile;
public MorphAnalyze(String[] args) {
CmdLineParser cmd = new CmdLineParser(this);
try {
cmd.parseArgument(args);
BufferedReader fin = IOUtil.createBufferedFileReader(inputFile);
PrintStream fout = IOUtil.createPrintFileStream(outputFile);
MorphEnAnalyzer morph = new MorphEnAnalyzer(dictFile);
String line, form, pos, lemma;
String[] tmp;
while ((line = fin.readLine()) != null) {
if (line.trim().equals("")) {
fout.println();
continue;
}
tmp = line.split(AbstractReader.FIELD_DELIM);
form = tmp[0];
pos = tmp[1];
lemma = morph.getLemma(form, pos);
fout.println(line + AbstractReader.FIELD_DELIM + lemma);
}
fin.close();
fout.close();
} catch (CmdLineException e) {
System.err.println(e.getMessage());
cmd.printUsage(System.err);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
new MorphAnalyze(args);
}
}