/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cogroo;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import org.apache.uima.resource.ResourceInitializationException;
import org.cogroo.checker.CheckDocument;
import org.cogroo.checker.GrammarChecker;
import org.cogroo.entities.Sentence;
import org.cogroo.entities.Token;
import org.cogroo.entities.impl.MorphologicalTag;
import org.cogroo.entities.tree.Leaf;
import org.cogroo.entities.tree.Node;
import org.cogroo.entities.tree.TreeElement;
import org.cogroo.interpreters.FlorestaTagInterpreter;
import org.cogroo.interpreters.TagInterpreter;
import cogroo.uima.ae.NewTagsetBaselineCogrooAE;
public class ProcessReport {
GrammarChecker pipe;
private String report;
private String output;
public ProcessReport(String resources, String report, String output)
throws IllegalArgumentException, IOException, ResourceInitializationException {
pipe = (GrammarChecker) NewTagsetBaselineCogrooAE.createCogroo();
this.report = report;
this.output = output;
}
private void processFile() throws IOException {
BufferedReader in = new BufferedReader(new InputStreamReader(
new FileInputStream(this.report), "utf-8"));
BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(this.output), "utf-8"));
String line = in.readLine();
while (line != null) {
out.append(line).append("\n");
String text = getText(line);
if (text != null) {
out.append(process(text)).append("\n\n");
}
line = in.readLine();
}
in.close();
out.close();
}
private String process(String text) {
CheckDocument doc = new CheckDocument();
doc.setText(text);
pipe.analyze(doc);
StringBuilder sb = new StringBuilder();
for (Sentence sentence : doc.getSentencesLegacy()) {
sb.append("Flat structure for: ").append(sentence.getSentence())
.append("\n");
for (Token token : sentence.getTokens()) {
// print the text
sb.append(token.getLexeme());
// print the lemma
sb.append(" [" + Arrays.toString(token.getPrimitive()) + " ");
// print the morphological tag, we use a tag interpreter here
if(token.getMorphologicalTag() == null) {
throw new IllegalArgumentException("Morphological tag is missing! " + sentence.toString());
}
sb.append("" + mtagToStr(token.getMorphologicalTag()) + "] ");
}
sb.append("\n").append("Syntax tree: " + sentence.getSyntaxTree());
}
return sb.toString();
}
private String getText(String line) {
if (line.startsWith("[")) {
String[] parts = line.split("\\t+");
return parts[parts.length - 1];
}
return null;
}
private static String printTree(Node root) {
StringBuffer sb = new StringBuffer();
// print itself and its children
for (int i = 0; i < root.getLevel(); i++) {
sb.append("\t");
}
sb.append(root.getSyntacticTag());
sb.append("{");
if (root.getMorphologicalTag() != null) {
sb.append(root.getMorphologicalTag());
}
sb.append("\n");
for (TreeElement element : root.getElements()) {
sb.append(printTree(element));
}
sb.append("\n");
for (int i = 0; i < root.getLevel(); i++) {
sb.append("\t");
}
sb.append("}\n");
return sb.toString();
}
private static String printTree(Leaf leaf) {
StringBuffer sb = new StringBuffer();
// print itself and its children
for (int i = 0; i < leaf.getLevel(); i++) {
sb.append("\t");
}
// print the text
sb.append(leaf.getLexeme());
// print the lemma
sb.append("\tlemma[" + leaf.getLemma() + "] ");
// print the morphological tag, we use a tag interpreter here
sb.append("\ttag[" + leaf.getMorphologicalTag() + "] \n");
return sb.toString();
}
private static String printTree(TreeElement element) {
if (element instanceof Node)
return printTree((Node) element);
else
return printTree((Leaf) element);
}
// tag interpreter is responsible for serializing and reading tags.
// .. the LegacyTagInterpreter follow a variant of GC tagset:
// .. http://beta.visl.sdu.dk/visl/pt/info/portsymbol.html
private static final TagInterpreter tagInterpreter = new FlorestaTagInterpreter();
private static String mtagToStr(MorphologicalTag tag) {
return tagInterpreter.serialize(tag);
}
/**
* @param args
* @throws IOException
* @throws IllegalArgumentException
* @throws ResourceInitializationException
*/
public static void main(String[] args) throws IOException, ResourceInitializationException, IllegalArgumentException {
System.out.println("Executing MultiCogroo ProcessReport...");
System.out.println(" path: " + args[0]);
System.out.println(" in: " + args[1]);
System.out.println(" out: " + args[2]);
ProcessReport pr = new ProcessReport(args[0], args[1], args[2]);
pr.processFile();
}
}