/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.search.tgrep;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.Stack;
import java.util.StringTokenizer;
import org.apache.commons.lang.mutable.MutableInt;
/**
 * Utility methods for Penn Treebank style bracketed parse trees such as
 * {@code (S (NP (DT the) (NN dog)) (VP (VBZ barks)))}: parsing them into {@link PennTreeNode}
 * structures, rendering them back to bracket notation or to plain text, and selecting nodes by
 * their depth-first index.
 *
 * @author Richard Eckart de Castilho
 */
public class PennTreeUtils
{
    /**
     * Tokenizer delimiters: the two bracket characters plus every whitespace character, so that
     * trees spread over several lines or separated by tabs parse the same as single-line ones.
     */
    private static final String DELIMITERS = "() \t\n\r\f";

    /**
     * Parses a bracketed tree and returns the text of its terminal nodes.
     *
     * @param aTree
     *            the tree in bracket notation.
     * @return the terminal labels joined by single spaces; the empty string if the input contains
     *         no tree.
     * @throws IllegalArgumentException
     *             if the input is malformed, see {@link #parsePennTree(String)}.
     */
    public static String toText(String aTree)
    {
        return toText(parsePennTree(aTree));
    }

    /**
     * Returns the text covered by the given node: the labels of all terminal nodes below it (or
     * of the node itself if it is terminal) in document order, separated by single spaces. The
     * bracket escapes {@code -LRB-} and {@code -RRB-} are rendered as {@code (} and {@code )}.
     *
     * @param aNode
     *            the root of the (sub)tree to render; may be {@code null}.
     * @return the text, or the empty string if {@code aNode} is {@code null}.
     */
    public static String toText(PennTreeNode aNode)
    {
        StringBuilder buf = new StringBuilder();
        // parsePennTree() yields null for blank input; treat that as "no text" instead of failing.
        if (aNode != null) {
            toText(buf, aNode);
        }
        return buf.toString();
    }

    /**
     * Recursively appends the terminal labels below {@code aNode} to the buffer.
     */
    private static void toText(StringBuilder aBuffer, PennTreeNode aNode)
    {
        if (!aNode.isTerminal()) {
            for (PennTreeNode child : aNode.getChildren()) {
                toText(aBuffer, child);
            }
            return;
        }

        // Separate tokens by a single space, but do not emit a leading space.
        if (aBuffer.length() > 0) {
            aBuffer.append(" ");
        }

        String label = aNode.getLabel();
        if ("-LRB-".equals(label)) {
            aBuffer.append("(");
        }
        else if ("-RRB-".equals(label)) {
            aBuffer.append(")");
        }
        else {
            aBuffer.append(label);
        }
    }

    /**
     * Selects a node by its position in a depth-first pre-order traversal. The node passed in has
     * index 0, its first child index 1, and so on.
     *
     * @param aNode
     *            the node at which the traversal starts.
     * @param aIndex
     *            the zero-based pre-order index of the wanted node.
     * @return the node at that index, or {@code null} if the traversal ends without reaching the
     *         index (e.g. the index is negative or not smaller than the number of nodes).
     */
    public static PennTreeNode selectDfs(PennTreeNode aNode, int aIndex)
    {
        return dfs(aIndex, new MutableInt(0), aNode);
    }

    /**
     * Pre-order search helper. {@code aIndex} is the running counter shared by all recursion
     * levels; it holds the index of {@code aNode} when this method is entered.
     */
    private static PennTreeNode dfs(int aTarget, MutableInt aIndex, PennTreeNode aNode)
    {
        if (aTarget == aIndex.intValue()) {
            return aNode;
        }

        for (PennTreeNode child : aNode.getChildren()) {
            aIndex.increment();
            PennTreeNode found = dfs(aTarget, aIndex, child);
            if (found != null) {
                return found;
            }
        }

        return null;
    }

    /**
     * Parses a tree in Penn Treebank bracket notation.
     * <p>
     * Every {@code (} opens a new node. The first token after it becomes the label of that node,
     * any further token directly inside the node becomes a new child node carrying the token as
     * its label and a token index that counts such tokens from 0 in order of appearance. Every
     * {@code )} closes the current node and adds it to its parent.
     * <p>
     * Only the first top-level tree is returned; further top-level trees in the input are parsed
     * but discarded. Nodes that are still open when the input ends are closed implicitly, so a
     * truncated tree keeps everything that was read.
     *
     * @param aTree
     *            the tree in bracket notation.
     * @return the root node, or {@code null} if the input contains no opening bracket.
     * @throws IllegalArgumentException
     *             if a closing bracket or a label occurs while no node is open.
     * @throws NullPointerException
     *             if {@code aTree} is {@code null}.
     */
    public static PennTreeNode parsePennTree(String aTree)
    {
        StringTokenizer st = new StringTokenizer(aTree, DELIMITERS, true);

        PennTreeNode root = null;
        Deque<PennTreeNode> openNodes = new ArrayDeque<PennTreeNode>();
        // True once the label slot of the node on top of the stack has been consumed.
        boolean seenLabel = false;
        int tokenIndex = 0;

        while (st.hasMoreTokens()) {
            String t = st.nextToken().trim();
            if (t.length() == 0) {
                // Whitespace delimiter
                continue;
            }

            if ("(".equals(t)) {
                PennTreeNode n = new PennTreeNode();
                openNodes.push(n);
                if (root == null) {
                    root = n;
                }
                seenLabel = false;
            }
            else if (")".equals(t)) {
                if (openNodes.isEmpty()) {
                    throw new IllegalArgumentException(
                            "Unbalanced ')' in Penn tree: [" + aTree + "]");
                }
                closeTopNode(openNodes);
                // The parent is on top again. Its label (if any) must have preceded this child,
                // so whatever follows is a child and must not overwrite the parent's label.
                seenLabel = true;
            }
            else {
                if (openNodes.isEmpty()) {
                    throw new IllegalArgumentException("Token [" + t
                            + "] outside of any bracket in Penn tree: [" + aTree + "]");
                }

                PennTreeNode top = openNodes.peek();
                if (seenLabel) {
                    // The node already has its label, so this token is a terminal below it.
                    PennTreeNode terminal = new PennTreeNode();
                    terminal.setTokenIndex(tokenIndex);
                    tokenIndex++;
                    terminal.setLabel(t);
                    top.addChild(terminal);
                }
                else {
                    top.setLabel(t);
                    seenLabel = true;
                }
            }
        }

        // Input ended with open brackets: close them so that the nodes read so far are not lost.
        while (!openNodes.isEmpty()) {
            closeTopNode(openNodes);
        }

        return root;
    }

    /**
     * Removes the top node from the stack of open nodes and adds it as a child to the node below
     * it, if there is one.
     */
    private static void closeTopNode(Deque<PennTreeNode> aOpenNodes)
    {
        PennTreeNode closed = aOpenNodes.pop();
        PennTreeNode parent = aOpenNodes.peek();
        if (parent != null) {
            parent.addChild(closed);
        }
    }

    /**
     * Renders a tree in bracket notation. Non-terminal nodes are written as
     * {@code (LABEL child child ...)}, terminal nodes as their bare label. Labels are written as
     * they are, no bracket escaping is applied.
     *
     * @param aNode
     *            the root of the (sub)tree to render; may be {@code null}.
     * @return the bracket notation, or the empty string if {@code aNode} is {@code null}.
     */
    public static String toPennTree(PennTreeNode aNode)
    {
        StringBuilder sb = new StringBuilder();
        // Mirror toText(): a null tree (e.g. parsed from blank input) renders as nothing.
        if (aNode != null) {
            toPennTree(sb, aNode);
        }
        return sb.toString().trim();
    }

    /**
     * Recursively appends the bracket notation of {@code aNode} to the buffer.
     */
    private static void toPennTree(StringBuilder aSb, PennTreeNode aNode)
    {
        if (aNode.isTerminal()) {
            aSb.append(aNode.getLabel());
            return;
        }

        aSb.append('(');
        aSb.append(aNode.getLabel());
        aSb.append(' ');
        Iterator<PennTreeNode> children = aNode.getChildren().iterator();
        while (children.hasNext()) {
            toPennTree(aSb, children.next());
            // Space between siblings only, not before the closing bracket.
            if (children.hasNext()) {
                aSb.append(' ');
            }
        }
        aSb.append(')');
    }
}