/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.conversion;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import edu.emory.clir.clearnlp.constituent.CTNode;
import edu.emory.clir.clearnlp.constituent.CTTagEn;
import edu.emory.clir.clearnlp.constituent.CTTree;
import edu.emory.clir.clearnlp.conversion.headrule.HeadRule;
import edu.emory.clir.clearnlp.conversion.headrule.HeadRuleMap;
import edu.emory.clir.clearnlp.conversion.headrule.HeadTagSet;
import edu.emory.clir.clearnlp.dependency.DEPNode;
import edu.emory.clir.clearnlp.dependency.DEPTree;
import edu.emory.clir.clearnlp.util.PatternUtils;
/**
* Abstract constituent to dependency converter.
* @since 3.0.0
* @author Jinho D. Choi ({@code jinho.choi@emory.edu})
*/
abstract public class AbstractC2DConverter
{
protected HeadRuleMap m_headrules;
protected HeadRule r_default;
public AbstractC2DConverter(HeadRuleMap headrules, HeadRule defaultRule)
{
m_headrules = headrules;
r_default = defaultRule;
}
/**
* Sets the head of the specific node and all its sub-nodes.
* Calls {@link AbstractC2DConverter#findHeads(CTNode)}.
*/
protected void setHeads(CTNode curr)
{
// terminal nodes become the heads of themselves
if (curr.isTerminal())
{
curr.setC2DInfo(new C2DInfo(curr));
return;
}
// set the heads of all children
for (CTNode child : curr.getChildrenList())
setHeads(child);
// stop traversing if it is the top node
if (curr.isConstituentTag(CTTagEn.TOP))
return;
// only one child
if (curr.getChildrenSize() == 1)
{
curr.setC2DInfo(new C2DInfo(curr.getChild(0)));
return;
}
// find the headrule of the current node
HeadRule rule = m_headrules.get(curr.getConstituentTag());
if (rule == null)
{
System.err.println("Error: headrules not found for \""+curr.getConstituentTag()+"\"");
rule = r_default;
}
// abstract method
setHeadsAux(rule, curr);
}
/**
* @return the head of the specific node list according to the specific headrule.
* Every other node in the list becomes the dependent of the head node.
* @param rule the headrule to be consulted.
* @param nodes the list of nodes.
* @param flagSize the number of head flags.
*/
protected CTNode getHead(HeadRule rule, List<CTNode> nodes, int flagSize)
{
CTNode head = getDefaultHead(nodes);
if (head == null)
{
nodes = new ArrayList<>(nodes);
if (rule.isRightToLeft()) Collections.reverse(nodes);
int i, size = nodes.size(), flag;
int[] flags = new int[size];
CTNode child;
for (i=0; i<size; i++)
flags[i] = getHeadFlag(nodes.get(i));
outer: for (flag=0; flag<flagSize; flag++)
{
for (HeadTagSet tagset : rule.getHeadTags())
{
for (i=0; i<size; i++)
{
child = nodes.get(i);
if (flags[i] == flag && tagset.matches(child))
{
head = child;
break outer;
}
}
}
}
outer: for (flag=0; flag<flagSize; flag++)
{
for (HeadTagSet tagset : rule.getHeadTags())
{
for (i=0; i<size; i++)
{
child = nodes.get(i);
if (flags[i] == flag && tagset.matches(child))
{
head = child;
break outer;
}
}
}
}
}
if (head == null)
throw new IllegalStateException("Head not found");
CTNode parent = head.getParent();
for (CTNode node : nodes)
{
if (node != head && !node.getC2DInfo().hasHead())
node.getC2DInfo().setHead(head, getDEPLabel(node, parent, head));
}
return head;
}
private CTNode getDefaultHead(List<CTNode> nodes)
{
CTNode head = null;
for (CTNode node : nodes)
{
if (!node.isEmptyCategoryTerminal())
{
if (head != null) return null;
head = node;
}
}
return head;
}
/** @return the dependency tree converted from the specific constituent tree without head information. */
protected DEPTree initDEPTree(CTTree cTree)
{
List<CTNode> cNodes = cTree.getTokenList();
List<DEPNode> dNodes = new ArrayList<>();
String form, pos;
DEPNode dNode;
int id;
for (CTNode cNode : cNodes)
{
id = cNode.getTokenID() + 1;
form = PatternUtils.revertBrackets(cNode.getWordForm());
pos = cNode.getConstituentTag();
dNode = new DEPNode(id, form, pos, cNode.getC2DInfo().getDEPFeat());
dNode.initSecondaryHeads();
dNodes.add(dNode);
}
return new DEPTree(dNodes);
}
/**
* Sets the head of the specific constituent node using the specific headrule.
* Called by {@link #setHeads(CTNode)}.
*/
abstract protected void setHeadsAux(HeadRule rule, CTNode curr);
/**
* @return the head flag of the specific constituent node.
* @see EnglishC2DConverter#getHeadFlag(CTNode).
*/
abstract protected int getHeadFlag(CTNode child);
/**
* Returns a dependency label given the specific phrase structure.
* @param C the current node.
* @param P the parent of {@code C}.
* @param p the head of {@code P}.
* @return a dependency label given the specific phrase structure.
*/
abstract protected String getDEPLabel(CTNode C, CTNode P, CTNode p);
/**
* Returns the dependency tree converted from the specific constituent tree.
* If the constituent tree contains only empty categories, returns {@code null}.
* @param cTree the constituent tree to convert.
* @return the dependency tree converted from the specific constituent tree.
*/
abstract public DEPTree toDEPTree(CTTree cTree);
}