package org.maltparser.core.propagation;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.propagation.spec.PropagationSpec;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;
/**
 * A propagation object propagates a column value from one node to a column in another node
 * based on the propagation specification.
 *
 * <p>For a given edge, the value is read from the FROM symbol table (taken from the edge itself
 * if it carries that label, otherwise from the edge's target node) and written to the TO symbol
 * table of the edge's source node. Multiple values are kept as a sorted, '|'-separated list.
 *
 * @author Johan Hall
 *
 */
public class Propagation {
    /** Separator between the individual symbols of a multi-valued column ('|'). */
    private static final Pattern SYMBOL_SEPARATOR = Pattern.compile("\\|");

    /** Symbol table of the column the value is read from. */
    private final SymbolTable fromTable;
    /** Symbol table of the column the value is written to. */
    private final SymbolTable toTable;
    /** Symbol table of the DEPREL column, used to test the edge label against the OVER set. */
    private final SymbolTable deprelTable;
    /** Symbols that are allowed to be propagated; an empty set means "no restriction". */
    private final SortedSet<String> forSet;
    /** DEPREL symbols of edges to propagate over; an empty set means "no restriction". */
    private final SortedSet<String> overSet;

    /**
     * Creates a propagation object based on the propagation specification
     *
     * @param spec a propagation specification
     * @param dataFormatInstance a data format instance
     * @throws MaltChainedException if the FROM column or the DEPREL column is not defined in the
     *         data format instance (raised as a PropagationException), or if the data format
     *         instance fails while the TO column is added
     */
    public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance) throws MaltChainedException {
        ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
        if (fromColumn == null) {
            throw new PropagationException("The symbol table '" + spec.getFrom() + "' does not exist.");
        }
        fromTable = fromColumn.getSymbolTable();

        ColumnDescription toColumn = dataFormatInstance.getColumnDescriptionByName(spec.getTo());
        if (toColumn == null) {
            // The TO column does not exist yet: create it as an internal column modelled on FROM.
            toColumn = dataFormatInstance.addInternalColumnDescription(spec.getTo(), fromColumn);
        }
        // Must be assigned whether the column was found or just created; otherwise toTable
        // stays null for an already existing TO column.
        toTable = toColumn.getSymbolTable();

        forSet = parseSymbolSet(spec.getFor());
        overSet = parseSymbolSet(spec.getOver());

        ColumnDescription deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
        if (deprelColumn == null) {
            throw new PropagationException("The symbol table 'DEPREL' does not exist.");
        }
        deprelTable = deprelColumn.getSymbolTable();
    }

    /**
     * Splits a '|'-separated specification value into a sorted set of symbols.
     *
     * @param items the raw specification value, may be null or empty
     * @return the symbols in the value; an empty set if the value is null or empty
     */
    private static SortedSet<String> parseSymbolSet(String items) {
        SortedSet<String> symbols = new TreeSet<String>();
        if (items != null && items.length() > 0) {
            for (String item : SYMBOL_SEPARATOR.split(items)) {
                symbols.add(item);
            }
        }
        return symbols;
    }

    /**
     * Propagate columns according to the propagation specification
     *
     * <p>Nothing happens if the edge is null, has no DEPREL label, starts at the root, or has a
     * DEPREL symbol outside a non-empty OVER set.
     *
     * @param e an edge
     * @throws MaltChainedException
     */
    public void propagate(Edge e) throws MaltChainedException {
        if (e == null || !e.hasLabel(deprelTable) || e.getSource().isRoot()) {
            return;
        }
        if (!overSet.isEmpty() && !overSet.contains(e.getLabelSymbol(deprelTable))) {
            return;
        }

        DependencyNode to = (DependencyNode) e.getSource();
        DependencyNode from = (DependencyNode) e.getTarget();

        // A FROM label on the edge takes precedence over one on the target node.
        String fromSymbol = null;
        if (e.hasLabel(fromTable)) {
            fromSymbol = e.getLabelSymbol(fromTable);
        } else if (from.hasLabel(fromTable)) {
            fromSymbol = from.getLabelSymbol(fromTable);
        }

        String propSymbol = null;
        if (to.hasLabel(toTable)) {
            // The receiving node already has a value: merge the new symbols into it.
            propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
        } else if (fromSymbol != null && (forSet.isEmpty() || forSet.contains(fromSymbol))) {
            // The null check is required: a TreeSet with natural ordering throws a
            // NullPointerException when asked whether it contains null.
            // NOTE(review): the unsplit FROM value is tested against the FOR set here, whereas
            // union() filters each '|'-separated part - confirm this difference is intended.
            propSymbol = fromSymbol;
        }

        if (propSymbol != null) {
            to.addLabel(toTable, propSymbol);
        }
    }

    /**
     * Merges two '|'-separated symbol lists into one sorted list without duplicates.
     *
     * <p>Symbols from {@code fromSymbol} are only included if the FOR set is empty or contains
     * them; symbols from {@code toSymbol} are always kept.
     *
     * @param fromSymbol the symbols to propagate, may be null or empty
     * @param toSymbol the symbols already present on the receiving node, may be null or empty
     * @return the merged symbols joined by '|', or an empty string if there are none
     */
    private String union(String fromSymbol, String toSymbol) {
        SortedSet<String> symbolSet = new TreeSet<String>();
        if (fromSymbol != null && fromSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(fromSymbol)) {
                if (forSet.isEmpty() || forSet.contains(symbol)) {
                    symbolSet.add(symbol);
                }
            }
        }
        if (toSymbol != null && toSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(toSymbol)) {
                symbolSet.add(symbol);
            }
        }

        StringBuilder sb = new StringBuilder();
        for (String symbol : symbolSet) {
            if (sb.length() > 0) {
                sb.append('|');
            }
            sb.append(symbol);
        }
        return sb.toString();
    }

    /**
     * Returns a description listing the FOR set, FROM table, OVER set and TO table.
     */
    @Override
    public String toString() {
        return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
                + ", overSet=" + overSet + ", toTable=" + toTable + "]";
    }
}