// CollinsDependency.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.trees; 
import edu.stanford.nlp.util.logging.Redwood;

import java.util.*;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.trees.CollinsRelation.Direction;
import edu.stanford.nlp.util.Generics;

/**
 * Extracts bilexical dependencies from Penn Treebank-style phrase structure trees
 * as described in (Collins, 1999) and the later Comp. Ling. paper (Collins, 2003).
 * <p>
 * Each instance is an immutable (modifier, head, relation) triple. The static
 * {@code extract*FromTree} methods build the set of such triples for a tree.
 *
 * @author Spence Green
 *
 */
public class CollinsDependency implements Dependency<CoreLabel, CoreLabel, String>  {

	/** A logger for this class */
	private static final Redwood.RedwoodChannels log = Redwood.channels(CollinsDependency.class);

	private static final long serialVersionUID = -4236496863919294754L;

	/** Label substituted for pre-terminal categories when POS normalization is requested. */
	private static final String normPOSLabel = "TAG";

	private final CoreLabel modifier;
	private final CoreLabel head;
	private final CollinsRelation relation;

	/**
	 * Modifier must have IndexAnnotation. If head has 0 as its index, then it is
	 * the start symbol ("boundary symbol" in the Dan Klein code).
	 *
	 * @param modifier the dependent word; its {@code index()} must not be 0
	 * @param head the governing word, or the start-symbol label (index 0)
	 * @param rel the relation between head and modifier
	 * @throws IllegalArgumentException if {@code modifier.index()} is 0
	 */
	public CollinsDependency(CoreLabel modifier, CoreLabel head, CollinsRelation rel) {

		// Index 0 is reserved for the start symbol, so a modifier with index 0 is
		// treated as having no index annotation at all.
		if (modifier.index() == 0) {
			throw new IllegalArgumentException("No index annotation for " + modifier);
		}

		this.modifier = modifier;
		this.head = head;
		this.relation = rel;
	}

	/** @return the relation between the head and the modifier */
	public CollinsRelation getRelation() { return relation; }

	/** No factory is provided for this dependency type; always returns {@code null}. */
	public DependencyFactory dependencyFactory() { return null; }

	/** @return the modifier of this dependency */
	public CoreLabel dependent() { return modifier; }

	/** @return the head of this dependency */
	public CoreLabel governor() { return head; }

	/** Identical to {@link #equals(Object)}: the name is the same constant for every instance. */
	public boolean equalsIgnoreName(Object o) { return this.equals(o); }

	/** @return the constant name shared by every instance of this class */
	public String name() { return "CollinsBilexicalDependency"; }

	/** The {@code format} argument is ignored; this delegates to {@link #toString()}. */
	public String toString(String format) { return toString(); }


	/**
	 * Builds the artificial label that governs the root word: its value is the
	 * start symbol and its index is 0.
	 */
	private static CoreLabel makeStartLabel(String label) {
		CoreLabel root = new CoreLabel();
		root.set(CoreAnnotations.ValueAnnotation.class, label);
		root.set(CoreAnnotations.IndexAnnotation.class, 0);
		return root;
	}


	/**
	 * Extracts dependencies using the category labels exactly as they appear in the tree.
	 *
	 * @param t  The tree
	 * @param startSymbol  The value of the start symbol
	 * @param hf  A head finding algorithm.
	 * @return A set of dependencies, or {@code null} if any argument is missing or empty
	 */
	public static Set<CollinsDependency> extractFromTree(Tree t, String startSymbol, HeadFinder hf) {
		return extractFromTree(t,startSymbol,hf,false);
	}

	/**
	 * Extracts dependencies, replacing the category of every pre-terminal that takes
	 * part in a relation with the generic label {@code "TAG"}.
	 *
	 * @param t  The tree
	 * @param startSymbol  The value of the start symbol
	 * @param hf  A head finding algorithm.
	 * @return A set of dependencies, or {@code null} if any argument is missing or empty
	 */
	public static Set<CollinsDependency> extractNormalizedFromTree(Tree t, String startSymbol, HeadFinder hf) {
		return extractFromTree(t,startSymbol,hf,true);
	}

	/**
	 * Walks the tree and emits one dependency per non-head daughter of every node
	 * with at least two children, plus one dependency that attaches the head word
	 * of the first such node to the start symbol.
	 * <p>
	 * If the top node of {@code t} carries the start symbol it is skipped and
	 * extraction starts at its first child. Leaf labels must already carry a
	 * non-zero index, otherwise the {@link CollinsDependency} constructor throws.
	 *
	 * @param t  The tree
	 * @param startSymbol  The value of the start symbol; used as the head of the root dependency
	 * @param hf  A head finding algorithm.
	 * @param normPOS  Whether pre-terminal categories are replaced by {@code "TAG"} in relations
	 * @return A set of dependencies, or {@code null} if {@code t}, {@code startSymbol} or
	 *         {@code hf} is {@code null} or {@code startSymbol} is empty
	 */
	private static Set<CollinsDependency> extractFromTree(Tree t, String startSymbol, HeadFinder hf, boolean normPOS) {
		if (t == null || startSymbol == null || startSymbol.isEmpty() || hf == null) return null;

		final Set<CollinsDependency> deps = Generics.newHashSet();

		// Null-safe comparison: a tree node is not required to have a value.
		if (startSymbol.equals(t.value())) {
			t = t.firstChild();
			// A bare start symbol with nothing below it yields no dependencies.
			if (t == null) return deps;
		}

		boolean mustProcessRoot = true;
		for (final Tree node : t) {
			// Only nodes with two or more daughters can contribute a head/modifier pair.
			if (node.isLeaf() || node.numChildren() < 2) continue;

			final Tree headDaughter = hf.determineHead(node);
			final Tree head = node.headTerminal(hf);

			if (headDaughter == null || head == null) {
				log.info("WARNING: CollinsDependency.extractFromTree() could not find root for:\n" + node.pennString());
				continue;
			}

			// The first branching node reached supplies the word attached to the start symbol.
			if (mustProcessRoot) {
				mustProcessRoot = false;
				final CoreLabel startLabel = makeStartLabel(startSymbol);
				deps.add(new CollinsDependency(new CoreLabel(head.label()), startLabel, new CollinsRelation(startSymbol, startSymbol, node.value(), Direction.Right)));
			}

			// Prefer object identity when locating the head daughter. Comparing with
			// equals() alone would also match a sibling that merely looks like the
			// head daughter (e.g. a repeated word), silently dropping that sibling's
			// dependency. equals() is kept as a fallback for head finders that do
			// not hand back one of the node's own daughters.
			final boolean matchByIdentity = hasDaughterInstance(node, headDaughter);

			// Daughters before the head daughter attach from the left, those after it from the right.
			Direction dir = Direction.Left;
			for (final Tree daughter : node.children()) {
				final boolean isHeadDaughter = matchByIdentity ? daughter == headDaughter : daughter.equals(headDaughter);
				if (isHeadDaughter) {
					dir = Direction.Right;
					continue;
				}

				final Tree headOfDaughter = daughter.headTerminal(hf);
				if (headOfDaughter == null) {
					// Same best-effort policy as for the node itself: report and move on.
					log.info("WARNING: CollinsDependency.extractFromTree() could not find head terminal for:\n" + daughter.pennString());
					continue;
				}

				final CollinsRelation rel = new CollinsRelation(
						relationLabel(node, normPOS),
						relationLabel(headDaughter, normPOS),
						relationLabel(daughter, normPOS),
						dir);

				deps.add(new CollinsDependency(new CoreLabel(headOfDaughter.label()), new CoreLabel(head.label()), rel));
			}
		}

		//TODO Combine the indexing procedure above with yield here so that two searches aren't performed.
		// Sanity check: the number of dependencies is expected to equal the number of words.
		final int yieldSize = t.yield().size();
		if (yieldSize != deps.size()) {
			log.info(String.format("WARNING: Number of extracted dependencies (%d) does not match yield (%d):", deps.size(), yieldSize));
			log.info(t.pennString());
			log.info();
			int num = 0;
			for (CollinsDependency dep : deps)
				log.info(num++ + ": " + dep.toString());
		}

		return deps;
	}

	/** Returns true if {@code target} is, by object identity, one of the daughters of {@code node}. */
	private static boolean hasDaughterInstance(Tree node, Tree target) {
		for (final Tree daughter : node.children()) {
			if (daughter == target) return true;
		}
		return false;
	}

	/** Returns the category used in a relation: {@code "TAG"} for a pre-terminal when normalizing, else the node's value. */
	private static String relationLabel(Tree node, boolean normPOS) {
		return (normPOS && node.isPreTerminal()) ? normPOSLabel : node.value();
	}


	/** Renders the dependency as {@code modifier (index)   head (index)  <relation>}. */
	@Override
	public String toString() {
		return String.format("%s (%d)   %s (%d)  <%s>", modifier.value(),modifier.index(),head.value(),head.index(),relation.toString());
	}

	/** Two dependencies are equal when modifier, head and relation are all equal. */
	@Override
	public boolean equals(Object other) {
		if (this == other)
			return true;
		if (!(other instanceof CollinsDependency))
			return false;

		final CollinsDependency otherDep = (CollinsDependency) other;

		return (modifier.equals(otherDep.modifier) &&
				head.equals(otherDep.head) &&
				relation.equals(otherDep.relation));
	}

	/**
	 * Hashes the modifier index and the head value. These are the same two
	 * ingredients the hash has always used, so it stays consistent with
	 * {@link #equals(Object)} to the same degree as before. They are combined
	 * additively so that a zero-valued component cannot collapse the whole hash
	 * to 0, and a {@code null} head value is tolerated.
	 */
	@Override
	public int hashCode() {
		final String headValue = head.value();
		int hash = 17;
		hash = 31 * hash + modifier.index();
		hash = 31 * hash + (headValue == null ? 0 : headValue.hashCode());
		return hash;
	}

}