/** * Copyright 2014, Emory University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.emory.clir.clearnlp.dependency; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.StringJoiner; import java.util.function.BiPredicate; import java.util.regex.Pattern; import edu.emory.clir.clearnlp.collection.list.SortedArrayList; import edu.emory.clir.clearnlp.collection.pair.Pair; import edu.emory.clir.clearnlp.collection.set.IntHashSet; import edu.emory.clir.clearnlp.feature.type.DirectionType; import edu.emory.clir.clearnlp.feature.type.FieldType; import edu.emory.clir.clearnlp.reader.TSVReader; import edu.emory.clir.clearnlp.util.DSUtils; import edu.emory.clir.clearnlp.util.StringUtils; import edu.emory.clir.clearnlp.util.arc.AbstractArc; import edu.emory.clir.clearnlp.util.arc.DEPArc; import edu.emory.clir.clearnlp.util.arc.SRLArc; import edu.emory.clir.clearnlp.util.constant.StringConst; /** * @since 3.0.0 * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */ public class DEPNode implements Comparable<DEPNode>, Serializable { private static final long serialVersionUID = 3794720014142939766L; /** The ID of this node (default: {@link DEPLib#NULL_ID}). */ private int n_id; /** The word-form of this node. */ private String s_wordForm; /** The simplified word-form of this node. 
*/ private String s_simplifiedWordForm; /** The lemma of the word-form. */ private String s_lemma; /** The part-of-speech tag of the word-form. */ private String s_posTag; /** The named entity tag of this node. */ private String s_namedEntityTag; /** The extra features of this node. */ private DEPFeat d_feats; /** The dependency label of this node. */ private String s_label; /** The dependency head of this node. */ private DEPNode d_head; /** The sorted list of all dependents of this node (default: empty). */ private SortedArrayList<DEPNode> l_dependents; /** The ID of this node among its sibling (starting with 0). */ private int n_siblingID; /** The list of secondary heads of this node (default: empty). */ private List<DEPArc> x_heads; /** The list of semantic heads of this node (default: empty). */ private List<SRLArc> s_heads; // ====================================== Constructors ====================================== /** * Construct an empty DEPNode. */ public DEPNode() {} /** * Construct DEPNode with id and word-from. * @param id id of the node * @param form word-from of the node */ public DEPNode(int id, String form) { init(id, form, null, null, null, new DEPFeat()); } /** * Construct DEPNode with id, word-from, POS tag, and extra features. * @param id id of the node * @param form word-from of the node * @param posTag POS tag of the node * @param feats extra features of the node */ public DEPNode(int id, String form, String posTag, DEPFeat feats) { init(id, form, null, posTag, null, feats); } /** * Construct DEPNode with id, word-from, word-form lemma, POS tag, and extra features. 
* @param id id of the node * @param form word-from of the node * @param lemma word-form lemma of the node * @param posTag POS tag of the node * @param feats extra features of the node */ public DEPNode(int id, String form, String lemma, String posTag, DEPFeat feats) { init(id, form, lemma, posTag, null, feats); } /** * Construct DEPNode with id, word-from, word-form lemma, POS tag, name-entity tag, and extra features. * @param id id of the node * @param form word-from of the node * @param lemma word-form lemma of the node * @param posTag POS tag of the node * @param namedEntityTag name-entity tag of the node * @param feats extra features of the node */ public DEPNode(int id, String form, String lemma, String posTag, String namedEntityTag, DEPFeat feats) { init(id, form, lemma, posTag, namedEntityTag, feats); } /** * Copy constuctor that copies the basic fields from the specific node to this node. * @param node another DEPNode you wish to copy */ public DEPNode(DEPNode node) { init(node.n_id, node.s_wordForm, node.s_lemma, node.s_posTag, node.s_namedEntityTag, new DEPFeat(node.d_feats)); } // ====================================== Initialization ====================================== /** * Initializes all fields of a DEPNode. * @param id id of the node * @param form word-from of the node * @param lemma word-form lemma of the node * @param posTag POS tag of the node * @param namedEntityTag name-entity tag of the node * @param feats extra features of the node */ public void init(int id, String form, String lemma, String posTag, String namedEntityTag, DEPFeat feats) { setID(id); setWordForm(form); setLemma(lemma); setPOSTag(posTag); setNamedEntityTag(namedEntityTag); setFeats(feats); setLabel(null); setHead(null); l_dependents = new SortedArrayList<>(); initSemanticHeads(); } /** Initializes this node as an artificial root node. 
*/ public void initRoot() { init(DEPLib.ROOT_ID, DEPLib.ROOT_TAG, DEPLib.ROOT_TAG, DEPLib.ROOT_TAG, DEPLib.ROOT_TAG, new DEPFeat()); } /** Initializes the secondary dependency heads of a node. */ public void initSecondaryHeads() { x_heads = new ArrayList<>(); } /** Initializes semantic heads of this node. */ public void initSemanticHeads() { s_heads = new ArrayList<>(); } /** Clear all dependencies(head, label, and sibling relations) of the node. */ void clearDependencies() { d_head = null; s_label = null; n_siblingID = 0; l_dependents.clear(); } // ====================================== Basic fields ====================================== /** * Get the ID of the node. * @return ID of the node */ public int getID() { return n_id; } /** * Get the word-form of the node. * @return word-form of the node */ public String getWordForm() { return s_wordForm; } /** * Get the simplified word-from of the node. * @return simplified word-form of the node */ public String getSimplifiedWordForm() { return s_simplifiedWordForm; } /** * Get the simplified word-form of the node in all lower-case characters. * @return simplified word-from of the node in all lower-case characters */ public String getLowerSimplifiedWordForm() { return StringUtils.toLowerCase(s_simplifiedWordForm); } /** * Get the word shape of the simplified word-form of the node. * @param maxRepetitions the max count of repetition of a word shape in sequence * @return the word shape of a node's simplified word-form */ public String getWordShape(int maxRepetitions) { return StringUtils.getShape(s_simplifiedWordForm, maxRepetitions); } /** * Get the lemma of the word-form of the node. * @return lemma of the word-form of the node */ public String getLemma() { return s_lemma; } /** * Get the POS tag the node. * @return POS tag the node */ public String getPOSTag() { return s_posTag; } /** * Get the name-entity tag of the node. 
* @return name-entity tag of the node */ public String getNamedEntityTag() { return s_namedEntityTag; } /** * Get the extra features {@code DEDFeat} of the node. * @return extra features of the node */ public DEPFeat getFeats() { return d_feats; } /** * Get a specific feature of the extra features of the node. * @param key feature label of the extra feature * @return the value of the specific feature if exists; otherwise, {@code null} */ public String getFeat(String key) { return d_feats.get(key); } /** * Set the ID of the node. * @param id ID of the node */ public void setID(int id) { n_id = id; } /** * Set the simplified word-form of the node. * @param form simplified word-form of the node */ public void setWordForm(String form) { s_wordForm = form; s_simplifiedWordForm = StringUtils.toSimplifiedForm(form); // b_punctuation = StringUtils.containsPunctuationOnly(s_simplifiedWordForm); } /** * Set the lemma of the word-form of the node. * @param lemma lemma of the word-form of the node */ public void setLemma(String lemma) { s_lemma = lemma; } /** * Set the POS tag of the node. * @param posTag POS tag of the node */ public void setPOSTag(String posTag) { s_posTag = posTag; } /** * Set the name-entity tag of the node. * @param namedEntityTag name-entity tag of the node */ public void setNamedEntityTag(String namedEntityTag) { s_namedEntityTag = namedEntityTag; } /** * Set the extra features {@code DEPFeat} of the node. * @param feats extra features of the node */ public void setFeats(DEPFeat feats) { d_feats = feats; } /** * Puts an extra feature to this node using the specific key and value. * This method overwrites an existing value of the same key with the current value. * @param key key of the extra feature * @param value value of the extra feature */ public void putFeat(String key, String value) { d_feats.put(key, value); } /** * Clear the POS tag of the node to {@code null} and return the POS tag of the node. 
* @return the removed POS tag of the node */ public String clearPOSTag() { String pos = s_posTag; setPOSTag(null); return pos; } public String clearNamedEntityTag() { String ner = s_namedEntityTag; setNamedEntityTag(null); return ner; } /** * Removes the extra feature with the specific key. * @param key key of the extra feature * @return value of the removed extra feature */ public String removeFeat(String key) { return d_feats.remove(key); } // ====================================== Getters ====================================== /** * Get the dependency label of the node. * @return the dependency label of the node */ public String getLabel() { return s_label; } /** * Get the dependency head of this node. * @return the dependency head of this node */ public DEPNode getHead() { return d_head; } /** * Get the dependency grand-head of the node. * @return the dependency grand-head of the node if exists; otherwise, {@code null}. */ public DEPNode getGrandHead() { DEPNode head = getHead(); return (head == null) ? null : head.getHead(); } /** * Get the left nearest sibling node of the node. * Calls {@link #getLeftNearestSibling(int)}, where {@code order=0} * @return the left nearest sibling node */ public DEPNode getLeftNearestSibling() { return getLeftNearestSibling(0); } /** * Get the left sibling node with input displacement (0 - leftmost, 1 - second leftmost, etc.). * @param order left displacement * @return the left sibling node with input displacement */ public DEPNode getLeftNearestSibling(int order) { if (d_head != null) { order = n_siblingID - order - 1; if (order >= 0) return d_head.getDependent(order); } return null; } public DEPNode getLeftNearestSibling(String label) { if (d_head != null) { DEPNode node; for (int i=n_siblingID-1; i>=0; i--) { node = d_head.getDependent(i); if (node.isLabel(label)) return node; } } return null; } /** * Get the right nearest sibling node of the node. * Calls {@link #getRightNearestSibling(int)}, where {@code order=0}. 
* @return the right nearest sibling node */ public DEPNode getRightNearestSibling() { return getRightNearestSibling(0); } /** * Get the right sibling node with input displacement (0 - rightmost, 1 - second rightmost, etc.). * @param order right displacement * @return the right sibling node with input displacement */ public DEPNode getRightNearestSibling(int order) { if (d_head != null) { order = n_siblingID + order + 1; if (order < d_head.getDependentSize()) return d_head.getDependent(order); } return null; } public DEPNode getRightNearestSibling(String label) { if (d_head != null) { int i, size = d_head.getDependentSize(); DEPNode node; for (i=n_siblingID+1; i<size; i++) { node = d_head.getDependent(i); if (node.isLabel(label)) return node; } } return null; } /** * Get the left most dependency node of the node. * Calls {@link #getLeftMostDependent(int)}, where {@code order=0} * @return the left most dependency node of the node */ public DEPNode getLeftMostDependent() { return getLeftMostDependent(0); } /** * Get the left dependency node with input displacement (0 - leftmost, 1 - second leftmost, etc.). * The leftmost dependent must be on the left-hand side of this node. * @param order left displacement * @return the leftmost dependent of this node if exists; otherwise, {@code null} */ public DEPNode getLeftMostDependent(int order) { if (DSUtils.isRange(l_dependents, order)) { DEPNode dep = getDependent(order); if (dep.n_id < n_id) return dep; } return null; } /** * Get the right most dependency node of the node. * Calls {@link #getRightMostDependent(int)}, where {@code order=0}. * @return the right most dependency node of the node */ public DEPNode getRightMostDependent() { return getRightMostDependent(0); } /** * Get the right dependency node with input displacement (0 - rightmost, 1 - second rightmost, etc.). * The rightmost dependent must be on the right-hand side of this node. 
* @param order right displacement * @return the rightmost dependent of this node if exists; otherwise, {@code null} */ public DEPNode getRightMostDependent(int order) { order = getDependentSize() - 1 - order; if (DSUtils.isRange(l_dependents, order)) { DEPNode dep = getDependent(order); if (dep.n_id > n_id) return dep; } return null; } /** * Get the left nearest dependency node. * Calls {@link #getLeftNearestDependent(int)}, where {@code order=0}. * @return the left nearest dependency node */ public DEPNode getLeftNearestDependent() { return getLeftNearestDependent(0); } /** * Get the left nearest dependency node with input displacement (0 - left-nearest, 1 - second left-nearest, etc.). * The left nearest dependent must be on the left-hand side of this node. * @param order left displacement * @return the left-nearest dependent of this node if exists; otherwise, {@code null} */ public DEPNode getLeftNearestDependent(int order) { int index = l_dependents.getInsertIndex(this) - order - 1; return (index >= 0) ? getDependent(index) : null; } /** * Get the right nearest dependency node. * Calls {@link #getRightNearestDependent(int)}, where {@code order=0}. * @return the right nearest dependency node */ public DEPNode getRightNearestDependent() { return getRightNearestDependent(0); } /** * Get the right nearest dependency node with input displacement (0 - right-nearest, 1 - second right-nearest, etc.). * The right-nearest dependent must be on the right-hand side of this node. * @param order right displacement * @return the right-nearest dependent of this node if exists; otherwise, {@code null} */ public DEPNode getRightNearestDependent(int order) { int index = l_dependents.getInsertIndex(this) + order; return (index < getDependentSize()) ? 
getDependent(index) : null; } public DEPNode getFirstDependent(BiPredicate<DEPNode,String> p, String tag) { for (DEPNode node : l_dependents) { if (p.test(node, tag)) return node; } return null; } /** * Get the first dependency node of the node by label. * @param label string label of the first-dependency node * @return the first-dependency node of the specific label */ public DEPNode getFirstDependentByLabel(String label) { return getFirstDependent((n, t) -> n.isLabel(t), label); } public DEPNode getFirstDependentByPOS(String label) { return getFirstDependent((n, t) -> n.isPOSTag(t), label); } public DEPNode getFirstDependentByLemma(String lemma) { return getFirstDependent((n, t) -> n.isLemma(t), lemma); } /** * Get the first dependency node of the node by label. * @param pattern pattern label of the first-dependency node * @return the first-dependency node of the specific label */ public DEPNode getFirstDependentByLabel(Pattern pattern) { for (DEPNode node : l_dependents) { if (node.isLabel(pattern)) return node; } return null; } /** * Get the list of all the dependency nodes of the node. * @return list of all the dependency nodes of the node */ public List<DEPNode> getDependentList() { return l_dependents; } /** * Get the list of all the dependency nodes of the node by label. * @param label string label * @return list of all the dependency nodes of the node by label */ public List<DEPNode> getDependentListByLabel(String label) { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.isLabel(label)) list.add(node); } return list; } /** * Get the list of all the dependency nodes of the node by labels set. 
* @param label labels set * @return list of all the dependency nodes of the node by labels set */ public List<DEPNode> getDependentListByLabel(Set<String> labels) { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (labels.contains(node.getLabel())) list.add(node); } return list; } /** * Get the list of all the dependency nodes of the node by label pattern. * @param label label pattern * @return list of all the dependency nodes of the node by label pattern */ public List<DEPNode> getDependentListByLabel(Pattern pattern) { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.isLabel(pattern)) list.add(node); } return list; } /** * Get the list of all the left dependency nodes of the node. * @return list of all the left dependency nodes of the node */ public List<DEPNode> getLeftDependentList() { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.n_id > n_id) break; list.add(node); } return list; } /** * Get the list of all the left dependency nodes of the node by label pattern. * @param label label pattern * @return list of all the left dependency nodes of the node by label pattern */ public List<DEPNode> getLeftDependentListByLabel(Pattern pattern) { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.n_id > n_id) break; if (node.isLabel(pattern)) list.add(node); } return list; } /** * Get the list of all the right dependency nodes of the node. * @return list of all the right dependency nodes of the node */ public List<DEPNode> getRightDependentList() { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.n_id < n_id) continue; list.add(node); } return list; } /** * Get the list of all the right dependency nodes of the node by label pattern. 
* @param label label pattern * @return list of all the right dependency nodes of the node by label pattern */ public List<DEPNode> getRightDependentListByLabel(Pattern pattern) { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) { if (node.n_id < n_id) continue; if (node.isLabel(pattern)) list.add(node); } return list; } /** * Get the list of all grand-dependents of the node. * @return an unsorted list of grand-dependents of the node */ public List<DEPNode> getGrandDependentList() { List<DEPNode> list = new ArrayList<>(); for (DEPNode node : l_dependents) list.addAll(node.getDependentList()); return list; } /** * Get the list of all descendant nodes of the node with specified height. * If {@code height == 1}, return {@link #getDependentList()}. * If {@code height > 1} , return all descendants within the depth. * If {@code height < 1} , return an empty list. * @param height height level of the descendant nodes * @return an unsorted list of descendants. */ public List<DEPNode> getDescendantList(int height) { List<DEPNode> list = new ArrayList<>(); if (height > 0) getDescendantListAux(this, list, height-1); return list; } private void getDescendantListAux(DEPNode node, List<DEPNode> list, int height) { list.addAll(node.getDependentList()); if (height > 0) { for (DEPNode dep : node.getDependentList()) getDescendantListAux(dep, list, height-1); } } /** * Get any descendant node with POS tag. * @param tag POS tag * @return s descendant node with the POS tag */ public DEPNode getAnyDescendantByPOSTag(String tag) { return getAnyDescendantByPOSTagAux(this, tag); } private DEPNode getAnyDescendantByPOSTagAux(DEPNode node, String tag) { for (DEPNode dep : node.getDependentList()) { if (dep.isPOSTag(tag)) return dep; dep = getAnyDescendantByPOSTagAux(dep, tag); if (dep != null) return dep; } return null; } /** * Get the sorted list of all the nodes in the subtree of the node. 
* @return a sorted list of nodes in the subtree of this node (inclusive) */ public List<DEPNode> getSubNodeList() { List<DEPNode> list = new ArrayList<>(); getSubNodeCollectionAux(list, this); Collections.sort(list); return list; } /** * Get a set of all the nodes is the subtree of the node. * @return a set of nodes in the subtree of this node (inclusive) */ public Set<DEPNode> getSubNodeSet() { Set<DEPNode> set = new HashSet<>(); getSubNodeCollectionAux(set, this); return set; } private void getSubNodeCollectionAux(Collection<DEPNode> col, DEPNode node) { col.add(node); for (DEPNode dep : node.getDependentList()) getSubNodeCollectionAux(col, dep); } /** * Get the IntHashSet of all the nodes in the subtree (Node ID -> DEPNode). * @return the ntHashSet of all the nodes in the subtree (inclusive) */ public IntHashSet getSubNodeIDSet() { IntHashSet set = new IntHashSet(); getSubNodeIDSetAux(set, this); return set; } private void getSubNodeIDSetAux(IntHashSet set, DEPNode node) { set.add(node.n_id); for (DEPNode dep : node.getDependentList()) getSubNodeIDSetAux(set, dep); } /** * Get a sorted array of IDs of all the nodes in the subtree of the node. * @return a sorted array of IDs from the subtree of the node (inclusive) */ public int[] getSubNodeIDSortedArray() { IntHashSet set = getSubNodeIDSet(); int[] list = set.toArray(); Arrays.sort(list); return list; } /** * Get the dependency node with specific index. * @return the dependency node of the node with the specific index if exists; otherwise, {@code null}. * @throws IndexOutOfBoundsException */ public DEPNode getDependent(int index) { return l_dependents.get(index); } /** * Get the index of the dependency node of a specified DEPNode. * If the specific node is not a dependent of this node, returns a negative number. * @return the index of the dependent node among other siblings (starting with 0). 
*/
    public int getDependentIndex(DEPNode node) {
        return l_dependents.indexOf(node);
    }

    /**
     * Get the size of the dependents of the node.
     * @return the number of dependents of the node
     */
    public int getDependentSize() {
        return l_dependents.size();
    }

    /**
     * Get the valency of the node.
     * @param direction DirectionType of l, r, a
     * @return the left valency for {@code l}, the right valency for {@code r}, the left and
     *         right valencies joined by {@code "-"} for {@code a}; otherwise, {@code null}
     */
    public String getValency(DirectionType direction) {
        switch (direction) {
            case l:
                return getLeftValency();
            case r:
                return getRightValency();
            case a: {
                String left = getLeftValency();
                String right = getRightValency();
                return left + "-" + right;
            }
            default:
                return null;
        }
    }

    /**
     * Get the left valency of the node.
     * @return an empty string without a leftmost dependent, one {@code StringConst.LESS_THAN}
     *         with one, and two when a second leftmost dependent exists as well
     */
    public String getLeftValency() {
        StringBuilder valency = new StringBuilder();
        if (getLeftMostDependent() == null) return valency.toString();

        valency.append(StringConst.LESS_THAN);
        if (getLeftMostDependent(1) != null) valency.append(StringConst.LESS_THAN);
        return valency.toString();
    }

    /**
     * Get the right valency of the node.
     * @return an empty string without a rightmost dependent, one {@code StringConst.GREATER_THAN}
     *         with one, and two when a second rightmost dependent exists as well
     */
    public String getRightValency() {
        StringBuilder valency = new StringBuilder();
        if (getRightMostDependent() == null) return valency.toString();

        valency.append(StringConst.GREATER_THAN);
        if (getRightMostDependent(1) != null) valency.append(StringConst.GREATER_THAN);
        return valency.toString();
    }

    /**
     * Get sub-categorization of the node.
* @param direction direction DirectionType of l, r, a * @param field FieldType of tag feature * @return "< {@code TagFeature}" for left sub-categorization, "> {@code TagFeature}" for right-categorization, and {@code null} if not exist */ public String getSubcategorization(DirectionType direction, FieldType field) { switch (direction) { case l: return getLeftSubcategorization (field); case r: return getRightSubcategorization(field); case a: String left = getLeftSubcategorization(field); if (left == null) return getRightSubcategorization(field); String right = getRightSubcategorization(field); return (right == null) ? left : left+right; default: return null; } } /** * Get left sub-categorization of the node. * @param field FieldType of tag feature * @return "< {@code TagFeature}" for left sub-categorization, {@code null} if not exist. */ public String getLeftSubcategorization(FieldType field) { StringBuilder build = new StringBuilder(); int i, size = getDependentSize(); DEPNode node; for (i=0; i<size; i++) { node = getDependent(i); if (node.getID() > n_id) break; build.append(StringConst.LESS_THAN); build.append(node.getTagFeature(field)); } return build.length() > 0 ? build.toString() : null; } /** * Get right sub-categorization of the node. * @param field FieldType of tag feature * @return "> {@code TagFeature}" for right sub-categorization, {@code null} if not exist. */ public String getRightSubcategorization(FieldType field) { StringBuilder build = new StringBuilder(); int i, size = getDependentSize(); DEPNode node; for (i=size-1; i>=0; i--) { node = getDependent(i); if (node.getID() < n_id) break; build.append(StringConst.GREATER_THAN); build.append(node.getTagFeature(field)); } return build.length() > 0 ? build.toString() : null; } /** * Find the path of between this nodes and the input DEPNode. 
* @param node the node that you want to find the path from this node * @param field FieldType of the the node for search * @return the path between the two nodes */ public String getPath(DEPNode node, FieldType field) { DEPNode lca = getLowestCommonAncestor(node); return (lca != null) ? getPath(node, lca, field) : null; } /** * Find the path of between this nodes and the input DEPNode with the lowest common ancestor specified. * @param node the node that you want to find the path from this node * @param lca the lowest common ancestor DEPNode that you specified for the path * @param field FieldType of the the node for search * @return the path between the two nodes */ public String getPath(DEPNode node, DEPNode lca, FieldType field) { if (node == lca) return getPathAux(lca, this, field, "^", true); if (this == lca) return getPathAux(lca, node, field, "|", true); return getPathAux(lca, this, field, "^", true) + getPathAux(lca, node, field, "|", false); } private String getPathAux(DEPNode top, DEPNode bottom, FieldType field, String delim, boolean includeTop) { StringBuilder build = new StringBuilder(); DEPNode node = bottom; int dist = 0; String s; do { s = node.getTagFeature(field); if (s != null) { build.append(delim); build.append(s); } else { dist++; } node = node.getHead(); } while (node != top && node != null); if (field == FieldType.t) { build.append(delim); build.append(dist); } else if (field != FieldType.d && includeTop) { build.append(delim); build.append(top.getTagFeature(field)); } return build.length() == 0 ? null : build.toString(); } /** * Get a set of all the ancestor nodes of the node (ie. Parent node, Grandparent node, etc.). * @return set of all the ancestor nodes */ public Set<DEPNode> getAncestorSet() { Set<DEPNode> set = new HashSet<>(); DEPNode node = getHead(); while (node != null) { set.add(node); node = node.getHead(); } return set; } /** * Get the first/lowest common ancestor of the two given nodes (this node and the input DEPNode). 
* @param node the node that you want to find the lowest common ancestor with the node with * @return the lowest common ancestor of the node and the specified node */ public DEPNode getLowestCommonAncestor(DEPNode node) { Set<DEPNode> set = getAncestorSet(); set.add(this); while (node != null) { if (set.contains(node)) return node; node = node.getHead(); } return null; } /** * Get a specific tag feature of the node. * @param field FieldType of the feature * @return the value of the feature in the node */ public String getTagFeature(FieldType field) { switch (field) { case f : return getWordForm(); case m : return getLemma(); case p : return getPOSTag(); case n : return getNamedEntityTag(); case d : return getLabel(); default: return null; } } public String getFormFeature(FieldType field) { switch (field) { case f : return getWordForm(); case m : return getLemma(); case f2: return getSimplifiedWordForm(); case f3: return getLowerSimplifiedWordForm(); default: return null; } } // ====================================== Setters ====================================== /** * Sets the dependency label of this node with the specific label. * @param label label of the node */ public void setLabel(String label) { s_label = label; } /** * Sets the dependency head of this node with the specific node. * @param node head node of the node */ public void setHead(DEPNode node) { if (hasHead()) d_head.l_dependents.remove(this); if (node != null) n_siblingID = node.l_dependents.addItem(this); d_head = node; } /** * Sets the dependency head of this node with the specific node and the label. * @param node head node of the node * @param label label of the node */ public void setHead(DEPNode node, String label) { setHead (node); setLabel(label); } /** * Add the node as a dependent to a specified node. 
* @param node head node that you wish to add the node as a dependent to */ public void addDependent(DEPNode node) { node.setHead(this); } /** * Add the node as a dependent to a specified node and set the label of the node. * @param node head node that you wish to add the node as a dependent to * @param label label of the node */ public void addDependent(DEPNode node, String label) { node.setHead(this, label); } // ====================================== Booleans ====================================== /** * Check if the node has a head node. * @return {@code true} if this node has the dependency head; otherwise {@code false} if head is {@code null}. */ public boolean hasHead() { return d_head != null; } /** * Check if the node contain another as dependent. * @param node dependent code for check * @return {@code true} if the node has the input DEPNode as a dependent */ public boolean containsDependent(DEPNode node) { return l_dependents.contains(node); } /** * Check if the node has the label for its first dependent. * @param label label of the node for check * @return {@code true} if the node's first dependent has the input label */ public boolean containsDependent(String label) { return getFirstDependentByLabel(label) != null; } public boolean containsDependentPOS(String tag) { return getFirstDependentByPOS(tag) != null; } public boolean containsDependentLemma(String lemma) { return getFirstDependentByLemma(lemma) != null; } /** * Check if the node has the pattern for its first dependent. * @param pattern pattern of the node for check * @return {@code true} if the node's first dependent has the input pattern */ public boolean containsDependent(Pattern pattern) { return getFirstDependentByLabel(pattern) != null; } /** * Check if the node has word-form of as the input string. 
* @param form word-form for check * @return {@code true} if the node's word-form is equal to the input string */ public boolean isWordForm(String form) { return form.equals(s_wordForm); } /** * Check if the node has simplified word-form as the input string. * @param form simplified word-form for check * @return {@code true} if the node's simplified word-form is equal to the input string */ public boolean isSimplifiedForm(String form) { return form.equals(s_simplifiedWordForm); } /** * Check if the node has word-form lemma as the input string. * @param lemma word-form lemma for check * @return {@code true} if the node's word-form lemma is equal to the input string */ public boolean isLemma(String lemma) { return lemma.equals(s_lemma); } /** * Check if the node has POS tag as the input string. * @param tag POS tag string for check * @return {@code true} if the part-of-speech tag of this node equals to the specific tag */ public boolean isPOSTag(String tag) { return tag.equals(s_posTag); } /** * Check if the node has POS tag as the input pattern. * @param pattern POS tag pattern for check * @return {@code true} if the part-of-speech tag of this node matches the specific pattern */ public boolean isPOSTag(Pattern pattern) { return pattern.matcher(s_posTag).find(); } /** * Check if the node has the name-entity tag as the input string. * @param tag name-entity tag string for check * @return {@code true} if the named entity tag of this node equals to the specific tag */ public boolean isNamedEntityTag(String tag) { return tag.equals(s_namedEntityTag); } /** * Check if the node has the label as the input string. * @param label label string for check * @return {@code true} if the dependency label of this node equals to the specific label */ public boolean isLabel(String label) { return label.equals(s_label); } /** * Check if the node has the label as any label in the input strings array. 
* @param labels label string array for check * @return {@code true} if the dependency label of this node equals to any of the specific labels */ public boolean isLabelAny(String... labels) { for (String label : labels) { if (label.equals(s_label)) return true; } return false; } /** * Check if the node has the label as the input label pattern. * @param pattern label pattern for check * @return {@code true} if the dependency label of this node matches the specific pattern */ public boolean isLabel(Pattern pattern) { return pattern.matcher(s_label).find(); } /** * Check if the node has the input dependent node. * @param node dependent node for check * @return {@code true} if this node is a dependent of the specific node */ public boolean isDependentOf(DEPNode node) { return d_head == node; } /** * Check if the node has the input dependent node and the input label string. * @param node dependent node for check * @param label label string for check * @return @return {@code true} if the node has the specific dependent node and the specific label string */ public boolean isDependentOf(DEPNode node, String label) { return isDependentOf(node) && isLabel(label); } /** * Check if the node is the descendant of the input head node. * @param label label string for check * @return {@code true} if the node is the dependent of the specific node */ public boolean isDescendantOf(DEPNode node) { DEPNode head = getHead(); while (head != null) { if (head == node) return true; head = head.getHead(); } return false; } /** * Check if the node has the sibling node. * @param node sibling node of the node for check * @return {@code true} if the node has the sibling node */ public boolean isSiblingOf(DEPNode node) { return hasHead() && node.isDependentOf(d_head); } // ====================================== Secondary ====================================== /** * Add a secondary head {@code DEPArc} to the node. 
* @param arc secondary head arc for add */ public void addSecondaryHead(DEPArc arc) { x_heads.add(arc); } /** * Add a secondary head {@code DEPArc} with specified label to the node. * @param arc secondary head arc for add * @param label label of the secondary head */ public void addSecondaryHead(DEPNode head, String label) { addSecondaryHead(new DEPArc(head, label)); } /** * Get a list of all secondary head arc {@code DEPArc} of the node. * @return list of secondary head arc {@code DEPArc} */ public List<DEPArc> getSecondaryHeadArcList() { return x_heads; } /** * Get a list of all secondary head arc {@code DEPArc} with the specific label of the node. * @param label label string of the secondary head arc * @return list of secondary head arc {@code DEPArc} with specific label */ public List<DEPArc> getSecondaryHeadArcList(String label) { List<DEPArc> list = new ArrayList<>(); for (DEPArc arc : x_heads) { if (arc.isLabel(label)) list.add(arc); } return list; } /** * Set list of secondary head arc {@code DEPArc} as the node's secondary heads * @param arcs list of secondary head arc {@code DEPArc} */ public void setSecondaryHeads(List<DEPArc> arcs) { x_heads = arcs; } // ====================================== Semantics ====================================== /** * Get the PropBank roleset ID of the node(ie. verb.01). * @return the PropBank roleset ID of the node if exists; otherwise, {@code null}. */ public String getRolesetID() { return d_feats.get(DEPLib.FEAT_PB); } /** * Set PropBank roleset ID of the node. * @param rolesetID PropBank roleset ID * @return the PropBank roleset ID of the node */ public String setRolesetID(String rolesetID) { return d_feats.put(DEPLib.FEAT_PB, rolesetID); } /** * Remove the PropBank roleset ID of the node. */ public void clearRolesetID() { d_feats.remove(DEPLib.FEAT_PB); } /** * Check if the node has a semantic head in its extra features. 
* @return {@code true} if the node has a semantic head in its extra features */ public boolean isSemanticHead() { return d_feats.containsKey(DEPLib.FEAT_PB); } /** * Get a set of semantic head of the node that contains the given label. * @param label label string to search for in all semantic head of the node * @return a set of semantic head of the node that contains the given label */ public Set<DEPNode> getSemanticHeadSet(String label) { Set<DEPNode> set = new HashSet<>(); for (SRLArc arc : s_heads) { if (arc.isLabel(label)) set.add(arc.getNode()); } return set; } /** * Get a set of semantic head of the node that contains the given pattern. * @param pattern label pattern to search for in all semantic head of the node * @return a set of semantic head of the node that contains the given pattern */ public Set<DEPNode> getSemanticHeadSet(Pattern pattern) { Set<DEPNode> set = new HashSet<>(); for (SRLArc arc : s_heads) { if (arc.isLabel(pattern)) set.add(arc.getNode()); } return set; } /** * Get a list of all semantic head arc of the node. * @return a list of all semantic head arc of the node */ public List<SRLArc> getSemanticHeadArcList() { return s_heads; } /** * Get a list of all semantic head arc of the node with the given label. * @param label String label for the semantic head arc search * @return Get a list of all semantic head arc of the node with the given label */ public List<SRLArc> getSemanticHeadArcList(String label) { List<SRLArc> list = new ArrayList<>(); for (SRLArc arc : s_heads) { if (arc.isLabel(label)) list.add(arc); } return list; } /** * Get the semantic arc relationship between the node and another given node. 
* @param node DEPNode to find the semantic arc relationship with the node * @return semantic arc relationship between the node and another given node */ public SRLArc getSemanticHeadArc(DEPNode node) { for (SRLArc arc : s_heads) { if (arc.isNode(node)) return arc; } return null; } /** * Get the semantic arc relationship between the node and another given node with a given label. * @param node DEPNode to find the semantic arc relationship with the node * @param label String label of the semantic head arc * @return the semantic arc relationship between the node and another given node with a given label */ public SRLArc getSemanticHeadArc(DEPNode node, String label) { for (SRLArc arc : s_heads) { if (arc.equals(node, label)) return arc; } return null; } /** * Get the semantic arc relationship between the node and another given node with a given pattern. * @param node DEPNode to find the semantic arc relationship with the node * @param pattern label pattern of the semantic head arc * @return the semantic arc relationship between the node and another given node with a given pattern */ public SRLArc getSemanticHeadArc(DEPNode node, Pattern pattern) { for (SRLArc arc : s_heads) { if (arc.equals(node, pattern)) return arc; } return null; } /** * Get the semantic label of the node that is related to the given node. * @param node DEPNode that you want to check the semantic label relationship with * @return the semantic label of the given in relation to the node */ public String getSemanticLabel(DEPNode node) { for (SRLArc arc : s_heads) { if (arc.isNode(node)) return arc.getLabel(); } return null; } /** * Get the first node that is found to have the semantic head of the given label from the node. 
* @param label String label of the semantic head * @return the first node that is found to have the semantic head of the given label from the node */ public DEPNode getFirstSemanticHead(String label) { for (SRLArc arc : s_heads) { if (arc.isLabel(label)) return arc.getNode(); } return null; } /** * Get the first node that is found to have the semantic head of the given pattern from the node. * @param label label pattern of the semantic head * @return the first node that is found to have the semantic head of the given pattern from the node */ public DEPNode getFirstSemanticHead(Pattern pattern) { for (SRLArc arc : s_heads) { if (arc.isLabel(pattern)) return arc.getNode(); } return null; } /** * Add a collection of semantic heads to the node. * @param arcs {@code Collection<SRLArc>} of the semantic heads */ public void addSemanticHeads(Collection<SRLArc> arcs) { s_heads.addAll(arcs); } /** * Add a node a give the given semantic label to the node. * @param head DEPNode the semantic head node for adding * @param label String label of the semantic label */ public void addSemanticHead(DEPNode head, String label) { addSemanticHead(new SRLArc(head, label)); } /** * Add a semantic arc to the node. * @param arc semantic arc for adding */ public void addSemanticHead(SRLArc arc) { s_heads.add(arc); } /** * Set semantic heads of the node. * @param arcs a list of semantic heads */ public void setSemanticHeads(List<SRLArc> arcs) { s_heads = arcs; } /** * Remove all semantic heads of the node in relation to a given node. * @param node DEPNode for semantic head removal in relation to the node * @return {@code true}, else {@code false} if nothing gets removed */ public boolean removeSemanticHead(DEPNode node) { for (SRLArc arc : s_heads) { if (arc.isNode(node)) return s_heads.remove(arc); } return false; } /** * Remove a specific semantic head of the node. 
* @param arc a semantic head for removal */ public void removeSemanticHead(SRLArc arc) { s_heads.remove(arc); } /** * Remove a collection of specific semantic heads of the node. * @param arcs a collection of semantic heads for removal */ public void removeSemanticHeads(Collection<SRLArc> arcs) { s_heads.removeAll(arcs); } /** * Remove all semantic heads of the node that have the given label. * @param label String label of the semantic head for removal */ public void removeSemanticHeads(String label) { s_heads.removeAll(getSemanticHeadArcList(label)); } /** * Remove all semantic heads of the node. */ public void clearSemanticHeads() { s_heads.clear(); } /** * Check if the node has a semantic role arc with a given node. * @param node DEPNode to check the SRL relation with * @return {@code true}, else {@code false} if there is no SRLArc between the two nodes */ public boolean isArgumentOf(DEPNode node) { return getSemanticHeadArc(node) != null; } /** * Check if the node has a semantic role arc of the label of a given label. * @param label String label of a certain semantic role label * @return {@code true}, else {@code false} if there is no SRLArc with the given label */ public boolean isArgumentOf(String label) { return getFirstSemanticHead(label) != null; } /** * Check if the node has a semantic role arc of the label of a given pattern. * @param pattern label pattern of a certain semantic role label * @return {@code true}, else {@code false} if there is no SRLArc with the given pattern */ public boolean isArgumentOf(Pattern pattern) { return getFirstSemanticHead(pattern) != null; } /** * Check if the node has a semantic role arc with a given node of a given label. 
* @param node DEPNode to check the SRL relation with * @param label String label of a certain semantic role label * @return {@code true}, else {@code false} if there is no SRLArc with the given label between the two node */ public boolean isArgumentOf(DEPNode node, String label) { return getSemanticHeadArc(node, label) != null; } /** * Check if the node has a semantic role arc with a given node of a given pattern. * @param node DEPNode to check the SRL relation with * @param pattern String label of a certain semantic role label * @return {@code true}, else {@code false} if there is no SRLArc with the given pattern between the two node */ public boolean isArgumentOf(DEPNode node, Pattern pattern) { return getSemanticHeadArc(node, pattern) != null; } // /** // * Get a list of all DEPNode nodes that are potential argument candidate of the node. // * @param depth the depth of how many level (going up) to search for candidates // * @param includeSelf whether to include yourself as a candidate or not // * @return a list of all DEPNode nodes that are potential argument candidate of the node // */ // public Set<DEPNode> getArgumentCandidateSet(int depth, boolean includeSelf) // { // Set<DEPNode> set = new HashSet<>(getDescendantList(depth)); // DEPNode head = getHead(); // // while (head != null) // { // set.add(head); // set.addAll(head.getDependentList()); // head = head.getHead(); // } // // if (includeSelf) set.add (this); // else set.remove(this); // // return set; // } /** * Consider this node as a predicate. * @param maxDepth > 0. * @param maxHeight > 0. * @return list of (argument, lowest common ancestor) pairs. 
*/
    public List<Pair<DEPNode,DEPNode>> getArgumentCandidateList(int maxDepth, int maxHeight)
    {
        List<Pair<DEPNode,DEPNode>> list = new ArrayList<>();
        int i, j, beginIndex, endIndex = 0;
        DEPNode lca = this, prev;

        // descendants: the direct dependents are always collected, paired with this node
        for (DEPNode node : lca.getDependentList())
            list.add(new Pair<>(node, lca));

        // deeper levels: each round expands only the nodes appended by the previous round
        for (i=1; i<maxDepth; i++)
        {
            if (endIndex == list.size()) break;    // the previous round added nothing
            beginIndex = endIndex;
            endIndex   = list.size();

            for (j=beginIndex; j<endIndex; j++)
            {
                for (DEPNode node : list.get(j).o1.getDependentList())
                    list.add(new Pair<>(node, lca));
            }
        }

        // ancestors: climb the head chain; each ancestor is paired with itself and
        // its dependents (except the branch we came from) are paired with it
        for (i=0; i<maxHeight; i++)
        {
            prev = lca;
            lca  = lca.getHead();
            if (lca == null || lca.getID() == DEPLib.ROOT_ID) break;
            list.add(new Pair<>(lca, lca));

            for (DEPNode node : lca.getDependentList())
                if (node != prev) list.add(new Pair<>(node, lca));
        }

        return list;
    }

//  ====================================== String ======================================

    /**
     * @return the word-form, POS tag, and extra features of this node,
     *         joined by {@link TSVReader#DELIM_COLUMN}.
     */
    public String toStringPOS()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(s_wordForm);
        build.add(s_posTag);
        build.add(d_feats.toString());

        return build.toString();
    }

    /**
     * @return the word-form, lemma, POS tag, and extra features of this node,
     *         joined by {@link TSVReader#DELIM_COLUMN}.
     */
    public String toStringMorph()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(s_wordForm);
        build.add(s_lemma);
        build.add(s_posTag);
        build.add(d_feats.toString());

        return build.toString();
    }

    /**
     * @return the ID, the {@link #toStringMorph()} columns, and the head ID and
     *         dependency label of this node, joined by {@link TSVReader#DELIM_COLUMN}.
     */
    public String toStringDEP()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(Integer.toString(n_id));
        build.add(toStringMorph());
        build.add(toStringHead());

        return build.toString();
    }

    /**
     * @return the {@link #toStringDEP()} columns followed by the named entity tag
     *         ({@link TSVReader#BLANK} if the tag is {@code null}).
     */
    public String toStringNER()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(toStringDEP());

        if (s_namedEntityTag != null)
            build.add(s_namedEntityTag);
        else
            build.add(TSVReader.BLANK);

        return build.toString();
    }

    /**
     * @return the {@link #toStringDEP()} columns followed by the secondary heads.
     */
    public String toStringDAG()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(toStringDEP());
        build.add(toString(x_heads));

        return build.toString();
    }

    /**
     * @return the {@link #toStringDEP()} columns followed by the semantic heads.
     */
    public String toStringSRL()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(toStringDEP());
        build.add(toString(s_heads));

        return build.toString();
    }

    /**
     * @return the {@link #toStringSRL()} columns followed by the secondary heads and
     *         the named entity tag ({@link TSVReader#BLANK} if the tag is {@code null}).
     */
    @Override
    public String toString()
    {
        StringJoiner build = new StringJoiner(TSVReader.DELIM_COLUMN);

        build.add(toStringSRL());
        build.add(toString(x_heads));

        if (s_namedEntityTag != null)
            build.add(s_namedEntityTag);
        else
            build.add(TSVReader.BLANK);

        return build.toString();
    }

    /**
     * Join the given field of every node returned by {@code getSubNodeList()}.
     * @param field the field read from each node through {@code getTagFeature}
     * @param delim the delimiter placed between two consecutive values
     * @return the joined values; an empty string if the sub-node list is empty
     */
    public String joinSubNodeFields(FieldType field, String delim)
    {
        StringBuilder build = new StringBuilder();

        for (DEPNode node : getSubNodeList())
        {
            build.append(delim);
            build.append(node.getTagFeature(field));
        }

        // Nothing appended: stripping the leading delimiter would run past the end of the builder.
        if (build.length() == 0)
            return "";

        return build.substring(delim.length());
    }

    /**
     * @return the head ID and the dependency label separated by {@link TSVReader#DELIM_COLUMN};
     *         both columns are {@link TSVReader#BLANK} if this node has no head.
     */
    private String toStringHead()
    {
        StringBuilder build = new StringBuilder();

        if (hasHead())
        {
            build.append(d_head.n_id);
            build.append(TSVReader.DELIM_COLUMN);
            build.append(s_label);
        }
        else
        {
            build.append(TSVReader.BLANK);
            build.append(TSVReader.DELIM_COLUMN);
            build.append(TSVReader.BLANK);
        }

        return build.toString();
    }

    /**
     * Render the given arcs in sorted order, separated by {@link TSVReader#DELIM_ARCS}.
     * The list passed in is left untouched: sorting happens on a copy, so printing a node
     * neither reorders its arc lists nor fails on an unmodifiable list.
     * @param arcs the arcs to render; may be {@code null} or empty
     * @return the joined arcs; {@link TSVReader#BLANK} if there is no arc
     */
    private <T extends AbstractArc<DEPNode>>String toString(List<T> arcs)
    {
        if (arcs == null || arcs.isEmpty())
            return TSVReader.BLANK;

        List<T> sorted = new ArrayList<>(arcs);
        Collections.sort(sorted);

        StringJoiner build = new StringJoiner(TSVReader.DELIM_ARCS);

        for (T arc : sorted)
            build.add(arc.toString());

        return build.toString();
    }

    /**
     * Order nodes by their IDs.
     * @param node the node to compare this node with
     * @return a negative value, zero, or a positive value if the ID of this node is
     *         less than, equal to, or greater than the ID of the specific node
     */
    @Override
    public int compareTo(DEPNode node)
    {
        // Integer.compare avoids the overflow that subtracting two ints can produce.
        return Integer.compare(n_id, node.n_id);
    }
}