// Stanford Dependencies - Code for producing and using Stanford dependencies.
// Copyright © 2005-2014 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// parser-support@lists.stanford.edu
// http://nlp.stanford.edu/software/stanford-dependencies.shtml
package edu.stanford.nlp.trees.international.pennchinese;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.international.Language;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT;
/**
* UniversalChineseGrammaticalRelations is a
* set of {@link GrammaticalRelation} objects for the Chinese language.
* Examples are from CTB_001.fid
*
* TODO(pliang): need to take some of these relations and move them into a
* Universal Stanford Dependencies class (e.g., dep, arg, mod).
* Currently, we have an external data structure that stores information about
* whether a relation is universal or not, but that should probably be moved
* into GrammaticalRelation.
*
* TODO(pliang): add an option to produce trees which use only the USD
* relations rather than the more specialized Chinese ones.
*
* @author Galen Andrew
* @author Pi-Chuan Chang
* @author Huihsin Tseng
* @author Marie-Catherine de Marneffe
* @author Percy Liang
* @author Peng Qi
* @see edu.stanford.nlp.trees.GrammaticalStructure
* @see GrammaticalRelation
* @see UniversalChineseGrammaticalStructure
*/
public class UniversalChineseGrammaticalRelations {
/**
* Private constructor that prevents instantiation: this class is just a
* holder for static {@link GrammaticalRelation} constants that act a bit
* like an enum.
*/
private UniversalChineseGrammaticalRelations() {}
// Compiler passed to every GrammaticalRelation in this class that declares
// tregex patterns. It is deliberately built with a null HeadFinder:
// by setting the HeadFinder to null, we find out right away at
// runtime if we have incorrectly set the HeadFinder for the
// dependency tregexes
private static final TregexPatternCompiler tregexCompiler = new TregexPatternCompiler((HeadFinder) null);
// Tregex applies a /regex/ node description with find() semantics, so '^' and
// '$' only constrain the whole label when they enclose the entire alternation.
// Anchoring just the first and the last alternative would turn the remaining
// ones into substring matches (any word ending in 会 would count as a modal)
// and would leave optional suffixes such as 够? or 旁? without any effect.
// Each pattern below therefore wraps its alternatives in one anchored group.

/** Matches a label that is exactly one comma, either ASCII or full-width. */
private static final String COMMA_PATTERN = "/^[,，]$/";

/** Matches a label that is exactly one of the listed modal verbs. */
private static final String MODAL_PATTERN = "/^(可(以|能)?|能够?|应该?|将要?|必须|会)$/";

/** Matches a label that is exactly one of the listed location nouns. */
private static final String LOCATION_NOUNS = "/^((东|西|南|北)(边|侧|部|岸|麓|畔)|附近|近?旁|旁?边)$/";
/**
 * Exposes the relations registered in this class as a read-only list.
 * <p>
 * The returned list is only a view: callers cannot modify it through this
 * reference, but the backing list can still be modified by others, so acquire
 * the lock returned by {@link #valuesLock()} before iterating over it.
 *
 * @return an unmodifiable view of the list of grammatical relations
 */
public static List<GrammaticalRelation> values() {
  final List<GrammaticalRelation> readOnlyView = Collections.unmodifiableList(values);
  return readOnlyView;
}
/** Read/write lock whose read side is handed out by {@link #valuesLock()}. */
private static final ReadWriteLock valuesLock = new ReentrantReadWriteLock();

/**
 * Returns the read lock that callers should hold while iterating over
 * {@link #values()}.
 *
 * @return the read side of this class's read/write lock
 */
public static Lock valuesLock() {
  final Lock readSide = valuesLock.readLock();
  return readSide;
}
/**
 * Looks up a relation from its string form by delegating to
 * {@code GrammaticalRelation.valueOf}, passing this class's {@link #values()}
 * list and the read lock from {@link #valuesLock()}.
 *
 * @param s the string identifying the relation (presumably its short name;
 *          the exact matching rule is defined by {@code GrammaticalRelation.valueOf})
 * @return whatever {@code GrammaticalRelation.valueOf} returns for {@code s}
 */
public static GrammaticalRelation valueOf(String s) {
  final List<GrammaticalRelation> candidates = values();
  final Lock readLock = valuesLock();
  return GrammaticalRelation.valueOf(s, candidates, readLock);
}
////////////////////////////////////////////////////////////
// ARGUMENT relations
////////////////////////////////////////////////////////////
/**
* The "argument" (arg) grammatical relation (abstract).
* Arguments are required by their heads.
* Declared with {@code DEPENDENT} as its parent and without tregex patterns;
* {@link #SUBJECT}, {@link #COMPLEMENT}, {@link #COMPOUND} and
* {@link #DISCOURSE} are declared with this relation as their parent.
*/
public static final GrammaticalRelation ARGUMENT =
new GrammaticalRelation(Language.UniversalChinese, "arg", "argument", DEPENDENT);
/**
* The "subject" (subj) grammatical relation (abstract).
* Declared under {@link #ARGUMENT} without tregex patterns of its own;
* {@link #NOMINAL_SUBJECT} and {@link #CLAUSAL_SUBJECT} are declared under it.
*/
public static final GrammaticalRelation SUBJECT =
new GrammaticalRelation(Language.UniversalChinese, "subj", "subject", ARGUMENT);
/**
* The "nominal subject" (nsubj) grammatical relation. A nominal subject is
* a subject which is a noun phrase.
* <p>
* <code>
* <pre>
* Input:
* (ROOT
* (IP
* (NP
* (NP (NR 上海) (NR 浦东))
* (NP (NN 开发)
* (CC 与)
* (NN 法制) (NN 建设)))
* (VP (VV 同步))))
* Output:
* nsubj(同步, 建设)
*
* </pre>
* </code>
*/
public static final GrammaticalRelation NOMINAL_SUBJECT =
new GrammaticalRelation(Language.UniversalChinese, "nsubj", "nominal subject",
SUBJECT, "IP|NP", tregexCompiler,
// Basic case: an NP|QP child of IP (without an NT child) that is the immediate
// left sister of a predicate node. The predicate must have no VE/VC/SB/LB child
// and its only child must be neither NP nor PP. The IP itself must not be the
// immediate right sister of BA.
"IP <( ( NP|QP=target!< NT ) $+ ( /^VP|VCD|IP/ !< VE !<VC !<SB !<LB !<:NP !<:PP )) !$- BA",
// Same shape, but the subject and the predicate are separated by a comma
// (a PU whose only child matches COMMA_PATTERN)
"IP <( ( NP|QP=target!< NT ) $+ (PU (<: " + COMMA_PATTERN + " $+ ( /^VP|VCD|IP/ !< VE !<VC !<SB !<LB !<:NP !<:PP )))) !$- BA",
// Same shape, but the subject and the predicate are separated by an LCP
"IP <( ( NP|QP=target!< NT ) $+ (LCP ($+ ( /^VP|VCD|IP/ !< VE !<VC !<SB !<LB !<:NP !<:PP )))) !$- BA",
// There are a number of cases of NP-SBJ not under IP, and we should try to get some of them as this
// pattern does. There are others under CP, especially CP-CND
"NP !$+ VP < ( ( NP|DP|QP=target !< NT ) $+ ( /^VP|VCD/ !<VE !< VC !<SB !<LB))",
// Copular clauses: an NP-like child of IP immediately followed by a VP that has a VC child
"IP < (/^NP/=target $+ (VP < VC))" // Go over copula
);
/**
* The "nominal passive subject" (nsubjpass) grammatical relation.
* The noun is the subject of a passive sentence.
* The passive marker in Chinese is "被".
* <p>
* <code>
* <pre>
* Input:
* (IP
* (NP (NN 镍))
* (VP (SB 被)
* (VP (VV 称作)
* (NP (PU “)
* (DNP
* (NP
* (ADJP (JJ 现代))
* (NP (NN 工业)))
* (DEG 的))
* (NP (NN 维生素))
* (PU ”)))))
* Output:
* nsubjpass(称作-3, 镍-1)
* </pre>
* </code>
*/
public static final GrammaticalRelation NOMINAL_PASSIVE_SUBJECT =
new GrammaticalRelation(Language.UniversalChinese,
"nsubjpass", "nominal passive subject",
NOMINAL_SUBJECT, "IP", tregexCompiler,
// target: an NP child of IP that is the immediate left sister of a VP or IP
// which directly dominates a node tagged SB or LB
"IP < (NP=target $+ (VP|IP < SB|LB))");
/**
* The "clausal subject" grammatical relation. A clausal subject is
* a subject which is a clause.
* <p /> Examples:
* <code>
* <pre>
* </pre>
* </code>
* <p />
* Note: This one might not exist in Chinese, or very rare.
* cdm 2016: There are a few CP-SBJ in the CTB like this one:
* 我 估计 [CP-SBJ 他 欺负 别人 的 ] 多
* but it doesn't seem like there would be any way to detect them without using -SBJ
*/
public static final GrammaticalRelation CLAUSAL_SUBJECT =
new GrammaticalRelation(Language.UniversalChinese,
"csubj", "clausal subject",
SUBJECT, "IP|VP", tregexCompiler,
// An IP (or IP-SBJ) child that has an NP|QP|LCP child and is immediately
// followed by a VP. Note that the node named =target is that following VP,
// not the IP clause itself.
"IP|VP < ( /^IP(-SBJ)?/ < NP|QP|LCP $+ VP=target )",
// Same, with one PU between the IP and the target VP
"IP|VP < ( /^IP(-SBJ)?/ < NP|QP|LCP $+ (PU $+ VP=target ))");
/**
* The "complement" (comp) grammatical relation.
* Declared under {@link #ARGUMENT} without tregex patterns of its own;
* {@link #OBJECT}, {@link #CLAUSAL_COMPLEMENT} and
* {@link #XCLAUSAL_COMPLEMENT} are declared under it.
*/
public static final GrammaticalRelation COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese, "comp", "complement", ARGUMENT);
/**
* The "object" (obj) grammatical relation.
* Declared under {@link #COMPLEMENT} without tregex patterns of its own;
* {@link #DIRECT_OBJECT} and {@link #INDIRECT_OBJECT} are declared under it.
*/
public static final GrammaticalRelation OBJECT =
new GrammaticalRelation(Language.UniversalChinese, "obj", "object", COMPLEMENT);
/**
* The "direct object" (dobj) grammatical relation.
* <p>
* <code>
* <pre>
* Input:
* (IP
* (NP (NR 上海) (NR 浦东))
* (VP
* (VCD (VV 颁布) (VV 实行))
* (AS 了)
* (QP (CD 七十一)
* (CLP (M 件)))
* (NP (NN 法规性) (NN 文件))))
*
* In recent years Shanghai 's Pudong has promulgated and implemented
* some regulatory documents.
* Output:
* dobj(颁布, 文件)
* </pre>
* </code>
*/
public static final GrammaticalRelation DIRECT_OBJECT =
new GrammaticalRelation(Language.UniversalChinese,
"dobj", "direct object",
OBJECT, "CP|VP", tregexCompiler,
// An NP|DP child of a VP without a VC child, immediately preceded by a sister
// matching /^V*/.
// NOTE(review): as a regex, /^V*/ matches every label (V* allows zero
// repetitions), so the left sister is effectively unconstrained. /^V/ may
// have been intended, but a VP whose object NP directly follows a QP only
// matches because of the loose form - confirm before tightening.
"VP < ( /^V*/ $+ NP|DP=target ) !< VC ",
// 进入/VV 了/AS 夏季/NN
// (an aspect marker AS stands between the verb and the object)
"VP < ( /^V*/ $+ (AS $+ NP|DP=target) ) !< VC ",
// NOTE(review): both "$+" relations here attach to the /^V*/ node, requiring
// it to be and not to be the immediate left sister of an NP|DP; this rule
// looks unsatisfiable as written - confirm the intended grouping.
" VP < ( /^V*/ $+ NP|DP=target ! $+ NP|DP) !< VC ",
// Under a CP that dominates no VC: an NP that is a later sister of an IP child
"CP < (IP $++ NP=target ) !<< VC");
/**
* The "indirect object" (iobj) grammatical relation.
* The target is an NP|DP|QP|CLP that is the immediate right sister of a VV
* and immediately precedes an NP|DP.
*/
// NOTE(review): the source pattern is "VP" but the tregex rule is rooted at a
// CP that is not the child of a VP - verify that this rule can actually fire.
public static final GrammaticalRelation INDIRECT_OBJECT =
new GrammaticalRelation(Language.UniversalChinese,
"iobj", "indirect object",
OBJECT, "VP", tregexCompiler,
" CP !> VP < ( VV $+ ( NP|DP|QP|CLP=target . NP|DP ) )");
/**
* The "clausal complement" (ccomp) grammatical relation.
* <p>
* <code>
* <pre>
* Input:
* (IP
* (VP
* (VP
* (ADVP (AD 一))
* (VP (VV 出现)))
* (VP
* (ADVP (AD 就))
* (VP (SB 被)
* (VP (VV 纳入)
* (NP (NN 法制) (NN 轨道)))))))))))
* Output:
* ccomp(出现, 纳入)
* </pre>
* </code>
*/
public static final GrammaticalRelation CLAUSAL_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"ccomp", "clausal complement",
COMPLEMENT, "VP|ADJP|IP", tregexCompiler,
// A VP that is a child of IP|VP and has no NP|QP|LCP child: the target is a
// clausal or verbal node that is a later sister of one of its verbal children
" VP < (VV|VC|VRD|VCD|VSB|VE $++ IP|VP|VRD|VCD|VSB|CP=target) !< NP|QP|LCP > IP|VP ",
// A VV immediately followed by an NP, with an IP (target) as a later sister
"VP < (VV $+ NP $++ IP=target)");
// " VP|IP < ( VV|VC|VRD|VCD !$+ NP|QP|LCP ) > (IP < IP|VP|VRD|VCD=target) "
// "VP < (S=target < (VP !<, TO|VBG) !$-- NP)",
/**
* The "xclausal complement" (xcomp) grammatical relation.
*/
// pichuan: this is difficult to recognize in Chinese.
// The original rules were removed since they (always) collided with ccomp;
// the single rule below was added afterwards (see the fixme).
public static final GrammaticalRelation XCLAUSAL_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"xcomp", "xclausal complement",
COMPLEMENT, "VP", tregexCompiler,
// fixme [pengqi 2016]: this is just a temporary solution to deal with VV $+ VP structures
// that are clearly not aux:modal
// The node named =target is the VV (the first verb): it must be the immediate
// left sister of a VP and must not directly dominate a word matching MODAL_PATTERN.
"VP < (VV=target $+ VP !< " + MODAL_PATTERN + ")"
);
////////////////////////////////////////////////////////////
// MODIFIER relations
////////////////////////////////////////////////////////////
/**
* The "modifier" (mod) grammatical relation (abstract).
* Declared with {@code DEPENDENT} as its parent and without tregex patterns;
* the modifier relations in this section are declared under it.
*/
public static final GrammaticalRelation MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "mod", "modifier", DEPENDENT);
/**
* The "number modifier" (nummod) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (NP
* (NP (NN 拆迁) (NN 工作))
* (QP (CD 若干))
* (NP (NN 规定)))
* Output:
* nummod(规定-48, 若干-47)
* </code>
* </pre>
*/
public static final GrammaticalRelation NUMERIC_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "nummod", "numeric modifier",
MODIFIER,
"QP|NP|DP", tregexCompiler,
// A QP child that dominates a measure word (M) and has a later NN|NP|QP sister
"NP|QP < ( QP =target << M $++ NN|NP|QP)",
// A DNP child wrapping a cardinal QP (CD but no OD), with no JJ|ADJP child,
// that has a later NP|QP sister
"NP|QP < ( DNP=target < (QP < CD !< OD) !< JJ|ADJP $++ NP|QP )"
// the following rule is merged into mark:clf
//"DP < ( DT $+ CLP=target )"
);
/**
* The "appositional modifier" (appos) grammatical relation.
* The target is an NP (or NP-APP) child that: does not end in a word matching
* LOCATION_NOUNS, has no NT child, is not just a single NR, and is the
* immediate left sister of an NP consisting of a single NR which in turn has
* no immediately following sister.
*/
public static final GrammaticalRelation APPOSITIONAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "appos", "appositional modifier", MODIFIER,
"NP", tregexCompiler,
"NP < (/^NP(-APP)?$/=target !<<- " + LOCATION_NOUNS + " !< NT !<: NR $+ (NP <: NR !$+ __))");
/**
* The "parataxis" grammatical relation.
* Declared with {@code DEPENDENT} as its parent and without tregex patterns;
* {@link #PARENTHETICAL_MODIFIER} is declared under it.
*/
public static final GrammaticalRelation PARATAXIS =
new GrammaticalRelation(Language.UniversalChinese, "parataxis", "parataxis", DEPENDENT);
/**
* The "parenthetical modifier" (parataxis:prnmod) grammatical relation (Chinese-specific).
* The target is a PRN child of an NP.
*/
public static final GrammaticalRelation PARENTHETICAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "parataxis:prnmod", "parenthetical modifier",
PARATAXIS, "NP", tregexCompiler,
"NP < PRN=target ");
/**
* The "noun modifier" (nmod) grammatical relation.
* Declared under {@link #MODIFIER}; several more specific relations in this
* class (for example {@link #RANGE}, {@link #POSSESSIVE_MODIFIER} and
* {@link #TEMPORAL_MODIFIER}) are declared under it.
*/
public static final GrammaticalRelation NOUN_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "nmod", "noun modifier", MODIFIER,
"NP", tregexCompiler,
// An NP child containing an NR, not immediately followed by PU|CC|NP|NN,
// with a later NP|PRN sister
"NP < (NP=target < NR !$+ PU|CC|NP|NN $++ NP|PRN)",
// An NP child immediately followed by a single-NR NP, where the enclosing NP
// is immediately adjacent (on either side) to a P or LC sister
"NP < (NP=target $+ (NP <: NR)) [$- P|LC | $+ P|LC]",
// A DNP child wrapping a temporal NP (one with an NT child), with a later NP|QP sister
"NP|QP < ( DNP =target < (NP < NT) $++ NP|QP )",
// A DNP child wrapping an LCP or PP, with a later NP|QP sister
"NP|QP < ( DNP =target < LCP|PP $++ NP|QP )");
/**
* The "range" grammatical relation (Chinese only). The indirect
* object of a VP is the quantifier phrase which is the (dative) object
* of the verb.<p>
* <p>
* <code>
* <pre>
* Input:
* (VP (VV 成交)
* (NP (NN 药品))
* (QP (CD 一亿多)
* (CLP (M 元))))
* Output:
* range(成交, 元)
* </pre>
* </code>
*/
// Note: this constant's short name is "nmod:range"; the Javadoc example above
// shows the label as "range".
public static final GrammaticalRelation RANGE =
new GrammaticalRelation(Language.UniversalChinese,
"nmod:range", "range",
NOUN_MODIFIER, "VP", tregexCompiler,
// A DP|QP child of VP that is the immediate right sister of an NP|DP|QP
"VP < ( NP|DP|QP $+ DP|QP=target)",
// A QP child of VP that is the immediate right sister of a VV
"VP < ( VV $+ QP=target )");
/**
* The "possessive modifier" (nmod:poss) grammatical relation.
* The target is a pronoun (PN) child of an NP that is the immediate left
* sister of an NN.
*/
public static final GrammaticalRelation POSSESSIVE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "nmod:poss", "possessive modifier", NOUN_MODIFIER,
"NP", tregexCompiler,
"NP < (PN=target $+ NN)");
/**
* The "temporal modifier" grammatical relation.
* (IP
* (VP
* (NP (NT 以前))
* (ADVP (AD 不))
* (ADVP (AD 曾))
* (VP (VV 遇到) (AS 过))))
*(VP
* (LCP
* (NP (NT 近年))
* (LC 来))
* (VP
* (VCD (VV 颁布) (VV 实行))
* {@code tmod } (遇到, 以前)
*/
// Note: this constant's short name is "nmod:tmod"; the Javadoc example above
// shows the label as "tmod".
public static final GrammaticalRelation TEMPORAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"nmod:tmod", "temporal modifier",
NOUN_MODIFIER, "VP|IP", tregexCompiler,
// An NP child with a temporal noun (NT) child that has a later VP sister
"VP|IP < (NP=target < NT $++ VP)");
/* This rule actually matches nothing.
There's another tmod rule. This is removed for now.
(pichuan) Sun Mar 8 18:22:40 2009
*/
/*
public static final GrammaticalRelation TEMPORAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"tmod", "temporal modifier",
MODIFIER, "VP|IP|ADJP", tregexCompiler,
new String[]{
" VC|VE ! >> VP|ADJP < NP=target < NT",
"VC|VE !>>IP <( NP=target < NT $++ VP !< VC|VE )"
});
*/
/**
* The "clausal modifier of noun" (acl) grammatical relation.
* The commented-out relative-clause and non-finite-clause relations that
* follow this declaration are marked as merged into this one.
*/
public static final GrammaticalRelation CLAUSAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "acl", "clausal modifier of noun",
MODIFIER, "NP", tregexCompiler,
//"NP $++ (CP=target << VV) > NP ",
// A CP child that dominates a VV and has a later NP sister
"NP < ( CP=target $++ NP << VV)",
// Any IP child of an NP
"NP < IP=target ");
/**
* The "relative clause modifier" (relcl) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (NP-PRD (CP (WHNP-3 (-NONE- *OP*))
* (CP (IP (NP-SBJ (-NONE- *pro*))
* (VP (NP-TMP (NT 以前))
* (ADVP (AD 不))
* (ADVP (AD 曾))
* (VP (VV 遇到)
* (AS 过)
* (NP-OBJ (-NONE- *T*-3)))))
* (DEC 的)))
* (NP (NP (ADJP (JJ 新))
* (NP (NN 情况)))
* (PU 、)
* (NP (ADJP (JJ 新))
* (NP (NN 问题)))))
* The new problem that has not been encountered.
* Output:
* relcl(问题, 遇到)
* </code>
* </pre>
*/
/* merged into acl
public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "acl", "adjectival clause modifier",
CLAUSAL_MODIFIER, "NP", tregexCompiler,
"NP $++ (CP=target << VV) > NP ",
"NP < ( CP=target $++ NP << VV)",
"NP < IP=target ");
*/
/*
* The "non-finite clause" grammatical relation.
* This used to be verb modifier (vmod).
*/
/* merged into acl
public static final GrammaticalRelation NONFINITE_CLAUSE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"acl:nfincl", "non-finite clause modifier (examples: stores[head] based[modifier] in Boston",
CLAUSAL_MODIFIER, "NP", tregexCompiler,
"NP < IP=target ");
*/
/**
* The "adjective modifier" (amod) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (NP
* (ADJP (JJ 新))
* (NP (NN 情况)))
* Output:
* amod(情况-34, 新-33)
* </code>
* </pre>
*/
public static final GrammaticalRelation ADJECTIVAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"amod", "adjectival modifier",
MODIFIER, "NP|CLP|QP", tregexCompiler,
// An ADJP child with a later NP|CLP|QP sister
"NP|CLP|QP < (ADJP=target $++ NP|CLP|QP ) ",
// For an NP that is itself the child of an NP: a later CP sister that
// dominates a VA but no VV
"NP $++ (CP=target << VA !<< VV) > NP ",
// A CP child that dominates a VA but no VV and has a later NP sister
"NP < ( CP=target $++ NP << VA !<< VV)",
// A DNP child with a JJ|ADJP child and no NP|QP child, with a later NP|QP sister
"NP|QP < ( DNP=target < JJ|ADJP !< NP|QP $++ NP|QP )");
/**
* The "ordinal modifier" (amod:ordmod) grammatical relation.
* Declared under {@link #ADJECTIVAL_MODIFIER}.
*/
public static final GrammaticalRelation ORDINAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "amod:ordmod", "ordinal numeric modifier",
ADJECTIVAL_MODIFIER,
"NP|QP", tregexCompiler,
// A QP child of NP that has an ordinal (OD) child and no CLP child
"NP < (QP=target < OD !< CLP)",
// A DNP child wrapping an ordinal QP (OD but no CD), with no JJ|ADJP child,
// that has a later NP|QP sister
"NP|QP < ( DNP=target < (QP < OD !< CD) !< JJ|ADJP $++ NP|QP )"
// the following rule is merged into mark:clf
//"QP < (OD=target $+ CLP)"
);
/**
* The "determiner modifier" (det) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (NP (DP (DT 这些))
* (NP (NN 经济) (NN 活动)))
* Output:
* det(活动-61, 这些-59)
* </code>
* </pre>
*/
public static final GrammaticalRelation DETERMINER =
new GrammaticalRelation(Language.UniversalChinese, "det", "determiner",
MODIFIER, "^NP|DP", tregexCompiler,
// A DP child of an NP-like node that has a later NP sister
"/^NP/ < (DP=target $++ NP )"
//"DP < DT < QP=target"
);
/**
* The "negative modifier" (neg) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (VP
* (NP (NT 以前))
* (ADVP (AD 不))
* (ADVP (AD 曾))
* (VP (VV 遇到) (AS 过))))
* Output:
* neg(遇到-30, 不-28)
* </code>
* </pre>
*/
public static final GrammaticalRelation NEGATION_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"neg", "negation modifier",
MODIFIER, "VP|ADJP|IP", tregexCompiler,
// The escaped regex matches exactly one of the words U+4E0D (不),
// U+6CA1 (没) or U+6CA1 U+6709 (没有).
// Rule 1: an AD|VV child that directly dominates such a word.
"VP|ADJP|IP < (AD|VV=target < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/)",
// Rule 2: an ADVP|VV child whose AD child directly dominates such a word.
"VP|ADJP|IP < (ADVP|VV=target < (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))");
/**
* The "adverbial modifier" (advmod) grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (VP
* (ADVP (AD 基本))
* (VP (VV 做到) (AS 了)
* Output:
* advmod(做到-74, 基本-73)
* </code>
* </pre>
*/
public static final GrammaticalRelation ADVERBIAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"advmod", "adverbial modifier",
MODIFIER,
"VP|ADJP|IP|CP|PP|NP|QP", tregexCompiler,
// An ADVP child, excluding ADVPs whose AD is one of the negation words
// U+4E0D (不), U+6CA1 (没), U+6CA1 U+6709 (没有) matched by NEGATION_MODIFIER
"VP|ADJP|IP|CP|PP|NP < (ADVP=target !< (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))",
// A bare AD or CS child of VP|ADJP
"VP|ADJP < AD|CS=target",
// Inside a QP: an ADVP immediately before, or immediately after, a QP sister
"QP < (ADVP=target $+ QP)",
"QP < ( QP $+ ADVP=target)");
/**
* The "clausal adverb" (advcl) grammatical relation.
* Declared under {@link #ADVERBIAL_MODIFIER} without tregex patterns of its
* own; {@link #CLAUSAL_LOCALIZER_COMPLEMENT} is declared under it.
*/
public static final GrammaticalRelation ADV_CLAUSAL_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"advcl", "clausal adverb", ADVERBIAL_MODIFIER);
/**
* The "dvp modifier" grammatical relation.
* <p>
* <code>
* <pre>
* Input:
* (VP (DVP
* (VP (VA 简单))
* (DEV 的))
* (VP (VV 采取) ...))
* Output:
* dvpmod(采取-9, 简单-7)
* </pre>
* </code>
*/
// Note: this constant's short name is "advmod:dvp"; the Javadoc example above
// shows the label as "dvpmod".
public static final GrammaticalRelation DVPM_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "advmod:dvp", "dvp modifier",
ADVERBIAL_MODIFIER, "VP", tregexCompiler,
// A DVP child of VP that is the immediate left sister of a VP
" VP < ( DVP=target $+ VP) ");
////////////////////////////////////////////////////////////
// Special clausal dependents
////////////////////////////////////////////////////////////
/**
* The "auxiliary" (aux) grammatical relation.
* Declared with source pattern "VP" but with no target patterns of its own;
* {@link #MODAL_VERB}, {@link #ASPECT_MARKER}, {@link #BA} and
* {@link #PART_VERB} are declared under it.
*/
public static final GrammaticalRelation AUX_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "aux", "auxiliary (example: should[modifier] leave[head])",
DEPENDENT, "VP", tregexCompiler
);
/**
* The "modal" grammatical relation.
* (IP
* (NP (NN 利益))
* (VP (VV 能)
* (VP (VV 得到)
* (NP (NN 保障)))))))))
* <code> mmod </code> (得到-64, 能-63)
*/
// Note: this constant's short name is "aux:modal"; the Javadoc example above
// shows the label as "mmod".
public static final GrammaticalRelation MODAL_VERB =
new GrammaticalRelation(Language.UniversalChinese, "aux:modal", "modal verb",
AUX_MODIFIER, "VP", tregexCompiler,
// todo [pengqi]: using MODAL_PATTERN would render many cases of VV $+ VP
// as dep, need to assign a type to that structure. Also in that case
// need to clarify which verb is the head
// Target: a VV child whose word matches MODAL_PATTERN, is not 没有, and
// which is the immediate left sister of a VP or VRD.
"VP < ( VV=target < " + MODAL_PATTERN + " !< /^没有$/ $+ VP|VRD )");
/**
* The "aspect marker" grammatical relation.
* (VP
* (ADVP (AD 基本))
* (VP (VV 做到) (AS 了)
* <code> asp </code> (做到,了)
*/
// Note: this constant's short name is "aux:asp"; the Javadoc example above
// shows the label as "asp".
public static final GrammaticalRelation ASPECT_MARKER =
new GrammaticalRelation(Language.UniversalChinese, "aux:asp", "aspect",
AUX_MODIFIER, "VP", tregexCompiler,
// An AS child of VP that immediately follows a sister matching /^V*/.
// NOTE(review): /^V*/ matches every label (V* allows zero repetitions);
// /^V/ may have been intended - confirm before changing.
"VP < ( /^V*/ $+ AS=target)");
/**
* The "auxiliary passive" (auxpass) grammatical relation.
* The target is an SB or LB child of a VP. Note that this relation is
* declared under {@link #MODIFIER}, not under {@link #AUX_MODIFIER}.
*/
public static final GrammaticalRelation AUX_PASSIVE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "auxpass", "auxiliary passive",
MODIFIER, "VP", tregexCompiler,
"VP < SB|LB=target");
/**
* The "copula" grammatical relation.
* <p>
* <pre>
* <code>
* Input:
* (IP (NP (NR 浦东))
* (VP (VC 是)
* (NP (NN 工程)))))
* Output (formerly reverse(attr)):
* cop(工程,是)
* </code>
* </pre>
*/
public static final GrammaticalRelation COPULA =
new GrammaticalRelation(Language.UniversalChinese, "cop", "copula",
DEPENDENT, "VP", tregexCompiler,
// Any VC child of a VP
" VP < VC=target");
/**
* The "marker" (mark) grammatical relation. A marker is the word
* introducing a finite clause subordinate to another clause.
* <p>
* <pre>
* <code>
* Input:
* (PP (P 因为)
* (IP
* (VP
* (VP
* (ADVP (AD 一))
* (VP (VV 开始)))
* (VP
* (ADVP (AD 就))
* (ADVP (AD 比较))
* (VP (VA 规范))))))
* Output (formerly reverse(pccomp)):
* mark(开始-20,因为-18)
*
* Input:
* (LCP (IP (NP-SBJ (-NONE- *pro*))
* (VP (VV 积累) (AS 了) (NP-OBJ (NN 经验)))) (LC 以后))
* Output (formerly reverse(lccomp)):
* mark(积累, 以后)
*
* Input:
* (CP
* (IP
* (VP
* (VP (VV 振兴)
* (NP (NR 上海)))
* (PU ,)
* (VP (VV 建设)
* (NP
* (NP (NN 现代化))
* (NP (NN 经济) (PU 、) (NN 贸易) (PU 、) (NN 金融))
* (NP (NN 中心))))))
* (DEC 的))
* Output (formerly cpm):
* mark(振兴, 的)
*
* Input:
* (DVP
* (VP (VA 简单))
* (DEV 的))
* Output (formerly dvpm):
* mark(简单-7, 的-8)
* </code>
* </pre>
*/
public static final GrammaticalRelation MARK =
new GrammaticalRelation(Language.UniversalChinese, "mark",
"marker (examples: that[modifier] expanded[head]; 开发/expand[head] 浦东/Pudong 的[modifier])",
DEPENDENT, "^PP|^LCP|^CP|^DVP", tregexCompiler,
// A P child of a PP-like node that is the immediate left sister of a VP
"/^PP/ < (P=target $+ VP)",
// NOTE(review): this rule looks for a P inside an LCP, whereas the Javadoc
// LCP example shows an LC (以后) as the marker - confirm which is intended.
"/^LCP/ < (P=target $+ VP)",
// A DEC child of a CP-like node that has some earlier sister
"/^CP/ < (__ $++ DEC=target)",
// A DEV child of a DVP that immediately follows some sister
"DVP < (__ $+ DEV=target)");
/**
* The "punctuation" (punct) grammatical relation. This is used for any piece of
* punctuation in a clause, if punctuation is being retained in the
* typed dependencies.
* The source pattern ".*" and the wildcard node "__" place no restriction on
* the parent: the target is any PU child of any node.
*/
public static final GrammaticalRelation PUNCTUATION =
new GrammaticalRelation(Language.UniversalChinese, "punct", "punctuation",
DEPENDENT, ".*", tregexCompiler,
"__ < PU=target");
////////////////////////////////////////////////////////////
// Other (compounding, coordination)
////////////////////////////////////////////////////////////
/**
* The "compound" grammatical relation (abstract).
* Declared under {@link #ARGUMENT} without tregex patterns of its own;
* {@link #NOUN_COMPOUND}, {@link #NAME} and {@link #VERB_COMPOUND} are
* declared under it.
*/
public static final GrammaticalRelation COMPOUND =
new GrammaticalRelation(Language.UniversalChinese, "compound", "compound (examples: phone book, three thousand)", ARGUMENT);
/**
* The "noun compound" (nn) grammatical relation.
* Example:
* (ROOT
* (IP
* (NP
* (NP (NR 上海) (NR 浦东))
* (NP (NN 开发)
* (CC 与)
* (NN 法制) (NN 建设)))
* (VP (VV 同步))))
* <code> compound:nn </code> (浦东, 上海)
*/
public static final GrammaticalRelation NOUN_COMPOUND =
new GrammaticalRelation(Language.UniversalChinese,
"compound:nn", "noun compound",
COMPOUND, "^NP", tregexCompiler,
// A noun (NN|NR|NT) inside a run of adjacent nominal sisters; the bracketed
// alternatives describe the two accepted neighbour configurations
"NP < (NN|NR|NT=target [$+ NN|NT $- NN|NP | $+ (NN|NT $+ NN|NP|NR)])",
// A noun not immediately followed by PU|CC|DNP that has a later NN|NT sister
"NP < (NN|NR|NT=target !$+ PU|CC|DNP $++ NN|NT)",
// A foreign word (FW) immediately following a noun
"NP < (NN|NR|NT $+ FW=target)",
// An NP child without an NR child, not immediately followed by PU|CC|DNP,
// with a later NP|PRN sister that has no NR|QP child
"NP < (NP=target !< NR !$+ PU|CC|DNP $++ (NP|PRN !< NR|QP))",
// the following rule captures some exceptions from nmod:assmod
// (its bracketed conditions are the un-negated counterparts of the ones
// negated in the second ASSOCIATIVE_MODIFIER rule)
"NP < (NP=target < NR $+ (NP [<<# NR | $+ NR|NN | $+ (__ <<# NR) | $+ /^[^N]/]))",
// An NP child with an NN but no NR child, immediately followed by an NP
// that has an NN|NT child
"NP < (NP=target < NN !< NR $+ (NP < NN|NT))");
/**
* The "name" grammatical relation.
* The target is an NR child of an NP that is the immediate left sister of
* another NR.
*/
public static final GrammaticalRelation NAME =
new GrammaticalRelation(Language.UniversalChinese,
"name", "name",
COMPOUND, "^NP", tregexCompiler,
"NP < (NR=target $+ NR)");
/**
* The "coordinated verb compound" (compound:vc) grammatical relation.
* (VCD (VV 颁布) (VV 实行))
* comod(颁布-5, 实行-6)
* (the example shows the label as "comod"; this constant's short name is
* "compound:vc")
*/
public static final GrammaticalRelation VERB_COMPOUND =
new GrammaticalRelation(Language.UniversalChinese, "compound:vc", "coordinated verb compound",
COMPOUND, "VCD|VSB", tregexCompiler,
// In a VCD the target is the second of two adjacent VV|VA nodes ...
"VCD < ( VV|VA $+ VV|VA=target)",
// ... whereas in a VSB the target is the first of the two
"VSB < ( VV|VA=target $+ VV|VA)");
/**
* The "conjunct" (conj) grammatical relation.
* <p>
* <code>
* <pre>
* Input:
* (ROOT
* (IP
* (NP
* (NP (NR 上海) (NR 浦东))
* (NP (NN 开发)
* (CC 与)
* (NN 法制) (NN 建设)))
* (VP (VV 同步))))
*
* The development of Shanghai 's Pudong is in step with the establishment
* of its legal system.
* Output:
* conj(建设, 开发) [should be reversed]
* </pre>
* </code>
*
* TODO(pliang): make first item the head and the subsequent ones modifiers.
*/
public static final GrammaticalRelation CONJUNCT =
  new GrammaticalRelation(Language.UniversalChinese,
    "conj", "conjunct",
    DEPENDENT, "FRAG|INC|IP|VP|NP|ADJP|PP|ADVP|UCP", tregexCompiler,
    // A non-PU, non-CC child that immediately precedes a CC
    "NP|ADJP|PP|ADVP|UCP < (!PU|CC=target $+ CC)",
    // Split the first rule to the second rule to avoid the duplication:
    // ccomp(前来-12, 投资-13)
    // conj(前来-12, 投资-13)
    //
    //      (IP
    //        (VP
    //          (VP (VV 前来))
    //          (VP
    //            (VCD (VV 投资) (VV 办厂)))
    //          (CC 和)
    //          (VP (VV 洽谈)
    //            (NP (NN 生意))))))
    "VP < (!PU|CC=target !$- VP $+ CC)",
    // TODO: this following line has to be fixed.
    //       I think for now it just doesn't match anything.
    "VP|NP|ADJP|PP|ADVP|UCP < ( __=target $+ PU $+ CC)",
    //"VP|NP|ADJP|PP|ADVP|UCP < ( __=target $+ (PU < 、) )",
    // Consider changing the rule ABOVE to these rules.
    "VP < ( /^V/=target  $+ ((PU < 、) $+ /^V/))",
    "NP < ( /^N/=target  $+ ((PU < 、) $+ /^N/))",
    "ADJP < ( JJ|ADJP=target  $+ ((PU < 、) $+ JJ|ADJP))",
    "PP < ( /^P/=target  $+ ((PU < 、) $+ /^P/))",
    //"ADVP < ( /^AD/=target  $+ ((PU < 、) $+ /^AD/))",
    "ADVP < ( /^AD/ $+ ((PU < 、) $+ /^AD/=target))",
    "UCP < ( !PU|CC=target    $+ (PU < 、) )",
    // This is for the 'conj's separated by commas.
    // For now this creates too much duplicates with 'ccomp'.
    // Need to look at more examples.
    "PP < (PP $+ PP=target )",
    "NP <( NP=target $+ ((PU < 、) $+ NP) )",
    // Nouns or pronouns separated by a comma or an enumeration comma. The
    // punctuation is written as a regex because a bare ',' is a tregex
    // relation operator and cannot be used as a node description; the
    // character class accepts the ASCII comma, the full-width comma and 、.
    "NP <( NN|NR|NT|PN=target $+ ((PU < /^[,，、]$/) $+ NN|NR|NT|PN) )",
    "VP < (CC $+ VV=target)",
    // Original version of this did not have the outer layer of
    // the FRAG|INC|IP|VP.  This caused a bug where the basic
    // dependencies could have cycles.
    "FRAG|INC|IP|VP < (VP  < VV|VC|VRD|VCD|VE|VA < NP|QP|LCP  $ IP|VP|VRD|VCD|VE|VC|VA=target)  ",
    // splitting the following into two rules for accuracy
    // "IP|VP < ( IP|VP < NP|QP|LCP $ IP|VP=target )",
    "IP|VP < ( IP < NP|QP|LCP $ IP=target )",
    "IP|VP < ( VP $ VP=target )");
/**
* The "coordination" grammatical relation.
* A coordination is the relation between
* an element and a conjunction.<p>
* <code>
* <pre>
* Input:
* (ROOT
* (IP
* (NP
* (NP (NR 上海) (NR 浦东))
* (NP (NN 开发)
* (CC 与)
* (NN 法制) (NN 建设)))
* (VP (VV 同步))))
* Output:
* cc(建设, 与) [should be cc(开发, 与)]
* </pre>
* </code>
* TODO(pliang): by convention, the first item in the coordination should be
* chosen, but currently, it's the head, which happens to be the last.
*/
public static final GrammaticalRelation COORDINATION =
new GrammaticalRelation(Language.UniversalChinese,
"cc", "coordination", DEPENDENT,
"VP|NP|ADJP|PP|ADVP|UCP|IP|QP", tregexCompiler,
// Any CC child of one of the listed phrase types
"VP|NP|ADJP|PP|ADVP|UCP|IP|QP < (CC=target)");
/**
* The "case" grammatical relation.
* This covers prepositions, localizers, and associative markers.
* <p>
* <pre>
* <code>
* Input:
* (PP (P 根据)
* (NP
* (DNP
* (NP
* (NP (NN 国家))
* (CC 和)
* (NP (NR 上海市)))
* (DEG 的))
* (ADJP (JJ 有关))
* (NP (NN 规定))))
* Output (formerly reverse(pobj)):
* case(规定-19, 根据-13)
*
* Input:
* (LCP
* (NP (NT 近年))
* (LC 来))
* Output (formerly reverse(lobj)):
* case(近年-3, 来-4)
*
* Input:
* (NP (DNP
* (NP (NP (NR 浦东))
* (NP (NN 开发)))
* (DEG 的))
* (ADJP (JJ 有序))
* (NP (NN 进行)))
* Output (formerly reverse(assm)):
* case(开发-31, 的-32)
*
* Input:
* (PP (P 在)
* (LCP
* (NP
* (DP (DT 这)
* (CLP (M 片)))
* (NP (NN 热土)))
* (LC 上)))
* Output (formerly reverse(plmod)):
* case(热土, 在)
* </code>
* </pre>
*/
public static final GrammaticalRelation CASE =
new GrammaticalRelation(Language.UniversalChinese, "case",
"case marking (examples: Chair[head] 's[modifier], 根据/according[modifier] ... 规定/rule[head]; 近年/this year[head] 来[modifier])",
DEPENDENT, "^PP|^LCP|^DNP", tregexCompiler,
// Preposition: a P child of a PP-like node
"/^PP/ < P=target",
// Localizer: an LC child of an LCP-like node
"/^LCP/ < LC=target",
// Associative marker: a DEG child of a DNP-like node
"/^DNP/ < DEG=target",
// A P child of PP that has a later LCP sister (the former plmod case)
"PP < ( P=target $++ LCP )");
/**
* The "associative modifier" (nmod:assmod) grammatical relation (Chinese-specific).
* See "case" for example.
*/
public static final GrammaticalRelation ASSOCIATIVE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"nmod:assmod", "associative modifier (examples: 上海市/Shanghai[modifier] 的 规定/law[head])",
NOUN_MODIFIER, "NP|QP|DNP", tregexCompiler,
// A DNP child wrapping a non-temporal NP (no NT child), with a later NP|QP sister
"NP|QP < ( DNP =target < (NP !< NT) $++ NP|QP ) ",
// An NP (or NP-PN) child containing an NR, not immediately preceded by NP|NN,
// immediately followed by an NP for which none of the exceptions listed in
// the fifth NOUN_COMPOUND rule holds
"NP|DNP < (/^NP(-PN)?$/=target < NR $+ (NP !<<# NR !$+ NR|NN !$+ (__ <<# NR) !$+ /^[^N]/) !$- NP|NN)",
// the following rule is merged into compound:nn
//"NP < (NR=target $+ NN)",
// An NP child without an NR child, not immediately followed by PU|CC,
// with a later NP|PRN sister that has a QP child
"NP < (NP=target !< NR !$+ PU|CC $++ (NP|PRN < QP))");
/**
* The "nominal topic" (nmod:topic) grammatical relation (Chinese-specific).
* Example:
* <code>
* Input:
* (IP (NP-TPC (NP-APP (ADJP (JJ 现任))
* (NP (NN 总统)))
* (NP-PN (NR 米洛舍维奇)))
* (NP-TMP (NT 2001年))
* (NP-SBJ (NN 总统)
* (NN 任期))
* (VP (VV 到期)))
* Output:
* nmod:topic(到期, 米洛舍维奇)
* </code>
*/
public static final GrammaticalRelation NOMINAL_TOPIC_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"nmod:topic", "nominal topic",
NOUN_MODIFIER, "IP", tregexCompiler,
// An NP-like child of IP without an NT child that has, as a later sister,
// an NP immediately followed by a VP.
// NOTE(review): in /^NP.*(-TPC)?/ the optional "(-TPC)?" does not restrict
// anything - every label starting with NP matches; confirm whether only
// NP-TPC nodes were meant.
"IP < (/^NP.*(-TPC)?/=target $++ (NP $+ VP) !< NT)");
////////////////////////////////////////////////////////////
// Other stuff: pliang: not sure exactly where they should go.
////////////////////////////////////////////////////////////
/*
* The "prepositional localizer modifier" grammatical relation.
* (PP (P 在)
* (LCP
* (NP
* (DP (DT 这)
* (CLP (M 片)))
* (NP (NN 热土)))
* (LC 上)))
* plmod(在-25, 上-29)
*/
/*
* pengqi Jul 2016: This shouldn't exist in UD and is replaced by case
*
public static final GrammaticalRelation PREPOSITIONAL_LOCALIZER_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"plmod", "prepositional localizer modifier",
MODIFIER, "PP", tregexCompiler,
"PP < ( P $++ LCP=target )");
*/
/**
* The "adjectival complement" grammatical relation.
* Example:
*/
// deleted by pichuan: no real matches
/*
public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"acomp", "adjectival complement",
COMPLEMENT, "VP", tregexCompiler,
new String[]{
"VP < (ADJP=target !$-- NP)"
});
*/
/**
* The "localizer complement" grammatical relation.
* (VP (VV 占)
* (LCP
* (QP (CD 九成))
* (LC 以上)))
* (PU ,)
* (vp (VV 达)
* (QP (CD 四百三十八点八亿)
* (CLP (M 美元))))
* <code> loc </code> (占-11, 以上-13)
*/
// Note: this constant's short name is "advmod:loc"; the Javadoc example above
// shows the label as "loc".
public static final GrammaticalRelation LOCALIZER_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"advmod:loc", "localizer complement",
ADVERBIAL_MODIFIER, "VP|IP", tregexCompiler,
// An LCP child that has no IP child (the non-clausal case; LCPs with an IP
// child are covered by CLAUSAL_LOCALIZER_COMPLEMENT)
"VP|IP < (LCP=target !< IP) ");
/**
* The clausal counterpart of {@link #LOCALIZER_COMPLEMENT} (advcl:loc):
* the target is an LCP child of VP|IP that has an IP child. It shares the
* long name "localizer complement" with {@link #LOCALIZER_COMPLEMENT}.
*/
public static final GrammaticalRelation CLAUSAL_LOCALIZER_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"advcl:loc", "localizer complement",
ADV_CLAUSAL_MODIFIER, "VP|IP", tregexCompiler,
"VP|IP < (LCP=target < IP) ");
/**
* The "resultative complement" (advmod:rcomp) grammatical relation.
* The target is the second of two adjacent children of a VRD.
*/
// NOTE(review): /V*/ matches every label (V* allows zero repetitions and the
// regex is unanchored), so the two children are not actually required to be
// verbal; /^V/ may have been intended - confirm before changing.
public static final GrammaticalRelation RESULTATIVE_COMPLEMENT =
new GrammaticalRelation(Language.UniversalChinese,
"advmod:rcomp", "result verb",
ADVERBIAL_MODIFIER, "VRD", tregexCompiler,
"VRD < ( /V*/ $+ /V*/=target )");
/**
* The "ba" (aux:ba) grammatical relation.
* The target is any BA child of a VP or IP.
*/
public static final GrammaticalRelation BA =
new GrammaticalRelation(Language.UniversalChinese, "aux:ba", "ba",
AUX_MODIFIER, "VP|IP", tregexCompiler,
"VP|IP < BA=target ");
/**
* The "classifier marker" grammatical relation.
* <p>
* <code>
* <pre>
* Input:
* ((QP (CD 七十一)
* (CLP (M 件)))
* (NP (NN 法规性) (NN 文件)))
* Output:
* mark:clf(七十一, 件)
* </pre>
* </code>
*/
public static final GrammaticalRelation CLASSIFIER_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"mark:clf", "classifier marker",
MARK, "QP|DP", tregexCompiler,
// A measure word (M) directly under a QP
"QP < M=target",
// A classifier phrase (CLP) directly under a QP
"QP < CLP=target",
// A CLP immediately following a DT inside a DP (the rule that
// NUMERIC_MODIFIER notes as merged into mark:clf)
"DP < ( DT $+ CLP=target )");
/**
* The "possession modifier" grammatical relation.
*/
// Fri Feb 20 15:40:13 2009 (pichuan)
// I think this "poss" relation is just WRONG.
// DEC is a complementizer or a nominalizer,
// this rule probably originally want to capture "DEG".
// But it seems like it's covered by "assm" (associative marker).
/*
public static final GrammaticalRelation POSSESSION_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese,
"poss", "possession modifier",
MODIFIER, "NP", tregexCompiler,
new String[]{
"NP < ( PN=target $+ DEC $+ NP )"
});
*/
/**
* The "possessive marker" grammatical relation.
*/
// Similar to the comments to "poss",
// I think this relation is wrong and will not appear.
/*
public static final GrammaticalRelation POSSESSIVE_MODIFIER =
new GrammaticalRelation(Language.UniversalChinese, "possm", "possessive marker",
MODIFIER, "NP", tregexCompiler,
new String[]{
"NP < ( PN $+ DEC=target ) "
});
*/
/**
 * The "prepositional modifier" grammatical relation ({@code nmod:prep}),
 * declared with {@code NOUN_MODIFIER} as its parent relation.
 * <p>
 * The target is a node whose label starts with {@code PP} and that is a direct
 * child of a node whose label starts with {@code NP}, or of a {@code VP} or
 * {@code IP} node.
 * <pre>
 * (IP
 *   (PP (P 对)
 *     (NP (PN 此)))
 *   (PU ,)
 *   (NP (NR 浦东))
 *   (VP
 *     (VP
 *       (ADVP (AD 不))
 *       (VP (VC 是)
 *         (VP
 *           (DVP
 *             (VP (VA 简单))
 *             (DEV 的))
 *           (VP (VV 采取)
 * </pre>
 * <code> nmod </code> (采取-9, 此-1)
 */
public static final GrammaticalRelation PREPOSITIONAL_MODIFIER =
    new GrammaticalRelation(
        Language.UniversalChinese,
        "nmod:prep",
        "prepositional modifier",
        NOUN_MODIFIER,
        "^NP|VP|IP",
        tregexCompiler,
        "/^NP/ < /^PP/=target",
        "VP < /^PP/=target",
        "IP < /^PP/=target ");
/**
 * The "particle verb" grammatical relation ({@code aux:prtmod}), declared
 * with {@code AUX_MODIFIER} as its parent relation.
 * <p>
 * The target is an {@code MSP} node that is a direct child of a {@code VP}
 * or {@code IP} node.
 */
public static final GrammaticalRelation PART_VERB =
    new GrammaticalRelation(
        Language.UniversalChinese,
        "aux:prtmod",
        "particle verb",
        AUX_MODIFIER,
        "VP|IP",
        tregexCompiler,
        "VP|IP < ( MSP=target )");
/**
 * The "etc" grammatical relation, declared with {@code MODIFIER} as its
 * parent relation.
 * <p>
 * The target is an {@code ETC} node immediately preceded by an {@code NN} or
 * {@code NR} node that is a direct child of a node whose label starts with
 * {@code NP}.
 * <pre>
 * (NP
 *   (NP (NN 经济) (PU 、) (NN 贸易) (PU 、) (NN 建设) (PU 、) (NN 规划) (PU 、) (NN 科技) (PU 、) (NN 文教) (ETC 等))
 *   (NP (NN 领域)))
 * </pre>
 * <code> etc </code> (办法-70, 等-71)
 * <p>
 * NOTE(review): the word pair above does not occur in the tree fragment
 * shown; it may come from a different sentence -- confirm.
 */
public static final GrammaticalRelation ETC =
    new GrammaticalRelation(
        Language.UniversalChinese,
        "etc",
        "ETC",
        MODIFIER,
        "^NP",
        tregexCompiler,
        "/^NP/ < (NN|NR . ETC=target)");
/**
 * The "controlled subject" grammatical relation ({@code nsubj:xsubj}, the
 * replacement for the former "xsubj"), declared with {@code NOMINAL_SUBJECT}
 * as its parent relation.
 * <p>
 * The pattern starts from a {@code VP} that has a {@code VP} child but no
 * {@code NP} child and that sits directly under an {@code IP}. That
 * {@code IP} must have no {@code NP} child and no {@code NP} as its immediate
 * left sister, must not be dominated by a {@code VP} that has a {@code VC}
 * child, and must be reachable through an unbroken chain of {@code VP} nodes
 * from a {@code VP} that has an {@code NP} left sister. That {@code NP} is
 * the target.
 * <pre>
 * (IP
 *   (NP (PN 有些))
 *   (VP
 *     (VP
 *       (ADVP (AD 还))
 *       (ADVP (AD 只))
 *       (VP (VC 是)
 *         (NP
 *           (ADJP (JJ 暂行))
 *           (NP (NN 规定)))))
 *     (PU ,)
 *     (VP (VV 有待)
 *       (IP
 *         (VP
 *           (PP (P 在)
 *             (LCP
 *               (NP (NN 实践))
 *               (LC 中)))
 *           (ADVP (AD 逐步))
 *           (VP (VV 完善))))))))))
 * </pre>
 * <code> nsubj </code> (完善-26, 规定-14)
 */
public static final GrammaticalRelation CONTROLLED_SUBJECT =
    new GrammaticalRelation(
        Language.UniversalChinese,
        "nsubj:xsubj",
        "controlled subject",
        NOMINAL_SUBJECT,
        "VP",
        tregexCompiler,
        "VP !< NP < VP > (IP !$- NP !< NP !>> (VP < VC ) >+(VP) (VP $-- NP=target))");
/**
 * The "discourse" grammatical relation, declared with {@code ARGUMENT} as
 * its parent relation.
 * <p>
 * The target is an {@code SP} node that is a direct child of a {@code CP} node.
 */
public static final GrammaticalRelation DISCOURSE =
    new GrammaticalRelation(
        Language.UniversalChinese,
        "discourse",
        "discourse",
        ARGUMENT,
        "CP",
        tregexCompiler,
        "CP < SP=target");
// Universal GrammaticalRelations
//
// rawValues is the ordered master list that the static initializer walks to
// fill the relation list, the universalValues set and (indirectly) the
// short-name lookup map. Element order is significant: a relation that is
// immediately followed by the chineseOnly marker is kept in the relation
// list but left out of universalValues; the marker entries themselves are
// skipped.
private static final GrammaticalRelation chineseOnly = null; // Place-holder (a null entry): put this after a relation to mark it as Chinese-only
private static final GrammaticalRelation[] rawValues = {
DEPENDENT,
ARGUMENT,
SUBJECT,
NOMINAL_SUBJECT,
NOMINAL_PASSIVE_SUBJECT,
CLAUSAL_SUBJECT,
//CLAUSAL_PASSIVE_SUBJECT, // Exists in Chinese?
COMPLEMENT,
OBJECT,
DIRECT_OBJECT,
INDIRECT_OBJECT,
CLAUSAL_COMPLEMENT,
XCLAUSAL_COMPLEMENT, // Exists in Chinese?
MODIFIER,
// Nominal heads, nominal dependents
NUMERIC_MODIFIER,
ORDINAL_MODIFIER, chineseOnly,
APPOSITIONAL_MODIFIER,
PARENTHETICAL_MODIFIER, chineseOnly,
NOUN_MODIFIER,
RANGE, chineseOnly,
ASSOCIATIVE_MODIFIER, chineseOnly,
TEMPORAL_MODIFIER, chineseOnly,
POSSESSIVE_MODIFIER,
NOMINAL_TOPIC_MODIFIER, chineseOnly,
// Nominal heads, predicate dependents
//NOMINALIZED_CLAUSE_MODIFIER, // Exists in Chinese?
ADJECTIVAL_MODIFIER,
DETERMINER,
NEGATION_MODIFIER,
CLAUSAL_MODIFIER,
// Predicate heads
ADVERBIAL_MODIFIER,
DVPM_MODIFIER, chineseOnly,
ADV_CLAUSAL_MODIFIER,
CLAUSAL_LOCALIZER_COMPLEMENT, chineseOnly,
// Special clausal dependents
//VOCATIVE,
//DISCOURSE,
//EXPL,
AUX_MODIFIER,
MODAL_VERB, chineseOnly,
ASPECT_MARKER, chineseOnly,
AUX_PASSIVE_MODIFIER,
COPULA,
MARK,
CLASSIFIER_MODIFIER, chineseOnly,
PUNCTUATION,
// Other
COMPOUND,
NOUN_COMPOUND, chineseOnly,
VERB_COMPOUND, chineseOnly,
NAME,
CONJUNCT,
COORDINATION,
CASE,
DISCOURSE,
// Don't know what to do about these
//PREPOSITIONAL_LOCALIZER_MODIFIER, chineseOnly,
LOCALIZER_COMPLEMENT, chineseOnly,
RESULTATIVE_COMPLEMENT, chineseOnly,
BA, chineseOnly,
PREPOSITIONAL_MODIFIER, chineseOnly,
PART_VERB, chineseOnly,
ETC, chineseOnly,
CONTROLLED_SUBJECT, chineseOnly,
};
// Backing list of relations; it is filled by the static initializer through
// the synchronizedValues wrapper declared next.
private static final List<GrammaticalRelation> values = new ArrayList<>();
// Cache frequently used views of the values list
// (a synchronized wrapper backed by the same list, so additions made
// through it are stored in values).
private static final List<GrammaticalRelation> synchronizedValues =
Collections.synchronizedList(values);
// Relations from rawValues that are not immediately followed by the
// chineseOnly marker. NOTE(review): this is a public, mutable HashSet;
// confirm that no caller modifies it after class initialization.
public static final Set<GrammaticalRelation> universalValues = new HashSet<>();
// Map from GrammaticalRelation short names to their corresponding
// GrammaticalRelation objects; populated by the static initializer from
// values(), keyed by getShortName().
public static final Map<String, GrammaticalRelation> shortNameToGRel = new ConcurrentHashMap<>();
static {
for (int i = 0; i < rawValues.length; i++) {
GrammaticalRelation gr = rawValues[i];
if (gr == chineseOnly) continue;
synchronizedValues.add(gr);
if (i + 1 == rawValues.length || rawValues[i + 1] != chineseOnly) {
universalValues.add(gr);
}
}
valuesLock().lock();
try {
for (GrammaticalRelation gr : UniversalChineseGrammaticalRelations.values()) {
shortNameToGRel.put(gr.getShortName(), gr);
}
} finally {
valuesLock().unlock();
}
}
/**
 * Prints out the Chinese grammatical relations hierarchy.
 * <p>
 * Concretely, this writes the result of {@code DEPENDENT.toPrettyString()}
 * to standard output.
 *
 * @param args Args are ignored.
 */
public static void main(String[] args) {
System.out.println(DEPENDENT.toPrettyString());
}
}