package ruc.irm.similarity.word.hownet2.concept;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;

import ruc.irm.similarity.word.hownet.HownetMeta;

/**
 * Representation of a single HowNet concept.
 * <p>
 * The example and English parts of a HowNet entry play no role in the
 * similarity computation, so, to save memory, they are not stored in this
 * class.
 * <p>
 * A concept is built from a word, its part of speech and a comma-separated
 * definition string. The definition is split into a main sememe, further
 * basic sememes, relation sememes and symbol sememes.
 * <p>
 * Note: {@link #hashCode()} and {@link #equals(Object)} depend on mutable
 * state ({@code word} and {@code define}); do not call the setters while an
 * instance is stored in a hash-based collection.
 *
 * @author <a href="mailto:iamxiatian@gmail.com">夏天</a>
 * @organization 中国人民大学信息资源管理学院 知识工程实验室
 */
public class Concept implements HownetMeta {
    /** Chinese name of the concept. */
    protected String word;

    /** Part of speech. */
    protected String pos;

    /** Definition string (trimmed; empty when none was supplied). */
    protected String define;

    /** {@code true} for a content word, {@code false} for a function word; usually {@code true}. */
    protected boolean bSubstantive;

    /** First (main) basic sememe; {@code null} when the definition has no tokens. */
    protected String mainSememe;

    /** The other basic sememes. */
    protected String[] secondSememes;

    /** Relation sememes (items of the form {@code key=value}). */
    protected String[] relationSememes;

    /** Symbol sememes (items whose first character is a known symbol). */
    protected String[] symbolSememes;

    /**
     * Lookup table used by {@link #getType()}: each row is
     * {@code {marker, type name}}. Rows are tested in order and the first
     * marker contained in the definition wins.
     */
    static String[][] Concept_Type = {
            { "=", "事件" },
            { "aValue|属性值", "属性值" },
            { "qValue|数量值", "数量值" },
            { "attribute|属性", "属性" },
            { "quantity|数量", "数量" },
            { "unit|", "单位" },
            { "%", "部件" }
    };

    /**
     * Creates a concept and parses its definition.
     *
     * @param word the Chinese word naming the concept
     * @param pos  the part of speech
     * @param def  the definition string; {@code null} is treated as empty and
     *             surrounding whitespace is removed
     */
    public Concept(String word, String pos, String def) {
        this.word = word;
        this.pos = pos;
        applyDefine(def);
    }

    /**
     * Normalises the definition, derives the content-word flag and re-parses
     * the sememes, so that all derived fields always agree with
     * {@code define}.
     */
    private void applyDefine(String def) {
        this.define = (def == null) ? "" : def.trim();

        // Function words are written as {***}.
        int length = define.length();
        boolean wrappedInBraces = length > 0
                && define.charAt(0) == '{'
                && define.charAt(length - 1) == '}';
        this.bSubstantive = !wrappedInBraces;

        parseDefine();
    }

    /**
     * Splits the definition into four groups: the main sememe, the other
     * basic sememes, the relation sememes and the symbol sememes.
     */
    private void parseDefine() {
        List<String> secondList = new ArrayList<String>();   // other basic sememes
        List<String> relationList = new ArrayList<String>(); // relation sememes
        List<String> symbolList = new ArrayList<String>();   // symbol sememes

        // For a function word only the text between the braces is parsed.
        String tokenString = this.define;
        if (!this.bSubstantive) {
            tokenString = define.substring(1, define.length() - 1);
        }

        StringTokenizer tokens = new StringTokenizer(tokenString, ",", false);

        // The first token is the main sememe; reset first so that a re-parse
        // of an empty definition does not keep a stale value.
        this.mainSememe = tokens.hasMoreTokens() ? tokens.nextToken() : null;

        while (tokens.hasMoreTokens()) {
            String item = tokens.nextToken();
            if (item.length() == 0) {
                continue;
            }

            if (isSymbolSememe(item)) {
                symbolList.add(item);
            } else if (item.indexOf('=') > 0) {
                // An '=' after the first character marks a relation sememe.
                relationList.add(item);
            } else {
                secondList.add(item);
            }
        }

        this.secondSememes = secondList.toArray(new String[secondList.size()]);
        this.relationSememes = relationList.toArray(new String[relationList.size()]);
        this.symbolSememes = symbolList.toArray(new String[symbolList.size()]);
    }

    /**
     * Tells whether the first character of {@code item} equals the first
     * column of any row of {@code Symbol_Descriptions} (a table that is not
     * declared in this class; presumably inherited from {@link HownetMeta}).
     */
    private static boolean isSymbolSememe(String item) {
        String symbol = item.substring(0, 1);
        for (int i = 0; i < Symbol_Descriptions.length; i++) {
            if (symbol.equals(Symbol_Descriptions[i][0])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the main (first) sememe.
     *
     * @return the first token of the definition, or {@code null} when the
     *         definition contains no tokens
     */
    public String getMainSememe() {
        return mainSememe;
    }

    /**
     * Returns the other basic sememes.
     *
     * @return the internal array of basic sememes other than the main one
     *         (not a copy)
     */
    public String[] getSecondSememes() {
        return secondSememes;
    }

    /**
     * Returns the relation sememes.
     *
     * @return the internal array of {@code key=value} items (not a copy)
     */
    public String[] getRelationSememes() {
        return relationSememes;
    }

    /**
     * Returns the symbol sememes.
     *
     * @return the internal array of symbol-prefixed items (not a copy)
     */
    public String[] getSymbolSememes() {
        return symbolSememes;
    }

    /**
     * Collects the names of all sememes of this concept: the main sememe, the
     * part after {@code '='} of each relation sememe, each symbol sememe
     * without its leading symbol character, and the other basic sememes.
     *
     * @return a new set of sememe names
     */
    public Set<String> getAllSememeNames() {
        Set<String> names = new HashSet<String>();

        // Main sememe.
        // NOTE(review): this is null when the definition is empty, in which
        // case null is added to the set (behaviour kept as-is) - confirm that
        // callers expect this.
        names.add(getMainSememe());

        // Relation sememes: keep only the value after '='.
        for (String item : getRelationSememes()) {
            names.add(item.substring(item.indexOf("=") + 1));
        }

        // Symbol sememes: drop the leading symbol character.
        for (String item : getSymbolSememes()) {
            names.add(item.substring(1));
        }

        // Other basic sememes.
        for (String item : getSecondSememes()) {
            names.add(item);
        }
        return names;
    }

    /**
     * Builds a textual dump of the word, part of speech, definition and the
     * four parsed sememe groups.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("name=").append(this.word);
        sb.append("; pos=").append(this.pos);
        sb.append("; define=").append(this.define);
        sb.append("; 第一基本义元:[").append(mainSememe);

        sb.append("]; 其他基本义元描述:[");
        for (String sem : secondSememes) {
            sb.append(sem).append(";");
        }

        sb.append("]; [关系义元描述:");
        for (String sem : relationSememes) {
            sb.append(sem).append(";");
        }

        sb.append("]; [关系符号描述:");
        for (String sem : symbolSememes) {
            sb.append(sem).append(";");
        }
        sb.append("]");
        return sb.toString();
    }

    /**
     * Tells whether this is a content word or a function word.
     *
     * @return {@code true} for a content word, {@code false} for a function
     *         word
     */
    public boolean isSubstantive() {
        return this.bSubstantive;
    }

    /**
     * @return the Chinese word naming this concept
     */
    public String getWord() {
        return word;
    }

    /**
     * @param word the Chinese word naming this concept
     */
    public void setWord(String word) {
        this.word = word;
    }

    /**
     * @return the part of speech
     */
    public String getPos() {
        return pos;
    }

    /**
     * @param pos the part of speech
     */
    public void setPos(String pos) {
        this.pos = pos;
    }

    /**
     * @return the normalised definition string
     */
    public String getDefine() {
        return define;
    }

    /**
     * Replaces the definition and re-derives everything that depends on it
     * (the content-word flag and the parsed sememe groups), applying the same
     * normalisation as the constructor.
     *
     * @param define the new definition; {@code null} is treated as empty and
     *               surrounding whitespace is removed
     */
    public void setDefine(String define) {
        applyDefine(define);
    }

    /**
     * Determines the type of this concept by searching the definition,
     * case-insensitively, for the markers listed in {@code Concept_Type}.
     *
     * @return the type name of the first matching marker, or
     *         {@code "普通概念"} when none matches
     */
    public String getType() {
        // define can only be null if a subclass assigned the field directly.
        if (define != null) {
            // Locale.ROOT keeps the ASCII markers stable under locales with
            // special casing rules (for example Turkish dotted/dotless i).
            String upperDefine = define.toUpperCase(Locale.ROOT);
            for (int i = 0; i < Concept_Type.length; i++) {
                String marker = Concept_Type[i][0].toUpperCase(Locale.ROOT);
                if (upperDefine.indexOf(marker) >= 0) {
                    return Concept_Type[i][1];
                }
            }
        }
        return "普通概念";
    }

    /**
     * Hashes on the definition, falling back to the word when the definition
     * is {@code null}, and to 0 when both are {@code null}.
     */
    @Override
    public int hashCode() {
        if (define != null) {
            return define.hashCode();
        }
        return (word == null) ? 0 : word.hashCode();
    }

    /**
     * Two concepts are equal when both their words and their definitions are
     * equal; {@code null} values are compared safely.
     */
    @Override
    public boolean equals(Object anObject) {
        if (this == anObject) {
            return true;
        }
        if (!(anObject instanceof Concept)) {
            return false;
        }
        Concept other = (Concept) anObject;
        return nullSafeEquals(word, other.word)
                && nullSafeEquals(define, other.define);
    }

    /** Null-tolerant string comparison. */
    private static boolean nullSafeEquals(String a, String b) {
        return (a == null) ? (b == null) : a.equals(b);
    }
}