package ruc.irm.tendency.word; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Set; import ruc.irm.similarity.word.hownet2.concept.BaseConceptParser; import ruc.irm.similarity.word.hownet2.concept.Concept; import ruc.irm.similarity.word.hownet2.concept.XiaConceptParser; import ruc.irm.similarity.word.hownet2.sememe.BaseSememeParser; import ruc.irm.similarity.word.hownet2.sememe.XiaSememeParser; /** * 基于知网实现的词语倾向性判别 * * @author <a href="mailto:iamxiatian@gmail.com">夏天</a> * @organization 中国人民大学信息资源管理学院 知识工程实验室 */ public class HownetWordTendency implements WordTendency { public static String[] POSITIVE_SEMEMES = new String[]{ "良", "喜悦", "夸奖", "满意", "期望", "注意", "致敬", "喜欢", "专", "敬佩", "同意", "爱惜", "愿意", "思念", "拥护", "祝贺", "福", "需求", "奖励", "致谢", "欢迎", "羡慕", "感激", "爱恋" }; public static String[] NEGATIVE_SEMEMES = new String[]{ "莠", "谴责", "害怕", "生气", "悲哀", "着急", "轻视", "羞愧", "烦恼", "灰心", "犹豫", "为难", "懊悔", "厌恶", "怀疑", "怜悯", "忧愁", "示怒", "不满", "仇恨", "埋怨", "失望", "坏" }; private BaseConceptParser conceptParser = null; private BaseSememeParser sememeParser = null; public HownetWordTendency(){ this.conceptParser =XiaConceptParser.getInstance(); try { this.sememeParser = new XiaSememeParser(); } catch (IOException e) { e.printStackTrace(); } } @Override public double getTendency(String word) { double positive = getSentiment(word, POSITIVE_SEMEMES); double negative = getSentiment(word, NEGATIVE_SEMEMES);; return positive - negative; } public double getSentiment(String word, String[] candidateSememes) { Collection<Concept> concepts = conceptParser.getConcepts(word); Set<String> sememes = new HashSet<String>(); for (Concept c : concepts) { sememes.addAll(c.getAllSememeNames()); } double max = 0.0; for(String item:sememes){ double total = 0.0; for(String positiveSememe:candidateSememes){ //如果有特别接近的义原,直接返回该相似值,避免其他干扰 double value = sememeParser.getSimilarity(item, positiveSememe); if(value>0.9){ return value; } total += value; } double sim = total / candidateSememes.length; if(sim>max){ max = sim; } } return max; } }