/*******************************************************************************
 *  Copyright 2007, 2009 Jorge Villalon (jorge.villalon@uai.cl)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  	http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *******************************************************************************/
package tml.vectorspace.operations;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import tml.annotators.PennTreeAnnotator;
import tml.utils.StanfordUtils;
import tml.vectorspace.operations.results.TermRankedResult;
import edu.stanford.nlp.trees.Tree;

/**
 * Operation that lists every distinct noun found in the corpus passages.
 * <p>
 * For each passage of the corpus the stored {@link PennTreeAnnotator} field is
 * read from the repository, turned into a {@link Tree} and handed to
 * {@link StanfordUtils#extractNouns(Tree)}. Each noun is lower-cased and
 * reported once as a {@link TermRankedResult} with rank {@code 0}; the results
 * are finally sorted by term.
 */
public class CompoundNounsSummarized extends AbstractOperation<TermRankedResult> implements Operation<TermRankedResult> {

	/**
	 * Creates the operation and sets its display name.
	 */
	public CompoundNounsSummarized() {
		// NOTE(review): "nounds" looks like a typo for "nouns". The literal is
		// left untouched because the name may be used as a key elsewhere.
		this.name = "Compound nounds summarized";
	}

	/**
	 * Runs the operation: collects the distinct lower-cased nouns of all
	 * passages into {@code results} and sorts them by term.
	 * <p>
	 * A passage whose annotation cannot be read (an {@link IOException} from the
	 * repository) or whose annotation is {@code null} is skipped; the failure is
	 * logged and processing continues with the next passage.
	 *
	 * @throws Exception if the superclass hooks or the tree parsing fail
	 */
	@Override
	public void start() throws Exception {
		super.start();

		// A hash set makes the "already reported?" check constant time; the
		// order of the results is still the order of first appearance because
		// the results themselves are appended to the list below.
		Set<String> seenNouns = new HashSet<String>();

		for (String passageId : corpus.getPassages()) {
			String annotation = null;
			try {
				annotation = this.repository.getDocumentField(passageId, PennTreeAnnotator.FIELD_NAME);
			} catch (IOException e) {
				// Best effort: report the failure once, with its cause and the
				// passage involved, then move on to the next passage.
				logger.error("Could not read field " + PennTreeAnnotator.FIELD_NAME
						+ " for passage " + passageId, e);
			}
			if (annotation == null) {
				continue;
			}

			Tree pennTree = StanfordUtils.getTreeFromString(passageId, annotation);
			List<String> allNouns = StanfordUtils.extractNouns(pennTree);
			if (allNouns == null) {
				continue;
			}

			for (String noun : allNouns) {
				String term = noun.toLowerCase();
				// Set.add returns false when the term was already present.
				if (seenNouns.add(term)) {
					TermRankedResult result = new TermRankedResult();
					result.setTerm(term);
					result.setRank(0);
					this.results.add(result);
				}
			}
		}

		// Present the terms in natural String order.
		Collections.sort(this.results, new Comparator<TermRankedResult>() {
			@Override
			public int compare(TermRankedResult o1, TermRankedResult o2) {
				return o1.getTerm().compareTo(o2.getTerm());
			}
		});

		super.end();
	}
}