/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.summarisation.parser; import java.util.ArrayList; import org.apache.commons.lang.ArrayUtils; import cern.colt.list.DoubleArrayList; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData; import at.tuwien.ifs.somtoolbox.data.SOMLibTemplateVector; import at.tuwien.ifs.somtoolbox.summarisation.methods.CombinedMethod; import at.tuwien.ifs.somtoolbox.summarisation.methods.KeywordMethod; import at.tuwien.ifs.somtoolbox.summarisation.methods.LocationMethod; import at.tuwien.ifs.somtoolbox.summarisation.methods.PartOfSpeech; import at.tuwien.ifs.somtoolbox.summarisation.methods.TFxIDF; import at.tuwien.ifs.somtoolbox.summarisation.methods.TitleMethod; import at.tuwien.ifs.somtoolbox.util.StringUtils; /** * @author Julius Penaranda * @version $Id: Scorer.java 3589 2010-05-21 10:42:01Z mayer $ */ public class Scorer { public static final String ALL = "all"; public static final String TFxIDF = "tfxidf"; public static final String LOCATION = "location"; public static final String TITLE_METHOD = "title-method"; public static final String KEYWORD_BOTH = "keyword_both"; public static final String KEYWORD_NOUN = "keyword_noun"; public static final String KEYWORD_VERB = "keyword_verb"; public static final String COMBINED = "combined"; public static final String[] methods = { TFxIDF, KEYWORD_NOUN, KEYWORD_VERB, KEYWORD_BOTH, TITLE_METHOD, LOCATION, COMBINED }; private TFxIDF tfxidf = null; private KeywordMethod key = null; private TitleMethod th = null; private LocationMethod lm = null; private CombinedMethod cm = null; private ArrayList<String>[] parsedDocuments = null; private ArrayList<String> filenames = null; private PartOfSpeech pos = null; private SentenceParser sParser = null; private Object[] itemNames = null; private String prefix = null; private InputData inputvectors = null; private SOMLibTemplateVector templatevectors = null; public Scorer(Object[] itemN, InputData input, SOMLibTemplateVector template) { this.pos = new PartOfSpeech(); this.pos.readModel(); this.itemNames = itemN; this.inputvectors = input; this.templatevectors = template; } public Scorer(String itemN, InputData input, SOMLibTemplateVector template) { this(new String[] { itemN }, input, template); } public void setFileNamePrefix(String fnprefix) { prefix = fnprefix; } public void parseDocuments() { sParser = new SentenceParser(itemNames); sParser.setFileNamePrefix(this.prefix); for (Object itemName : itemNames) { sParser.find_parse_Document((String) itemName); } setparsedDocuments(sParser.getParsedDocuments()); setFileNames(sParser.getFileNames()); } private void setparsedDocuments(ArrayList<String>[] pd) { this.parsedDocuments = pd; } public ArrayList<String>[] getParsedDocuments() { return this.parsedDocuments; } public ArrayList<String> getParsedDocument(int id) { return this.parsedDocuments[id]; } public void setFileNames(ArrayList<String> fn) { filenames = fn; } public ArrayList<String> getFileNames() { return filenames; } public int getNumbOfSent(int i) { return this.parsedDocuments[i].size() - 1; // ignore title } public void setVectors(SOMLibSparseInputData input, SOMLibTemplateVector template) { this.inputvectors = input; this.templatevectors = template; } public DoubleArrayList computeScores(String algorithm, String filename, ArrayList<String> doc) { DoubleArrayList finalarray = null; // if TFxIDF if (algorithm.equals(TFxIDF)) { if (tfxidf == null) { tfxidf = new TFxIDF(this.pos); tfxidf.setVectors(this.inputvectors, this.templatevectors); } tfxidf.setDocument(filename, doc); finalarray = tfxidf.computeScores(ALL); } // if keyword else if (StringUtils.equalsAny(algorithm, KEYWORD_BOTH, KEYWORD_NOUN, KEYWORD_VERB)) { if (key == null) { key = new KeywordMethod(this.pos); key.setVectors(this.inputvectors, this.templatevectors); } key.setDocument(filename, doc); finalarray = key.computeScores(algorithm); } // if title-header else if (algorithm.equals(TITLE_METHOD)) { if (th == null) { th = new TitleMethod(this.pos); th.setVectors(this.inputvectors, this.templatevectors); } th.setDocument(filename, doc); finalarray = th.computeScores(); } // if location else if (algorithm.equals(LOCATION)) { if (lm == null) { lm = new LocationMethod(this.pos); lm.setVectors(this.inputvectors, this.templatevectors); } lm.setDocument(filename, doc); finalarray = lm.computeScores(); } // if combined else if (algorithm.equals(COMBINED)) { if (cm == null) { cm = new CombinedMethod(this.pos); cm.setVectors(this.inputvectors, this.templatevectors); } cm.setDocument(filename, doc); finalarray = cm.computeScores(); } return finalarray; } /** * returns an array of scores of each sentence */ public DoubleArrayList getScores(int docID, String algorithm) { if (ArrayUtils.contains(methods, algorithm)) { // FIXME: maybe implement some caching of these results ? return computeScores(algorithm, filenames.get(docID), parsedDocuments[docID]); } else { System.out.println("Scorer: getScores(): cannot identify type of algorithm"); return null; } } }