/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.summarisation.methods;
import java.util.ArrayList;
import java.util.StringTokenizer;
import cern.colt.list.DoubleArrayList;
import at.tuwien.ifs.somtoolbox.summarisation.parser.Scorer;
/**
 * Scoring method that adds a title-overlap bonus to the sentence scores obtained from the superclass.
 * <p>
 * The first entry of the document is treated as the title. For every following sentence, the number of
 * occurrences of the title's words in that sentence is divided by the number of words in the title and
 * multiplied by a constant factor (0.1); the result is added to the score the superclass computed for
 * {@link Scorer#ALL}.
 * <p>
 * Matching is case-insensitive and substring-based: a title word is also counted when it appears inside a
 * longer word of the sentence.
 *
 * @author Julius Penaranda
 * @version $Id: TitleMethod.java 3589 2010-05-21 10:42:01Z mayer $
 */
public class TitleMethod extends TFxIDF {

    /** Factor applied to the title-overlap ratio before it is added to a sentence score. */
    private static final double TITLE_WEIGHT = 0.1;

    /** The document as passed to {@link #setDocument(String, ArrayList)}; entry 0 is treated as the title. */
    private ArrayList<String> doc;

    /** Title bonus per non-title sentence; {@code null} until {@link #computeScores()} has run. */
    private DoubleArrayList tharray;

    /** Title bonus plus superclass score per non-title sentence; {@code null} until {@link #computeScores()} has run. */
    private DoubleArrayList finalarray;

    public TitleMethod(PartOfSpeech p) {
        super(p);
    }

    /**
     * Hands the document to the superclass and keeps a reference to it for the title comparison.
     *
     * @param filename passed through to the superclass unchanged
     * @param doc the document's sentences; the first entry is treated as the title
     */
    @Override
    public void setDocument(String filename, ArrayList<String> doc) {
        super.setDocument(filename, doc);
        this.doc = doc;
    }

    /**
     * Computes, for every sentence after the title, the superclass score plus the title bonus.
     * <p>
     * The bonus of a sentence is {@code (occurrences of title words in the sentence) / (number of title words)
     * * 0.1}, or 0 when no title word occurs in it. The bonuses alone are available afterwards through
     * {@link #getTitleScores()}.
     *
     * @return one combined score per non-title sentence, in document order; an empty list if the document has
     *         no entries
     */
    public DoubleArrayList computeScores() {
        if (this.doc.isEmpty()) {
            // No title and no sentences: nothing to score (previously failed on doc.get(0)).
            this.tharray = new DoubleArrayList();
            this.finalarray = new DoubleArrayList();
            return this.finalarray;
        }

        // The title is the same for every sentence, so tokenize and lower-case it only once.
        ArrayList<String> titleWords = lowerCaseTokens(this.doc.get(0));
        int totalphrase = titleWords.size();

        DoubleArrayList sentscores = super.computeScores(Scorer.ALL);
        this.tharray = new DoubleArrayList();
        this.finalarray = new DoubleArrayList();

        // for each sentence in document, ignore title
        for (int i = 1; i < this.doc.size(); i++) {
            String sent = this.doc.get(i).toLowerCase();

            int numphrase = 0;
            for (String word : titleWords) {
                numphrase += countOccurrences(sent, word);
            }

            // The zero check also covers a blank title (totalphrase == 0), avoiding a 0/0 division.
            double weight = numphrase == 0 ? 0.0 : (double) numphrase / totalphrase * TITLE_WEIGHT;

            this.tharray.add(weight);
            // NOTE(review): the superclass scores are read at i - 1, i.e. shifted by one relative to the
            // document index - presumably they do not contain an entry for the title; verify against TFxIDF.
            this.finalarray.add(weight + sentscores.get(i - 1));
        }
        return this.finalarray;
    }

    /**
     * Returns the title bonuses computed by the last call to {@link #computeScores()}.
     *
     * @return one bonus per non-title sentence, or {@code null} if {@link #computeScores()} has not been
     *         called yet
     */
    public DoubleArrayList getTitleScores() {
        return this.tharray;
    }

    /** Splits the text at whitespace (default {@link StringTokenizer} delimiters) and lower-cases each token. */
    private static ArrayList<String> lowerCaseTokens(String text) {
        StringTokenizer strtok = new StringTokenizer(text);
        ArrayList<String> tokens = new ArrayList<String>(strtok.countTokens());
        while (strtok.hasMoreTokens()) {
            tokens.add(strtok.nextToken().toLowerCase());
        }
        return tokens;
    }

    /** Counts the non-overlapping occurrences of {@code word} as a substring of {@code text}. */
    private static int countOccurrences(String text, String word) {
        int count = 0;
        int index = text.indexOf(word);
        while (index != -1) {
            count++;
            // Continue searching behind the match just found.
            index = text.indexOf(word, index + word.length());
        }
        return count;
    }
}