/*
* Copyright 2013 SciFY NPO <info@scify.org>.
*
* This product is part of the NewSum Free Software.
* For more information about NewSum visit
*
* http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* If this code or its output is used, extended, re-engineered, integrated,
* or embedded to any extent in another software or hardware, there MUST be
* an explicit attribution to this work in the resulting source code,
* the packaging (where such packaging exists), or user interface
* (where such an interface exists).
* The attribution must be of the form "Powered by NewSum, SciFY"
*/
package org.scify.NewSumServer.Server.Summarisation;
import gr.demokritos.iit.jinsect.documentModel.comparators.NGramCachedGraphComparator;
import gr.demokritos.iit.jinsect.documentModel.representations.DocumentNGramSymWinGraph;
import gr.demokritos.iit.jinsect.structs.GraphSimilarity;
import gr.demokritos.iit.jinsect.utils;
import java.util.*;
import org.scify.NewSumServer.Server.Structures.Sentence;
/**
 * Removes redundancy from a summary.
 * <p>
 * Two sentences are treated as redundant when the normalised value similarity
 * (value similarity divided by size similarity) of their n-gram graphs exceeds
 * a threshold. Of such a pair, the sentence that comes later in a
 * "longest snippet first" ordering is the one that is dropped.
 * @author ggianna
 */
public class RedundancyRemover {

    /**
     * Normalised value similarity above which two sentences are considered
     * redundant by {@link #removeRedundantSentences(List)}.
     */
    public static final double DEFAULT_REDUNDANCY_THRESHOLD = 0.3;

    /**
     * Removes redundant sentences from a given list of sentences (supposed
     * to constitute a summary), using {@link #DEFAULT_REDUNDANCY_THRESHOLD}.
     * @param lToCleanUp The list to cleanup. It is not modified.
     * @return A new list containing only non-redundant sentences, in the
     * order in which they appear in the given list.
     * @throws NullPointerException if the list or any of its elements is
     * {@code null}.
     */
    public List<Sentence> removeRedundantSentences(final List<Sentence> lToCleanUp) {
        return removeRedundantSentences(lToCleanUp, DEFAULT_REDUNDANCY_THRESHOLD);
    }

    /**
     * Removes redundant sentences from a given list of sentences (supposed
     * to constitute a summary).
     * <p>
     * Sentences are visited from the longest snippet to the shortest (ties
     * are broken by the natural ordering of the snippet text, then by
     * position in the given list). Every sentence that is still kept is
     * compared against all kept sentences that follow it in that ordering;
     * a follower whose normalised value similarity is strictly greater than
     * the threshold is discarded.
     * @param lToCleanUp The list to cleanup. It is not modified.
     * @param dThreshold The normalised value similarity above which the
     * later sentence of a pair is discarded. A {@code NaN} threshold
     * discards nothing, because no value compares greater than {@code NaN}.
     * @return A new list containing only non-redundant sentences, in the
     * order in which they appear in the given list.
     * @throws NullPointerException if the list or any of its elements is
     * {@code null}.
     */
    public List<Sentence> removeRedundantSentences(final List<Sentence> lToCleanUp,
            final double dThreshold) {
        // Work on a random-access snapshot, so that positional access is
        // cheap whatever list implementation the caller passed in.
        final List<Sentence> lSentences = new ArrayList<Sentence>(lToCleanUp);
        final int iCount = lSentences.size();

        // Read every snippet once and prepare the list of original positions.
        final String[] saSnippets = new String[iCount];
        final List<Integer> lOrder = new ArrayList<Integer>(iCount);
        for (int iPos = 0; iPos < iCount; iPos++) {
            saSnippets[iPos] = lSentences.get(iPos).getSnippet();
            lOrder.add(Integer.valueOf(iPos));
        }

        // Order positions by snippet size DESCENDING, then by snippet text.
        // Collections.sort is stable, so full ties keep their original order.
        Collections.sort(lOrder, new Comparator<Integer>() {
            @Override
            public int compare(Integer iFirst, Integer iSecond) {
                final String sFirst = saSnippets[iFirst.intValue()];
                final String sSecond = saSnippets[iSecond.intValue()];
                if (sFirst.length() != sSecond.length()) {
                    return sFirst.length() > sSecond.length() ? -1 : 1;
                }
                return sFirst.compareTo(sSecond);
            }
        });

        // Removal is tracked per ORIGINAL position. Nothing is structurally
        // removed while iterating, so no iterator can be invalidated, and the
        // survivors can be emitted in original order without a second sort.
        final boolean[] baRemoved = new boolean[iCount];
        final NGramCachedGraphComparator ngc = new NGramCachedGraphComparator();

        // The last sentence in the ordering has no followers to compare with.
        for (int iCur = 0; iCur < iCount - 1; iCur++) {
            final int iKept = lOrder.get(iCur).intValue();
            if (baRemoved[iKept]) {
                continue;
            }
            final DocumentNGramSymWinGraph dg = createGraph(saSnippets[iKept]);

            // For every remaining sentence following
            for (int iNext = iCur + 1; iNext < iCount; iNext++) {
                final int iCandidate = lOrder.get(iNext).intValue();
                if (baRemoved[iCandidate]) {
                    continue;
                }
                // A fresh graph is built for every comparison.
                // NOTE(review): building one graph per sentence and reusing it
                // would avoid repeated work, provided the comparator does not
                // depend on receiving new graph instances - confirm first.
                final DocumentNGramSymWinGraph dg2 = createGraph(saSnippets[iCandidate]);

                final GraphSimilarity gs = ngc.getSimilarityBetween(dg, dg2);
                // Guard against division by zero when the size similarity is zero
                final double dNVS = gs.SizeSimilarity == 0.0 ? 0.0 :
                        gs.ValueSimilarity / gs.SizeSimilarity;
                // If very similar, drop the sentence that comes later in the ordering
                if (dNVS > dThreshold) {
                    baRemoved[iCandidate] = true;
                }
            }
        }

        // Collect the remaining sentences according to original order
        final List<Sentence> lRes = new ArrayList<Sentence>(iCount);
        for (int iPos = 0; iPos < iCount; iPos++) {
            if (!baRemoved[iPos]) {
                lRes.add(lSentences.get(iPos));
            }
        }
        return lRes;
    }

    /**
     * Creates the n-gram graph of a text.
     * @param sText The text to represent.
     * @return A new graph whose data string has been set to the given text.
     */
    private static DocumentNGramSymWinGraph createGraph(final String sText) {
        final DocumentNGramSymWinGraph dgRes = new DocumentNGramSymWinGraph();
        dgRes.setDataString(sText);
        return dgRes;
    }

    // Disabled manual smoke test, kept for reference.
//    public static void main(String[] sArgs) {
//        List<Sentence> al = new ArrayList<Sentence>();
//        for (int iCnt = 0; iCnt < 2; iCnt++) {
//            Sentence s1 = new Sentence(utils.getNormalString(), "", "");
//            if (iCnt == 0)
//                s1 = new Sentence("======", "", "");
//            al.add(s1);
////            if (new Random().nextBoolean())
//            al.add(s1);
//        }
//        al.add(new Sentence("Testing", "", ""));
//        al.add(new Sentence("Testing", "", ""));
//        al.add(new Sentence("hica", "", ""));
//        al.add(new Sentence("hica", "", ""));
//        System.out.println(utils.printIterable(al, "\n") + "\n");
//
//        RedundancyRemover rr = new RedundancyRemover();
//        al = rr.removeRedundantSentences(al);
//
//        System.out.println(utils.printIterable(al, "\n"));
//    }
}