/*******************************************************************************
* Copyright 2010 Stephen O'Rourke (stephen.orourke@sydney.edu.au)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package tml.test;
import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.*;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import org.junit.BeforeClass;
import org.junit.Test;
import tml.corpus.TextDocument;
import tml.storage.importers.TextImporter;
import tml.vectorspace.TermWeighting;
import tml.vectorspace.operations.RapidAutomaticKeywordExtraction;
import tml.vectorspace.operations.results.RapidAutomaticKeywordExtractionResult;
/**
* This class tests the {@link RapidAutomaticKeywordExtraction} operation.
*
* @author Stephen O'Rourke
*
*/
public class RapidAutomaticKeywordExtractionTest extends AbstractTmlIndexingTest {
private static TextDocument document;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
String content = "Compatibility of Systems of Linear Constraints over the Set of Natural Numbers" +
"\nCriteria of compatibility of a system of linear Diophantine equations, strict inequations, " +
"and nonstrict inequations are considered. Upper bounds for components of a minimal set " +
"of solutions and algorithms of construction of minimal generating sets of solutions for all " +
"types of systems are given. These criteria and the corresponding algorithms for " +
"constructing a minimal supporting set of solutions can be used in solving all the " +
"considered types of systems and systems of mixed types.";
AbstractTmlIndexingTest.setUpBeforeClass();
repository.addDocument("1", content, "Title", "N/A", new TextImporter());
document = repository.getTextDocument("1");
document.getParameters().setTermWeightLocal(TermWeighting.LocalWeight.TF);
document.getParameters().setTermWeightGlobal(TermWeighting.GlobalWeight.None);
document.load(repository);
}
@Test
public void shouldExtractKeywords() throws Exception {
Map<String, Double> expectedKeywords = new LinkedHashMap<String, Double>();
expectedKeywords.put("minimal generating sets", 8.7);
expectedKeywords.put("linear diophantine equations", 8.5);
expectedKeywords.put("minimal supporting set", 7.7);
expectedKeywords.put("minimal set", 4.7);
expectedKeywords.put("linear constraints", 4.5);
expectedKeywords.put("natural numbers", 4.0);
expectedKeywords.put("strict inequations", 4.0);
expectedKeywords.put("nonstrict inequations", 4.0);
expectedKeywords.put("upper bounds", 4.0);
expectedKeywords.put("mixed types", 3.7);
expectedKeywords.put("considered types", 3.2);
expectedKeywords.put("set", 2.0);
expectedKeywords.put("types", 1.7);
expectedKeywords.put("considered", 1.5);
expectedKeywords.put("compatibility", 1.0);
expectedKeywords.put("systems", 1.0);
expectedKeywords.put("criteria", 1.0);
expectedKeywords.put("system", 1.0);
expectedKeywords.put("components", 1.0);
expectedKeywords.put("solutions", 1.0);
expectedKeywords.put("algorithms", 1.0);
expectedKeywords.put("construction", 1.0);
expectedKeywords.put("constructing", 1.0);
expectedKeywords.put("solving", 1.0);
RapidAutomaticKeywordExtraction operation = new RapidAutomaticKeywordExtraction();
operation.setCorpus(document.getSentenceCorpus());
operation.start();
assertThat(operation.getResultsNumber(), equalTo(expectedKeywords.size()));
Iterator<String> keywords = expectedKeywords.keySet().iterator();
for (RapidAutomaticKeywordExtractionResult result : operation.getResults()) {
String keyword = keywords.next();
assertThat(result.getKeyword(), equalTo(keyword));
assertEquals(result.getWeighting(), expectedKeywords.get(keyword), 0.05);
}
}
}