/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.vsm;
import java.util.Map;
import org.carrot2.core.attribute.Init;
import org.carrot2.text.linguistic.ILexicalDataFactory;
import org.carrot2.text.linguistic.IStemmerFactory;
import org.carrot2.text.preprocessing.PreprocessingComponentTestBase;
import org.carrot2.text.preprocessing.TestLexicalDataFactory;
import org.carrot2.text.preprocessing.TestStemmerFactory;
import org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipelineDescriptor;
import org.carrot2.util.attribute.AttributeBinder;
import org.carrot2.util.attribute.Input;
import org.junit.Before;
import org.carrot2.shaded.guava.common.collect.Maps;
/**
* A base class for tests requiring that the main term-document matrix is built.
*/
public class TermDocumentMatrixBuilderTestBase extends PreprocessingComponentTestBase
{
    /** Matrix builder, re-created by {@link #setUpMatrixBuilder()} before each test. */
    protected TermDocumentMatrixBuilder matrixBuilder;

    /**
     * Vector space model context holding the matrices; assigned by
     * {@link #buildTermDocumentMatrix()}.
     */
    protected VectorSpaceModelContext vsmContext;

    /** Preprocessing pipeline the tests run their input through. */
    protected CompletePreprocessingPipeline preprocessingPipeline;

    /**
     * Creates a fresh preprocessing pipeline and matrix builder for the test.
     * <p>
     * The pipeline has its minimum-length label filter switched off and receives the
     * test lexical data, stemmer and tokenizer factories through
     * {@link AttributeBinder}. The matrix builder is set to use
     * {@link TfTermWeighting} and a <code>maxWordDf</code> of <code>1.0</code>.
     *
     * @throws Exception propagated from attribute binding
     */
    @Before
    public void setUpMatrixBuilder() throws Exception
    {
        preprocessingPipeline = new CompletePreprocessingPipeline();
        preprocessingPipeline.labelFilterProcessor.minLengthLabelFilter.enabled = false;
        AttributeBinder.set(
            preprocessingPipeline, createPipelineAttributes(), Input.class, Init.class);

        final TermDocumentMatrixBuilder builder = new TermDocumentMatrixBuilder();
        builder.termWeighting = new TfTermWeighting();
        builder.maxWordDf = 1.0;
        matrixBuilder = builder;
    }

    /**
     * Collects the factory attributes that get bound to the preprocessing pipeline.
     *
     * @return a new map filled in by the pipeline descriptor's attribute builder
     */
    private Map<String, Object> createPipelineAttributes()
    {
        final Map<String, Object> pipelineAttributes = Maps.newHashMap();
        // The attribute builder writes into the map passed to it, so the value returned
        // by the call chain is intentionally not kept.
        CompletePreprocessingPipelineDescriptor.attributeBuilder(pipelineAttributes)
            .lexicalDataFactory(createLexicalDataFactory())
            .stemmerFactory(createStemmerFactory())
            .tokenizerFactory(createTokenizerFactory());
        return pipelineAttributes;
    }

    /**
     * Preprocesses the documents, query and language of the current
     * <code>context</code> (a field not declared in this class), replaces
     * <code>context</code> with the result, and then builds the term-document and
     * term-phrase matrices into a new {@link #vsmContext}.
     */
    protected void buildTermDocumentMatrix()
    {
        context = preprocessingPipeline.preprocess(
            context.documents,
            context.query,
            context.language.getLanguageCode());

        vsmContext = new VectorSpaceModelContext(context);
        matrixBuilder.buildTermDocumentMatrix(vsmContext);
        matrixBuilder.buildTermPhraseMatrix(vsmContext);
    }

    /**
     * @return a new {@link TestStemmerFactory}
     */
    @Override
    protected IStemmerFactory createStemmerFactory()
    {
        return new TestStemmerFactory();
    }

    /**
     * @return a new {@link TestLexicalDataFactory}
     */
    @Override
    protected ILexicalDataFactory createLexicalDataFactory()
    {
        return new TestLexicalDataFactory();
    }
}