/*
* Carrot2 project.
*
* Copyright (C) 2002-2010, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.examples.source;
import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.core.*;
import org.carrot2.core.attribute.*;
import org.carrot2.examples.ConsoleFormatter;
import org.carrot2.examples.SampleDocumentData;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;
/**
 * This example shows how to implement a simple Carrot2 {@link IDocumentSource}.
 * <p>
 * The source does not contact any external service: it serves copies of the sample
 * documents from {@link SampleDocumentData#DOCUMENTS_DATA_MINING}, optionally skipping
 * some of them based on the custom {@link #modulo} attribute.
 */
@Bindable
public class ExampleDocumentSource extends ProcessingComponentBase implements
    IDocumentSource
{
    /**
     * The query bound from the attribute with key
     * {@link CommonAttributesDescriptor.Keys#QUERY}. This example declares the attribute
     * but does not use its value when producing documents.
     */
    @Processing
    @Input
    @Attribute(key = CommonAttributesDescriptor.Keys.QUERY)
    public String query;

    /**
     * Maximum number of sample documents to consider, bound from the attribute with key
     * {@link CommonAttributesDescriptor.Keys#RESULTS}. Note that {@link #modulo} is
     * applied within that range, so fewer documents may actually be returned.
     */
    @Processing
    @Input
    @Attribute(key = CommonAttributesDescriptor.Keys.RESULTS)
    @IntRange(min = 1, max = 1000)
    public int results = 20;

    /**
     * Documents produced by this document source. The documents are returned in an output
     * attribute with key equal to {@link CommonAttributesDescriptor.Keys#DOCUMENTS}.
     */
    @Processing
    @Output
    @Attribute(key = CommonAttributesDescriptor.Keys.DOCUMENTS)
    @Internal
    public List<Document> documents;

    /**
     * Modulo to fetch the documents with: only sample documents whose index is divisible
     * by this value are returned. This dummy input attribute is just to show how custom
     * input attributes can be implemented.
     * <p>
     * The value must be at least 1; a value of 0 would cause a division by zero in
     * {@link #process()}.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min = 1)
    public int modulo = 1;

    /**
     * Another dummy attribute, not used during processing. This one shows that if the
     * attribute is not of a primitive type, the {@link ImplementingClasses} constraint
     * must be added to specify which assignable types are allowed as values for the
     * attribute. To allow all assignable values, specify empty
     * {@link ImplementingClasses#classes()} and {@link ImplementingClasses#strict()}
     * equal to <code>false</code>.
     */
    @Processing
    @Input
    @Attribute
    @ImplementingClasses(classes = {}, strict = false)
    public Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

    /**
     * Populates {@link #documents} with copies of the sample documents. At most
     * {@link #results} leading sample documents are considered and only those whose
     * index is divisible by {@link #modulo} are copied.
     *
     * @throws ProcessingException declared by the overridden method; this implementation
     *             does not throw it explicitly
     */
    @Override
    public void process() throws ProcessingException
    {
        // The input attributes will have already been bound at this point

        // Create a place holder for the results
        this.documents = new ArrayList<Document>();

        // Fetch results.
        final List<Document> inputDocuments =
            new ArrayList<Document>(SampleDocumentData.DOCUMENTS_DATA_MINING);

        final int resultsToPush = Math.min(inputDocuments.size(), this.results);
        for (int i = 0; i < resultsToPush; i++)
        {
            if (i % this.modulo != 0)
            {
                continue;
            }

            final Document originalDocument = inputDocuments.get(i);

            // For the sake of example we just copy the original document fields.
            // The summary must be copied as well, otherwise the clustering algorithm
            // would only see the titles.
            final Document document = new Document();
            document.setField(Document.TITLE,
                originalDocument.getField(Document.TITLE));
            document.setField(Document.SUMMARY,
                originalDocument.getField(Document.SUMMARY));
            document.setField(Document.CONTENT_URL,
                originalDocument.getField(Document.CONTENT_URL));
            documents.add(document);
        }

        // We've assigned and populated the documents field and we're done, Carrot2 core
        // will take care of the rest.
    }

    /**
     * Runs the example: passes custom values for the {@link #modulo} and
     * {@link #analyzer} attributes, processes this document source together with
     * {@link LingoClusteringAlgorithm} and prints the results to the console.
     *
     * @param args command line arguments, ignored
     */
    public static void main(String [] args)
    {
        final Controller controller = ControllerFactory.createSimple();
        final Map<String, Object> params = new HashMap<String, Object>();

        /*
         * This computes the attribute key dynamically based on the class and field name.
         */
        params.put(
            AttributeUtils.getKey(ExampleDocumentSource.class, "modulo"),
            2);
        params.put(
            AttributeUtils.getKey(ExampleDocumentSource.class, "analyzer"),
            new WhitespaceAnalyzer());

        /*
         * An alternative is to generate additional descriptor classes for bindables.
         * These classes provide type-safe attribute builders. Unfortunately due to
         * limitations of java compiler preprocessors, the generated class cannot be used
         * in the same compilation round as the code it is generated from (you can try
         * to split the compilation into more than one phase, however).
         *
         * ExampleDocumentSourceDescriptor.attributes()
         *     .modulo(2)
         *     .analyzer(new WhitespaceAnalyzer())
         *     .build();
         */

        final ProcessingResult result = controller.process(params,
            ExampleDocumentSource.class, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayResults(result);
    }
}