/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.examples.source;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.carrot2.core.*;
import org.carrot2.core.attribute.*;
import org.carrot2.core.attribute.CommonAttributesDescriptor.Keys;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * An example {@link IDocumentSource} that accepts a list of {@link Document}s
 * and returns a filtered list: only documents whose position in the input list
 * is divisible by {@link #modulo} are kept, up to {@link #results} documents.
 */
@Bindable
public class ModuloDocumentSource extends ProcessingComponentBase implements
    IDocumentSource
{
    /**
     * The query won't matter to us but we bind it anyway.
     */
    @Processing
    @Input
    @Attribute(key = CommonAttributesDescriptor.Keys.QUERY)
    public String query;

    /**
     * Maximum number of results to return.
     */
    @Processing
    @Input
    @Attribute(key = CommonAttributesDescriptor.Keys.RESULTS)
    @IntRange(min = 1, max = 1000)
    public int results = 20;

    /**
     * Modulo to fetch the documents with. This dummy input attribute is just to show how
     * custom input attributes can be implemented. A value of zero is rejected by
     * {@link #process()} because it cannot be used as a divisor.
     */
    @Processing
    @Input
    @Attribute
    public int modulo = 1;

    /**
     * Documents accepted and returned by this document source.
     * The documents are returned in an output
     * attribute with key equal to {@link Keys#DOCUMENTS}.
     */
    @Processing
    @Input
    @Output
    @Attribute(key = CommonAttributesDescriptor.Keys.DOCUMENTS)
    @Internal
    public List<Document> documents;

    /**
     * A non-primitive attribute to demonstrate the need for the
     * {@link org.carrot2.util.attribute.constraint.ImplementingClasses} constraint.
     * It must be added to specify
     * which assignable types are allowed as values for the attribute. To allow all
     * assignable values, specify empty
     * {@link org.carrot2.util.attribute.constraint.ImplementingClasses#classes()} and
     * {@link org.carrot2.util.attribute.constraint.ImplementingClasses#strict()} equal to <code>false</code>.
     */
    @Processing
    @Input
    @Attribute
    @ImplementingClasses(classes = {}, strict = false)
    public Analyzer analyzer = new StandardAnalyzer();

    /**
     * Processing routine. Replaces {@link #documents} with a new list holding a
     * copy of every input document whose index is divisible by {@link #modulo},
     * stopping once {@link #results} documents have been collected. A missing
     * (<code>null</code>) input list is treated as empty.
     *
     * @throws ProcessingException if {@link #modulo} is zero
     */
    @Override
    public void process() throws ProcessingException
    {
        // The input attributes will have already been bound at this point.

        // A zero divisor would otherwise surface as a bare ArithmeticException
        // from the remainder operator inside the loop below.
        if (this.modulo == 0)
        {
            throw new ProcessingException("Attribute 'modulo' must not be zero.");
        }

        // The documents attribute is an optional input; when nothing was provided
        // there is simply nothing to filter.
        final List<Document> input = (this.documents != null
            ? this.documents
            : new ArrayList<Document>());

        // Create a copy of the input list and filter.
        final List<Document> filtered = new ArrayList<Document>();
        for (int i = 0; i < input.size() && filtered.size() < results; i++)
        {
            if (i % this.modulo == 0)
            {
                final Document originalDocument = input.get(i);

                // For the sake of example we copy only the title and the content
                // URL of the original document; the summary is left empty.
                final Document document = new Document();
                document.setField(Document.TITLE, originalDocument.getTitle());
                document.setField(Document.SUMMARY, "");
                document.setField(Document.CONTENT_URL,
                    originalDocument.getField(Document.CONTENT_URL));
                filtered.add(document);
            }
        }

        // We've populated the filtered list and we're done. Write
        // the output list of documents to the output attribute.
        this.documents = filtered;
    }
}