/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.source.ambient; import java.util.List; import java.util.Set; import org.carrot2.core.*; import org.carrot2.core.attribute.*; import org.carrot2.util.attribute.*; import org.carrot2.util.attribute.constraint.IntRange; /** * A base document source for test collections developed at Fondazione Ugo Bordoni. */ @Bindable(prefix = "FubDocumentSource", inherit = CommonAttributes.class) public class FubDocumentSource extends ProcessingComponentBase implements IDocumentSource { /** {@link Group} name. */ protected static final String TOPIC_ID = "Topic ID"; @Processing @Output @Attribute(key = AttributeNames.DOCUMENTS, inherit = true) @Internal public List<Document> documents; /** * Topics and subtopics covered in the output documents. The set is computed for the * output {@link #documents} and it may vary for the same main topic based e.g. on the * requested number of requested results or {@link #minTopicSize}. */ @Processing @Output @Attribute @Group(TOPIC_ID) @Level(AttributeLevel.ADVANCED) public Set<Object> topicIds; @Processing @Output @Attribute(key = AttributeNames.QUERY, inherit = true) public String query; /** * Minimum topic size. Documents belonging to a topic with fewer documents than * minimum topic size will not be returned. */ @Input @Processing @Attribute @IntRange(min = 1) @Group(DefaultGroups.FILTERING) @Level(AttributeLevel.MEDIUM) public int minTopicSize = 1; /** * Include documents without topics. */ @Input @Processing @Attribute @Group(DefaultGroups.FILTERING) @Level(AttributeLevel.MEDIUM) public boolean includeDocumentsWithoutTopic = false; protected void processInternal(FubTestCollection data, int topicId, int requestedResults) throws ProcessingException { this.documents = data.getDocumentsForTopic(topicId, requestedResults, minTopicSize, includeDocumentsWithoutTopic); for (Document document : documents) { document.setLanguage(LanguageCode.ENGLISH); } } }