//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.collectionreaders;
import java.io.IOException;
import java.io.InputStream;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageConsumer;
import javax.jms.QueueBrowser;
import javax.jms.TextMessage;
import org.apache.tika.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import uk.gov.dstl.baleen.core.utils.BaleenDefaults;
import uk.gov.dstl.baleen.exceptions.InvalidParameterException;
import uk.gov.dstl.baleen.resources.SharedActiveMQResource;
import uk.gov.dstl.baleen.uima.BaleenCollectionReader;
import uk.gov.dstl.baleen.uima.IContentExtractor;
/**
* This collection reader will process all waiting messages available on the
* ActiveMQ message broker, and then watch for new messages.
*
* <p>
* Currently limited to JMS messages of type {@link javax.jms.TextMessage}
* </p>
*
*
* @baleen.javadoc
*/
public class ActiveMQReader extends BaleenCollectionReader {
  /**
   * Connection to ActiveMQ
   *
   * @baleen.resource uk.gov.dstl.baleen.resources.SharedActiveMQResource
   */
  public static final String KEY_ACTIVEMQ = "activemq";
  @ExternalResource(key = KEY_ACTIVEMQ)
  SharedActiveMQResource activeMQ;

  /**
   * The ActiveMQ endpoint - queue or VirtualTopic - to read data from
   *
   * @baleen.config input
   */
  public static final String PARAM_ENDPOINT = "endpoint";
  @ConfigurationParameter(name = PARAM_ENDPOINT, defaultValue = "input")
  private String endpoint;

  /**
   * The message selector with which to filter messages
   *
   * @baleen.config
   */
  public static final String PARAM_MESSAGE_SELECTOR = "messageSelector";
  @ConfigurationParameter(name = PARAM_MESSAGE_SELECTOR, defaultValue = "")
  private String messageSelector;

  /**
   * The content extractor to use to extract content from files
   *
   * @baleen.config Value of BaleenDefaults.DEFAULT_CONTENT_EXTRACTOR
   */
  public static final String PARAM_CONTENT_EXTRACTOR = "contentExtractor";
  @ConfigurationParameter(name = PARAM_CONTENT_EXTRACTOR, defaultValue = BaleenDefaults.DEFAULT_CONTENT_EXTRACTOR)
  private String contentExtractor;

  /** Extractor that turns the text of each message into JCas content. */
  private IContentExtractor extractor;

  /** Consumer bound to the configured endpoint and message selector; created in doInitialize. */
  private MessageConsumer consumer;

  /**
   * Creates and initialises the configured content extractor, then opens a
   * message consumer on the configured endpoint.
   *
   * @param context the UIMA context used to initialise the content extractor
   * @throws ResourceInitializationException if the content extractor name is invalid
   *         or the consumer cannot be created
   */
  @Override
  protected void doInitialize(final UimaContext context) throws ResourceInitializationException {
    try {
      this.extractor = getContentExtractor(contentExtractor);
    } catch (final InvalidParameterException ipe) {
      throw new ResourceInitializationException(ipe);
    }
    this.extractor.initialize(context, getConfigParameters(context));

    try {
      this.consumer = activeMQ.createConsumer(endpoint, messageSelector);
    } catch (final JMSException e) {
      throw new ResourceInitializationException(e);
    }
  }

  /**
   * Receives the next message from the consumer and passes its text to the
   * content extractor. The source is reported as the resource name and the
   * endpoint joined by a dot.
   *
   * @param jCas the JCas to populate from the message text
   * @throws IOException if no message could be received or the message is not a
   *         {@link TextMessage}
   * @throws CollectionException if the JMS provider reports an error
   */
  @Override
  protected void doGetNext(final JCas jCas) throws IOException, CollectionException {
    final String source = String.join(".", activeMQ.getResourceName(), endpoint);
    try {
      final Message msg = this.consumer.receive();

      // receive() yields null when the consumer is closed while waiting; report that
      // explicitly rather than failing with a NullPointerException further down.
      if (msg == null) {
        throw new IOException(
            String.format("No message received from source %s; the consumer may have been closed", source));
      }

      if (!(msg instanceof TextMessage)) {
        throw new IOException(
            String.format(
                "Unexpected message type for message with id %1$s from source %2$s",
                msg.getJMSMessageID(), source));
      }

      final String text = ((TextMessage) msg).getText();
      final InputStream is = IOUtils.toInputStream(text);
      this.extractor.processStream(is, source, jCas);
    } catch (final JMSException e) {
      throw new CollectionException(e);
    }
  }

  /**
   * Closes the message consumer, if one was created.
   *
   * @throws IOException if the JMS provider fails to close the consumer
   */
  @Override
  protected void doClose() throws IOException {
    // The consumer is absent when initialisation failed before it was created.
    if (this.consumer == null) {
      return;
    }
    try {
      this.consumer.close();
    } catch (final JMSException e) {
      throw new IOException(e);
    }
  }

  /**
   * Checks whether at least one message matching the selector is waiting on the
   * endpoint, using a queue browser so that no message is consumed.
   *
   * @return true if the browser reports at least one waiting message
   * @throws CollectionException if the JMS provider reports an error
   */
  @Override
  public boolean doHasNext() throws IOException, CollectionException {
    try {
      final QueueBrowser browser = activeMQ.createQueueBrowser(endpoint, messageSelector);
      try {
        return browser.getEnumeration().hasMoreElements();
      } finally {
        // A new browser is created on every call, so release it each time.
        browser.close();
      }
    } catch (final JMSException e) {
      throw new CollectionException(e);
    }
  }
}