package eu.europeana.cloud.service.dps.storm.io; import com.google.gson.Gson; import com.rits.cloning.Cloner; import eu.europeana.cloud.common.model.Representation; import eu.europeana.cloud.mcs.driver.DataSetServiceClient; import eu.europeana.cloud.mcs.driver.RepresentationIterator; import eu.europeana.cloud.service.commons.urls.UrlParser; import eu.europeana.cloud.service.commons.urls.UrlPart; import eu.europeana.cloud.service.dps.PluginParameterKeys; import eu.europeana.cloud.service.dps.storm.AbstractDpsBolt; import eu.europeana.cloud.service.dps.storm.StormTaskTuple; import eu.europeana.cloud.service.mcs.exception.DataSetNotExistsException; import eu.europeana.cloud.service.mcs.exception.MCSException; import org.apache.storm.task.OutputCollector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.MalformedURLException; import java.util.List; /** * @author krystian. */ public class ReadDatasetBolt extends AbstractDpsBolt { private static final Logger LOGGER = LoggerFactory.getLogger(ReadDatasetBolt.class); private final String ecloudMcsAddress; public ReadDatasetBolt(String ecloudMcsAddress) { this.ecloudMcsAddress = ecloudMcsAddress; } /** * Should be used only on tests. */ public static ReadDatasetBolt getTestInstance(String ecloudMcsAddress, OutputCollector outputCollector ) { ReadDatasetBolt instance = new ReadDatasetBolt(ecloudMcsAddress); instance.outputCollector = outputCollector; return instance; } @Override public void prepare() { } @Override public void execute(StormTaskTuple t) { DataSetServiceClient datasetClient = new DataSetServiceClient(ecloudMcsAddress); final String authorizationHeader = t.getParameter(PluginParameterKeys.AUTHORIZATION_HEADER); datasetClient.useAuthorizationHeader(authorizationHeader); emitSingleRepresentationFromDataSet(t, datasetClient); } public void emitSingleRepresentationFromDataSet(StormTaskTuple t, DataSetServiceClient dataSetServiceClient) { final String dataSetUrl = t.getParameter(PluginParameterKeys.DATASET_URL); final String representationName = t.getParameter(PluginParameterKeys.REPRESENTATION_NAME); t.getParameters().remove(PluginParameterKeys.REPRESENTATION_NAME); t.getParameters().remove(PluginParameterKeys.DATASET_URL); if (dataSetUrl != null) { try { final UrlParser urlParser = new UrlParser(dataSetUrl); if (urlParser.isUrlToDataset()) { RepresentationIterator iterator = dataSetServiceClient.getRepresentationIterator(urlParser.getPart(UrlPart.DATA_PROVIDERS), urlParser.getPart(UrlPart.DATA_SETS)); while (iterator.hasNext()) { Representation representation = iterator.next(); if (representationName == null || representation.getRepresentationName().equals(representationName)) { StormTaskTuple next = buildStormTaskTuple(t, representation); outputCollector.emit(inputTuple, next.toStormTuple()); } } } else { LOGGER.warn("dataset url is not formulated correctly {}", dataSetUrl); emitDropNotification(t.getTaskId(), dataSetUrl, "dataset url is not formulated correctly", ""); } } catch (MalformedURLException ex) { LOGGER.error("ReadFileBolt error:" + ex.getMessage()); emitErrorNotification(t.getTaskId(), dataSetUrl, ex.getMessage(), t.getParameters().toString()); } } else { String message = "Missing dataset URL"; LOGGER.warn(message); emitDropNotification(t.getTaskId(), "", message, ""); } } private StormTaskTuple buildStormTaskTuple(StormTaskTuple t, Representation representation) { StormTaskTuple stormTaskTuple = new Cloner().deepClone(t); String RepresentationsJson = new Gson().toJson(representation); stormTaskTuple.addParameter(PluginParameterKeys.REPRESENTATION, RepresentationsJson); return stormTaskTuple; } }