package eu.dnetlib.iis.wf.importer.content;
import static eu.dnetlib.iis.wf.importer.content.DocumentContentUrlDispatcher.PROPERTY_MULTIPLEOUTPUTS;
import static eu.dnetlib.iis.wf.importer.content.DocumentContentUrlDispatcher.PROPERTY_PREFIX_MIMETYPES_CSV;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import eu.dnetlib.iis.common.javamapreduce.MultipleOutputs;
import eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl;
/**
 * Unit tests of {@link DocumentContentUrlDispatcher} executed against a mocked mapper
 * {@link Context} and a mocked {@link MultipleOutputs} instance.
 *
 * @author mhorst
 *
 */
@RunWith(MockitoJUnitRunner.class)
@SuppressWarnings({"rawtypes", "unchecked"})
public class DocumentContentUrlDispatcherTest {

    /** Name of the named output configured for PDF content. */
    private static final String PDF_OUTPUT = "pdf";

    /** Name of the named output configured for HTML content. */
    private static final String HTML_OUTPUT = "html";

    /** Comma separated mime types assigned to the PDF output. */
    private static final String PDF_MIME_TYPES_CSV = "pdf,application/pdf";

    /** Single mime type assigned to the HTML output. */
    private static final String HTML_MIME_TYPE = "text/html";

    @Mock
    private Context context;

    @Mock
    private MultipleOutputs multipleOutputs;

    @Captor
    private ArgumentCaptor<String> outputNameCaptor;

    @Captor
    private ArgumentCaptor<AvroKey<?>> outputValueCaptor;

    private DocumentContentUrlDispatcher dispatcher;

    /**
     * Creates the dispatcher under test, overriding the multiple outputs factory method
     * so that it hands back the mock instead of instantiating real outputs.
     */
    @Before
    public void init() throws Exception {
        dispatcher = new DocumentContentUrlDispatcher() {

            @Override
            protected MultipleOutputs instantiateMultipleOutputs(Context mapperContext) {
                return multipleOutputs;
            }
        };
    }

    // ------------------------------------- TESTS -----------------------------------

    /**
     * Setting the dispatcher up with an empty configuration is expected to end with
     * {@link IllegalArgumentException}.
     */
    @Test(expected=IllegalArgumentException.class)
    public void testSetupNoParams() throws Exception {
        // given: configuration without any property set
        // execute
        setUpDispatcher(new Configuration());
    }

    /**
     * Records whose mime types are assigned to configured outputs are expected to be written,
     * in mapping order, to the matching named outputs and never to the context itself.
     */
    @Test
    public void testDispatch() throws Exception {
        // given
        Configuration configuration = newConfiguration(PDF_OUTPUT, HTML_OUTPUT);
        configuration.set(PROPERTY_PREFIX_MIMETYPES_CSV + PDF_OUTPUT, PDF_MIME_TYPES_CSV);
        configuration.set(PROPERTY_PREFIX_MIMETYPES_CSV + HTML_OUTPUT, HTML_MIME_TYPE);
        setUpDispatcher(configuration);

        DocumentContentUrl pdfRecord = buildDocumentContentUrl("pdfId", "pdf");
        DocumentContentUrl htmlRecord = buildDocumentContentUrl("htmlId", HTML_MIME_TYPE);

        // execute
        dispatcher.map(new AvroKey<>(pdfRecord), null, context);
        dispatcher.map(new AvroKey<>(htmlRecord), null, context);

        // assert
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, times(2)).write(outputNameCaptor.capture(), outputValueCaptor.capture());
        assertDispatched(0, PDF_OUTPUT, pdfRecord);
        assertDispatched(1, HTML_OUTPUT, htmlRecord);
    }

    /**
     * A record carrying no mime type at all is expected to be written nowhere.
     */
    @Test
    public void testDispatchForUndefinedMimeType() throws Exception {
        // given
        Configuration configuration = newConfiguration(HTML_OUTPUT);
        configuration.set(PROPERTY_PREFIX_MIMETYPES_CSV + HTML_OUTPUT, HTML_MIME_TYPE);
        setUpDispatcher(configuration);

        DocumentContentUrl recordWithoutMimeType = buildDocumentContentUrl("htmlId", null);

        // execute
        dispatcher.map(new AvroKey<>(recordWithoutMimeType), null, context);

        // assert
        assertNothingWritten();
    }

    /**
     * A record whose mime type is not assigned to any configured output is expected
     * to be written nowhere.
     */
    @Test
    public void testDispatchForUnsupportedMimeType() throws Exception {
        // given
        Configuration configuration = newConfiguration(PDF_OUTPUT);
        configuration.set(PROPERTY_PREFIX_MIMETYPES_CSV + PDF_OUTPUT, PDF_MIME_TYPES_CSV);
        setUpDispatcher(configuration);

        DocumentContentUrl xmlRecord = buildDocumentContentUrl("xmlId", "xml");

        // execute
        dispatcher.map(new AvroKey<>(xmlRecord), null, context);

        // assert
        assertNothingWritten();
    }

    /**
     * Cleaning the dispatcher up is expected to close the multiple outputs exactly once.
     */
    @Test
    public void testCleanup() throws Exception {
        // given
        setUpDispatcher(newConfiguration(PDF_OUTPUT));

        // execute
        dispatcher.cleanup(context);

        // assert: single-argument verify checks for exactly one invocation
        verify(multipleOutputs).close();
    }

    // ---------------------------------- PRIVATE ------------------------------------

    /**
     * Makes the mocked context return given configuration and runs dispatcher setup.
     */
    private void setUpDispatcher(Configuration configuration) throws Exception {
        doReturn(configuration).when(context).getConfiguration();
        dispatcher.setup(context);
    }

    /**
     * Creates configuration declaring given named outputs, without any mime type assignments.
     */
    private static Configuration newConfiguration(String... outputNames) {
        Configuration configuration = new Configuration();
        configuration.set(PROPERTY_MULTIPLEOUTPUTS, buildMultipleOutputsProperty(outputNames));
        return configuration;
    }

    /**
     * Verifies that neither the context nor the multiple outputs received any write call.
     */
    private void assertNothingWritten() throws Exception {
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, never()).write(any(), any());
    }

    /**
     * Checks the captured write call at given position: the output name has to be equal to
     * the expected one and the written datum has to be the very same instance as the expected record.
     */
    private void assertDispatched(int position, String expectedOutputName, DocumentContentUrl expectedRecord) {
        assertEquals(expectedOutputName, outputNameCaptor.getAllValues().get(position));
        DocumentContentUrl writtenRecord = (DocumentContentUrl) outputValueCaptor.getAllValues().get(position).datum();
        assertNotNull(writtenRecord);
        assertTrue(expectedRecord == writtenRecord);
    }

    /**
     * Joins output names with a single space character.
     */
    private static String buildMultipleOutputsProperty(String... outputNames) {
        StringBuilder joinedNames = new StringBuilder();
        String separator = "";
        for (String outputName : outputNames) {
            joinedNames.append(separator).append(outputName);
            // every name but the first one is preceded with a space
            separator = " ";
        }
        return joinedNames.toString();
    }

    /**
     * Builds a record with given identifier and mime type, a fixed "docUrl" location
     * and content size of 1 KB.
     */
    private static DocumentContentUrl buildDocumentContentUrl(String id, String mimeType) {
        return DocumentContentUrl.newBuilder()
                .setId(id)
                .setUrl("docUrl")
                .setMimeType(mimeType)
                .setContentSizeKB(1L)
                .build();
    }
}