package eu.dnetlib.iis.wf.ingest.pmc.metadata;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.MetadataImporter.EXCLUDED_IDS;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.MetadataImporter.NAMED_OUTPUT_FAULT;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.MetadataImporter.NAMED_OUTPUT_META;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.mapred.AvroKey;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.jdom.input.JDOMParseException;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import eu.dnetlib.iis.audit.schemas.Fault;
import eu.dnetlib.iis.common.javamapreduce.MultipleOutputs;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata;
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
/**
 * Unit tests for {@link MetadataImporter}.
 *
 * The mapper under test is created as an anonymous subclass whose
 * {@code instantiateMultipleOutputs} returns a Mockito mock, so every record
 * the mapper emits can be captured and inspected without a running Hadoop job.
 *
 * @author mhorst
 *
 */
@RunWith(MockitoJUnitRunner.class)
@SuppressWarnings({"rawtypes", "unchecked"})
public class MetadataImporterTest {

    private static final String XML_FILE = "/eu/dnetlib/iis/wf/ingest/pmc/metadata/data/document_jats23_nested_in_oai.xml";

    private static final String NON_XML_FILE = "/eu/dnetlib/iis/wf/ingest/pmc/metadata/data/document_invalid.xml";

    /** Identifier assigned to every input document built by these tests. */
    private static final String DOCUMENT_ID = "id";

    /** Value configured under {@code NAMED_OUTPUT_META}. */
    private static final String META_OUTPUT_NAME = "meta";

    /** Value configured under {@code NAMED_OUTPUT_FAULT}. */
    private static final String FAULT_OUTPUT_NAME = "fault";

    @Mock
    private Context context;

    @Mock
    private MultipleOutputs multipleOutputs;

    @Captor
    private ArgumentCaptor<String> mosKeyCaptor;

    @Captor
    private ArgumentCaptor<AvroKey<?>> mosValueCaptor;

    private MetadataImporter mapper;

    /**
     * Creates the mapper under test, wired to the mocked {@link MultipleOutputs}.
     */
    @Before
    public void init() throws Exception {
        mapper = new MetadataImporter() {
            @Override
            protected MultipleOutputs instantiateMultipleOutputs(Context context) {
                return multipleOutputs;
            }
        };
    }

    // ------------------------------------- TESTS -----------------------------------

    /**
     * Setup is expected to fail when the configuration defines no named outputs at all.
     */
    @Test(expected=RuntimeException.class)
    public void testSetupWithoutNamedOutputMeta() throws Exception {
        // given
        Configuration conf = new Configuration();
        doReturn(conf).when(context).getConfiguration();

        // execute
        mapper.setup(context);
    }

    /**
     * Setup is expected to fail when only the metadata named output is configured.
     */
    @Test(expected=RuntimeException.class)
    public void testSetupWithoutNamedOutputFault() throws Exception {
        // given
        Configuration conf = new Configuration();
        conf.set(NAMED_OUTPUT_META, META_OUTPUT_NAME);
        doReturn(conf).when(context).getConfiguration();

        // execute
        mapper.setup(context);
    }

    /**
     * A valid XML document is expected to produce exactly one metadata record
     * carrying the input identifier, and nothing on the plain context.
     */
    @Test
    public void testMap() throws Exception {
        // given
        Configuration conf = stubFullyConfiguredContext();
        mapper.setup(context);
        AvroKey<DocumentText> input = buildInput(getContent(XML_FILE));

        // execute
        mapper.map(input, null, context);

        // assert
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, times(1)).write(mosKeyCaptor.capture(), mosValueCaptor.capture());
        // doc meta
        assertEquals(conf.get(NAMED_OUTPUT_META), mosKeyCaptor.getValue());
        ExtractedDocumentMetadata docMeta = (ExtractedDocumentMetadata) mosValueCaptor.getValue().datum();
        assertNotNull(docMeta);
        assertEquals(DOCUMENT_ID, docMeta.getId());
    }

    /**
     * A document whose identifier is listed under {@code EXCLUDED_IDS} is
     * expected to produce no output.
     */
    @Test
    public void testMapWithExcludedIds() throws Exception {
        // given
        AvroKey<DocumentText> input = buildInput(getContent(XML_FILE));
        Configuration conf = stubFullyConfiguredContext();
        // the context mock hands out this same instance, so the exclusion is
        // visible to the mapper as long as it is set before setup() runs
        conf.set(EXCLUDED_IDS, DOCUMENT_ID);
        mapper.setup(context);

        // execute
        mapper.map(input, null, context);

        // assert
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, never()).write(any(), any());
    }

    /**
     * A document with empty text is expected to produce no output.
     */
    @Test
    public void testMapWithEmptryContent() throws Exception {
        // given
        AvroKey<DocumentText> input = buildInput("");
        stubFullyConfiguredContext();
        mapper.setup(context);

        // execute
        mapper.map(input, null, context);

        // assert
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, never()).write(any(), any());
    }

    /**
     * Unparsable content is expected to produce two records, in this order:
     * a metadata record with empty text and unknown entity type, followed by
     * a fault record identifying the parse exception.
     */
    @Test
    public void testMapWithIvalidXml() throws Exception {
        // given
        Configuration conf = stubFullyConfiguredContext();
        mapper.setup(context);
        AvroKey<DocumentText> input = buildInput(getContent(NON_XML_FILE));

        // execute
        mapper.map(input, null, context);

        // assert
        verify(context, never()).write(any(), any());
        verify(multipleOutputs, times(2)).write(mosKeyCaptor.capture(), mosValueCaptor.capture());
        // doc meta
        assertEquals(conf.get(NAMED_OUTPUT_META), mosKeyCaptor.getAllValues().get(0));
        ExtractedDocumentMetadata docMeta = (ExtractedDocumentMetadata) mosValueCaptor.getAllValues().get(0).datum();
        assertNotNull(docMeta);
        assertEquals(DOCUMENT_ID, docMeta.getId());
        assertEquals("", docMeta.getText());
        assertEquals(JatsXmlHandler.ENTITY_TYPE_UNKNOWN, docMeta.getEntityType());
        // fault
        assertEquals(conf.get(NAMED_OUTPUT_FAULT), mosKeyCaptor.getAllValues().get(1));
        Fault fault = (Fault) mosValueCaptor.getAllValues().get(1).datum();
        assertNotNull(fault);
        assertEquals(DOCUMENT_ID, fault.getInputObjectId());
        assertEquals(JDOMParseException.class.getName(), fault.getCode());
        assertTrue(fault.getTimestamp() > 0);
    }

    /**
     * Cleanup is expected to close the multiple outputs exactly once.
     */
    @Test
    public void testCleanup() throws Exception {
        // given
        stubFullyConfiguredContext();
        mapper.setup(context);

        // execute
        mapper.cleanup(context);

        // assert
        verify(multipleOutputs, times(1)).close();
    }

    // --------------------------------------- PRIVATE ----------------------------------------

    /**
     * Builds a configuration defining both named outputs and stubs the mocked
     * context to return it.
     *
     * @return the configuration instance the context will hand out
     */
    private Configuration stubFullyConfiguredContext() {
        Configuration conf = new Configuration();
        conf.set(NAMED_OUTPUT_META, META_OUTPUT_NAME);
        conf.set(NAMED_OUTPUT_FAULT, FAULT_OUTPUT_NAME);
        doReturn(conf).when(context).getConfiguration();
        return conf;
    }

    /**
     * Wraps the given text in a {@link DocumentText} record identified by
     * {@link #DOCUMENT_ID}.
     *
     * @param text document content to be fed to the mapper
     * @return mapper input key holding the built record
     */
    private AvroKey<DocumentText> buildInput(String text) {
        DocumentText.Builder docTextBuilder = DocumentText.newBuilder();
        docTextBuilder.setId(DOCUMENT_ID);
        docTextBuilder.setText(text);
        return new AvroKey<DocumentText>(docTextBuilder.build());
    }

    /**
     * Reads a classpath resource into a string.
     *
     * @param location classpath location of the resource
     * @return resource content
     * @throws IOException when the resource is missing or cannot be read
     */
    private String getContent(String location) throws IOException {
        // try-with-resources guarantees the stream is released even when reading fails
        try (InputStream input = MetadataImporter.class.getResourceAsStream(location)) {
            if (input == null) {
                // fail with the offending location instead of an anonymous NullPointerException
                throw new IOException("test resource not found on classpath: " + location);
            }
            return IOUtils.toString(input, "utf8");
        }
    }
}