package eu.dnetlib.iis.wf.ingest.webcrawl.fundings; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.mockito.Matchers.any; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import java.util.List; import org.apache.avro.mapred.AvroKey; import org.apache.hadoop.mapreduce.Mapper.Context; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.ArgumentCaptor; import org.mockito.Captor; import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import eu.dnetlib.iis.common.java.io.JsonUtils; import eu.dnetlib.iis.metadataextraction.schemas.DocumentText; /** * @author mhorst * */ @RunWith(MockitoJUnitRunner.class) @SuppressWarnings({"rawtypes", "unchecked"}) public class WebcrawlFundingsIngesterTest { private final static String EXPECTED_OUTPUT_ROOT_LOCATION = "eu/dnetlib/iis/wf/ingest/webcrawl/fundings/data/"; @Mock private Context context; @Captor private ArgumentCaptor<AvroKey<DocumentText>> resultCaptor; private final WebcrawlFundingsIngester ingester = new WebcrawlFundingsIngester(); // ------------------------------------- TESTS ----------------------------------- @Test public void testMapOnValidWebcrawlResources() throws Exception { // given List<DocumentText> webcrawlResources = loadResources("wos_document_text.json"); // execute for (DocumentText text : webcrawlResources) { ingester.map(new AvroKey<DocumentText>(text), null, context); } // assert verify(context, times(3)).write(resultCaptor.capture(), any()); DocumentText value = resultCaptor.getAllValues().get(0).datum(); assertNotNull(value); assertEquals("id-1", value.getId().toString()); assertEquals("\"This work was partially supported by the 7th EC Framework Programme " + "Project \"\"NEWTBVac\"\" Grant #241745.\"", value.getText()); value = resultCaptor.getAllValues().get(1).datum(); assertNotNull(value); assertEquals("id-2", value.getId().toString()); assertEquals("This work was supported by the Archimedes Center for Modeling, " + "Analysis and Computation (ACMAC) (project FP7-REGPOT-2009-1).", value.getText()); value = resultCaptor.getAllValues().get(2).datum(); assertNotNull(value); assertEquals("id-3", value.getId().toString()); assertEquals("This work was supported by the Archimedes Center for Modeling, " + "Analysis and Computation (ACMAC) (project FP7-REGPOT-2009-1).\n" + "\"This work is part of the research program of the \"\"Stichting voor " + "Fundamenteel Onderzoek der Materie (FOM)\"\", which is financially " + "supported by the \"\"Nederlandse organisatie voor Wetenschappelijk " + "Onderzock (NWO)\"\". D.P. thanks the European Commission for support " + "with a Marie Curic grant (Project No. TERASPIN 039223).\"", value.getText()); } @Test public void testMapOnInvalidWebcrawlResources() throws Exception { // given List<DocumentText> webcrawlResources = loadResources("wos_document_text_broken.json"); // execute for (DocumentText text : webcrawlResources) { ingester.map(new AvroKey<DocumentText>(text), null, context); } // assert verify(context, times(0)).write(any(), any()); } // ------------------------------------- TESTS ----------------------------------- private List<DocumentText> loadResources(String fileName) { return JsonUtils.convertToList(EXPECTED_OUTPUT_ROOT_LOCATION + fileName, DocumentText.SCHEMA$, DocumentText.class); } }