/* Copyright 2012-2013 Fabian Steeg. Licensed under the Eclipse Public License 1.0 */ package org.lobid.lodmill.hadoop; import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; import org.lobid.lodmill.hadoop.CollectSubjects.CollectSubjectsMapper; import org.lobid.lodmill.hadoop.CollectSubjects.CollectSubjectsReducer; /** * Test the {@link CollectSubjects} class. * * @author Fabian Steeg (fsteeg) */ @SuppressWarnings("javadoc") public final class UnitTestCollectSubjects { static final String GND_CREATOR_ID = "118643606"; private static final String GND_ID = "http://d-nb.info/gnd/" + GND_CREATOR_ID; private static final String LOBID_ID_GND = "http://lobid.org/resource/HT000000716"; private static final String LOBID_TRIPLE_GND = "<" + LOBID_ID_GND + "> " + "<http://purl.org/dc/terms/creator>" + "<" + GND_ID + ">."; private static final String LOBID_ID_DEWEY = "http://lobid.org/resource/HT007307035"; private static final String DEWEY_ID = "http://dewey.info/class/325/"; private MapDriver<LongWritable, Text, Text, Text> mapDriver; private ReduceDriver<Text, Text, Text, Text> reduceDriver; @Before public void setUp() { final CollectSubjectsMapper mapper = new CollectSubjectsMapper(); final CollectSubjectsReducer reducer = new CollectSubjectsReducer(); mapDriver = MapDriver.newMapDriver(mapper); reduceDriver = ReduceDriver.newReduceDriver(reducer); } static String gnd(final String predicate, final String literal) { return String.format("<http://d-nb.info/gnd/%s> " + "<http://d-nb.info/standards/elementset/gnd#%s>" + "\"%s\".", GND_CREATOR_ID, predicate, literal); } @SuppressWarnings("static-method") @Test public void testProperties() { assertEquals("number of entries to resolve", 31, CollectSubjects.TO_RESOLVE.size()); assertEquals("number of predicates", 19, CollectSubjects.PREDICATES.size()); assertEquals("number of parents", 1, CollectSubjects.PARENTS.size()); } @Test public void testMapperGnd() throws IOException { // NOPMD (MRUnit) mapDriver.addInput(new LongWritable(), new Text(LOBID_TRIPLE_GND)); mapDriver.addInput(new LongWritable(), new Text(gnd("preferredNameForThePerson", "Adamucci, Antonio"))); mapDriver.addOutput(new Text(GND_ID), new Text(LOBID_ID_GND)); mapDriver.runTest(); } @Test public void testMapperDewey() throws IOException { // NOPMD (MRUnit) mapDriver.addInput(new LongWritable(), new Text("<" + LOBID_ID_DEWEY + "> " + "<http://purl.org/dc/terms/subject> " + "<" + DEWEY_ID + "> .")); mapDriver.addInput(new LongWritable(), new Text("<" + DEWEY_ID + "> " + "<http://www.w3.org/2004/02/skos/core#prefLabel> " + "\"International migration & colonization\"@en .")); mapDriver.addOutput(new Text(DEWEY_ID), new Text(LOBID_ID_DEWEY)); mapDriver.runTest(); } @Test public void testReducerGnd() throws IOException { // NOPMD (MRUnit) reduceDriver.addInput(new Text(GND_ID), Arrays.asList(new Text(LOBID_ID_GND), new Text(LOBID_ID_DEWEY))); reduceDriver.addOutput(new Text(GND_ID), new Text(LOBID_ID_GND + "," + LOBID_ID_DEWEY)); reduceDriver.runTest(); } @Test public void testReducerDewey() throws IOException { // NOPMD (MRUnit) reduceDriver.addInput(new Text(DEWEY_ID), Arrays.asList(new Text(LOBID_ID_GND), new Text(LOBID_ID_DEWEY))); reduceDriver.addOutput(new Text(DEWEY_ID), new Text(LOBID_ID_GND + "," + LOBID_ID_DEWEY)); reduceDriver.runTest(); } private enum BlankGeo { /*@formatter:off*/ LOBID_1("<http://lobid.org/organisation/AF-KaIS> " + "<http://www.w3.org/2003/01/geo/wgs84_pos#location> _:node16vicghfdx21 ."), LOBID_2("<http://lobid.org/organisation/AE-ShAU> " + "<http://www.w3.org/2003/01/geo/wgs84_pos#location> _:node16vicghfdx21 ."), BLANK("_:node16vicghfdx21 " + "<http://www.w3.org/2003/01/geo/wgs84_pos#lat> \"-25.6494315\" ."); /*@formatter:on*/ final String triple; BlankGeo(String triple) { this.triple = triple; } } @Test public void testMapperBlanksGeo() throws IOException { final List<Pair<Text, Text>> result = runGeoMapDriver(); assertEquals(new Text("_:node16vicghfdx21:somefile"), result.get(0) .getFirst()); assertEquals(new Text("http://lobid.org/organisation/AF-KaIS"), result.get(0).getSecond()); } private List<Pair<Text, Text>> runGeoMapDriver() throws IOException { mapDriver.addInput(new LongWritable(), new Text(BlankGeo.LOBID_1.triple)); mapDriver.addInput(new LongWritable(), new Text(BlankGeo.LOBID_2.triple)); mapDriver.addInput(new LongWritable(), new Text(BlankGeo.BLANK.triple)); return mapDriver.run(); } @Test public void testReducerBlanksGeo() throws IOException {// NOPMD (MRUnit) setUpReduceInput(runGeoMapDriver()); reduceDriver.addOutput(new Text("_:node16vicghfdx21:somefile"), new Text( "http://lobid.org/organisation/AF-KaIS," + "http://lobid.org/organisation/AE-ShAU")); reduceDriver.runTest(false); } private enum BlankAddress { /*@formatter:off*/ LOBID_1("<http://lobid.org/organisation/AE-ShAU> " + "<http://www.w3.org/2006/vcard/ns#adr> _:node16vicghfdx20 ."), LOBID_2("<http://lobid.org/organisation/AF-KaIS> " + "<http://www.w3.org/2006/vcard/ns#adr> _:node16vicghfdx20 ."), BLANK("_:node16vicghfdx20 " + "<http://www.w3.org/2006/vcard/ns#country-name> \"United Arab Emirates\" ."); /*@formatter:on*/ final String triple; BlankAddress(String triple) { this.triple = triple; } } @Test public void testMapperBlanksAddress() throws IOException { final List<Pair<Text, Text>> result = runAddressMapDriver(); assertEquals(new Text("_:node16vicghfdx20:somefile"), result.get(0) .getFirst()); assertEquals(new Text("http://lobid.org/organisation/AE-ShAU"), result.get(0).getSecond()); } private List<Pair<Text, Text>> runAddressMapDriver() throws IOException { for (BlankAddress elem : BlankAddress.values()) { mapDriver.addInput(new LongWritable(), new Text(elem.triple)); } return mapDriver.run(); } @Test public void testReducerBlanksAddress() throws IOException { // NOPMD (MRUnit) setUpReduceInput(runAddressMapDriver()); reduceDriver.addOutput(new Text("_:node16vicghfdx20:somefile"), new Text( "http://lobid.org/organisation/AE-ShAU," + "http://lobid.org/organisation/AF-KaIS")); reduceDriver.runTest(false); } private void setUpReduceInput(final List<Pair<Text, Text>> mapResult) { final List<Text> values = new ArrayList<>(); for (Pair<Text, Text> pair : mapResult) { values.add(pair.getSecond()); } reduceDriver.addInput(mapResult.get(0).getFirst(), values); } }