package org.ilrt.mca.harvester.xml;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.sdb.SDBFactory;
import com.hp.hpl.jena.vocabulary.DC;
import com.hp.hpl.jena.vocabulary.RDF;
import org.ilrt.mca.Common;
import org.ilrt.mca.harvester.AbstractTest;
import org.ilrt.mca.harvester.Harvester;
import org.ilrt.mca.rdf.SdbManagerImpl;
import org.ilrt.mca.rdf.StoreWrapper;
import org.ilrt.mca.rdf.StoreWrapperManager;
import org.ilrt.mca.vocab.MCA_REGISTRY;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.Calendar;
import java.util.GregorianCalendar;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

/**
 * Test for {@link XhtmlHarvesterImpl}.
 *
 * <p>Before each test the store is reset, an {@code HtmlSource} entry pointing at
 * {@link #uri} is added to the default graph, a fixed date is recorded for that
 * source in the audit graph, and the web server inherited from {@link AbstractTest}
 * is started to serve the test resource. The test then runs the harvester and checks
 * that the graph named {@link #uri} is no longer empty and that the audit date
 * differs from the seeded one.</p>
 */
public class HtmlHarvesterTest extends AbstractTest {

    /** Path of the resource handed to the test web server. */
    private final String resourcePath = "/pcavailability.html";

    /** Media type handed to the test web server together with {@link #resourcePath}. */
    private final String mediaType = "text/html";

    /** URI of the harvest source; also the name of the graph inspected after harvesting. */
    String uri = host + ":" + portNumber + resourcePath;

    /** String form of the date written to the audit graph by {@link #setUp()}. */
    String date;

    /**
     * Resets the store, seeds the registry and audit graphs, starts the web server
     * and creates the data manager used by the harvester.
     *
     * @throws Exception if preparing the store or starting the server fails
     */
    @Before
    public void setUp() throws Exception {

        // clear data store
        setUpStore();

        // create data set
        StoreWrapperManager manager = getStoreWrapperManager();
        StoreWrapper storeWrapper = manager.getStoreWrapper();

        try {
            Dataset dataset = SDBFactory.connectDataset(storeWrapper.getStore());

            // add the harvest source details to the default graph (registry)
            Model m = dataset.getDefaultModel();
            Resource r = m.createResource(uri);
            m.add(m.createStatement(r, RDF.type, MCA_REGISTRY.HtmlSource));
            m.add(m.createStatement(r, MCA_REGISTRY.hasXslSource,
                    m.createResource("xsl://xsl/pcavailability.xsl")));

            // create a last visited date and add it to the audit graph
            Model audit = dataset.getNamedModel(Common.AUDIT_GRAPH_URI);
            Calendar calendar = new GregorianCalendar(2009, Calendar.SEPTEMBER, 30, 11, 38);
            date = Common.parseXsdDate(calendar.getTime());
            audit.add(audit.createStatement(r, DC.date,
                    audit.createTypedLiteral(date, XSDDatatype.XSDdateTime)));
        } finally {
            // clean up, even when seeding the store fails part-way through
            storeWrapper.close();
        }

        // start the web server
        super.startServer(resourcePath, mediaType);

        // data manager that can be used by the harvester
        dataManager = new SdbManagerImpl(manager);
    }

    /** Stops the web server started in {@link #setUp()}. */
    @After
    public void tearDown() {
        super.stopServer();
    }

    /**
     * Runs the harvester against the seeded source and verifies that data was
     * harvested and that the audit date changed.
     *
     * @throws Exception if the harvester or the store access fails
     */
    @Test
    public void harvest() throws Exception {

        // ---------- test the data before we harvest

        StoreWrapper beforeWrapper = getStoreWrapper();

        try {
            // check that the registry has a source to harvest
            Model registry = SDBFactory.connectDefaultModel(beforeWrapper.getStore());
            Assert.assertTrue("There should be a harvest source in the registry (default graph)",
                    registry.contains(registry.getResource(uri), RDF.type,
                            MCA_REGISTRY.HtmlSource));

            // check that the audit graph has a date
            Model auditBefore = SDBFactory.connectNamedModel(beforeWrapper.getStore(),
                    Common.AUDIT_GRAPH_URI);
            Assert.assertTrue("The audit graph should hold a date for the source",
                    auditBefore.getResource(uri).hasProperty(DC.date));
            assertEquals("The audit date should be the one seeded in setUp", date,
                    auditBefore.getResource(uri).getProperty(DC.date).getLiteral()
                            .getLexicalForm());
        } finally {
            // release the wrapper even when one of the checks above fails
            beforeWrapper.close();
        }

        // ---------- fire the harvester

        Harvester harvester = new XhtmlHarvesterImpl(dataManager);
        harvester.harvest();

        // ---------- test the data after the harvest

        StoreWrapper afterWrapper = getStoreWrapper();

        try {
            // check that the harvest graph has got data
            Model harvestedData = SDBFactory.connectNamedModel(afterWrapper.getStore(), uri);
            assertFalse("The model shouldn't be empty", harvestedData.isEmpty());

            Model auditAfter = SDBFactory.connectNamedModel(afterWrapper.getStore(),
                    Common.AUDIT_GRAPH_URI);

            // fail with an assertion rather than a NullPointerException if the date is gone
            Assert.assertTrue("The audit graph should still hold a date after the harvest",
                    auditAfter.getResource(uri).hasProperty(DC.date));

            // check that the audit date has been updated
            String newDate = auditAfter.getResource(uri).getProperty(DC.date).getLiteral()
                    .getLexicalForm();
            assertFalse("The audit date should have been updated by the harvest",
                    date.equals(newDate));
        } finally {
            // release the wrapper even when one of the checks above fails
            afterWrapper.close();
        }
    }
}