/*
 *    Copyright (c) 2013, University of Toronto.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License"); you may
 *    not use this file except in compliance with the License. You may obtain
 *    a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *    License for the specific language governing permissions and limitations
 *    under the License.
 */
package edu.toronto.cs.xcurator.discoverer;

import edu.toronto.cs.xcurator.TestConfigs;
import edu.toronto.cs.xcurator.common.DataDocument;
import edu.toronto.cs.xcurator.mapping.Mapping;
import edu.toronto.cs.xcurator.mapping.XmlBasedMapping;
import edu.toronto.cs.xcurator.mapping.Schema;
import edu.toronto.cs.xcurator.common.RdfUriBuilder;
import edu.toronto.cs.xcurator.common.XPathFinder;
import edu.toronto.cs.xcurator.common.XmlDocumentBuilder;
import edu.toronto.cs.xcurator.common.XmlParser;
import edu.toronto.cs.xcurator.common.XmlUriBuilder;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.xpath.XPathExpressionException;
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Tests that drive a {@link MappingDiscoverer} over sample XML documents
 * loaded from the test classpath, write the discovered mapping to a file under
 * {@code output/}, and then check that the expected entities are present in
 * the resulting {@link Mapping}.
 *
 * <p>Setup failures (missing resource, unwritable output file, parser errors)
 * are propagated to JUnit so the test fails with the real cause.
 *
 * @author zhuerkan
 */
public class MappingDiscoveryTests {

  /** Directory, relative to the working directory, that receives serialized mappings. */
  private static final String OUTPUT_DIR = "output";

  /** Namespace URI and prefix passed to {@link XmlBasedMapping} by most tests. */
  private static final String XCURATOR_NS_URI = "http://www.cs.toronto.edu/xcurator";
  private static final String XCURATOR_NS_PREFIX = "xcurator";

  private XmlParser parser;
  private Transformer transformer;
  private RdfUriBuilder rdfUriBuilder;
  private XmlUriBuilder xmlUriBuilder;

  /**
   * Creates the collaborators shared by every test: the XML parser, an
   * indenting {@link Transformer} used for serialization, and the URI builders.
   *
   * @throws TransformerConfigurationException if no transformer can be created
   */
  @Before
  public void setup() throws TransformerConfigurationException {
    parser = new XmlParser();
    transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
    rdfUriBuilder = new RdfUriBuilder(TestConfigs.testRdfUriConfig());
    xmlUriBuilder = new XmlUriBuilder();
  }

  /**
   * Discovers a mapping from the clinical trials sample using only the basic
   * entity discovery step, and expects the {@code biospec_descr} entity.
   */
  @Test
  public void test_discoverMapping_clinical_trials()
      throws SAXException, IOException, ParserConfigurationException {
    // Setup
    Document dataDoc = parseResource("/clinicaltrials/data/content.xml");
    Mapping mapping = new XmlBasedMapping(XCURATOR_NS_URI, XCURATOR_NS_PREFIX);

    try (FileOutputStream out = openOutput("clinicaltrials-mapping.xml")) {
      MappingDiscoverer discoverer = new MappingDiscoverer(dataDoc, mapping);

      // Add discovery steps
      discoverer.addStep(new BasicEntityDiscovery(parser, rdfUriBuilder, xmlUriBuilder))
          .addStep(newSerializeStep(out));

      // Test
      discoverer.discoverMapping();
    }

    // Verify
    Assert.assertTrue(mapping.isInitialized());
    Iterator<Schema> iter = mapping.getEntityIterator();
    while (iter.hasNext()) {
      System.out.println(iter.next().getId());
    }
    Schema example = mapping.getEntity("biospec_descr");
    Assert.assertNotNull(example);
  }

  /**
   * Discovers a mapping from the fb-20121231 XBRL sample with the key
   * attribute and hash-based interlinking steps added, and checks the expected
   * entities and one relation.
   */
  @Test
  public void test_discoverMapping_fb_XBRL()
      throws SAXException, IOException, ParserConfigurationException {
    // Setup
    Document dataDoc = parseResource("/secxbrls/data/fb-20121231.xml");
    Mapping mapping = new XmlBasedMapping();

    try (FileOutputStream out = openOutput("fb-20121231-mapping.xml")) {
      MappingDiscoverer discoverer = new MappingDiscoverer(dataDoc, mapping);

      // Add discovery steps
      discoverer.addStep(new BasicEntityDiscovery(parser, rdfUriBuilder, xmlUriBuilder))
          .addStep(new KeyAttributeDiscovery())
          .addStep(new HashBasedEntityInterlinking(rdfUriBuilder))
          .addStep(newSerializeStep(out));

      // Test
      discoverer.discoverMapping();
    }

    // Verify
    Assert.assertTrue(mapping.isInitialized());
    Schema e = mapping.getEntity("http://fasb.org/us-gaap/2012-01-31/NonoperatingIncomeExpense");
    Assert.assertNotNull(e);
    org.junit.Assert.assertTrue(e.hasRelation("http://fasb.org/us-gaap/2012-01-31/NonoperatingIncomeExpense.http://www.xbrl.org/2003/instance/unit"));
    Assert.assertNotNull(mapping.getEntity("http://www.xbrl.org/2003/instance/segment"));
    Assert.assertNotNull(mapping.getEntity("http://www.xbrl.org/2003/instance/period"));
  }

  /**
   * Discovers a single mapping from three XBRL samples and checks that the
   * path of a discovered entity selects at least one node in the msft
   * document.
   */
  @Test
  public void test_discoverMapping_multiple_XBRLs() throws FileNotFoundException, SAXException,
      IOException, ParserConfigurationException, XPathExpressionException {
    // Setup
    Document fb2013 = parseResource("/secxbrls/data/fb-20131231.xml");
    Document msft2013 = parseResource("/secxbrls/data/msft-20130630.xml");
    Document goog2013 = parseResource("/secxbrls/data/goog-20131231.xml");
    Mapping mapping = new XmlBasedMapping(XCURATOR_NS_URI, XCURATOR_NS_PREFIX);

    try (FileOutputStream out = openOutput("xbrl-mapping.xml")) {
      MappingDiscoverer discoverer = new MappingDiscoverer(mapping);
      discoverer.addDataDocument(new DataDocument(fb2013))
          .addDataDocument(new DataDocument(msft2013))
          .addDataDocument(new DataDocument(goog2013));

      // Add discovery steps
      discoverer.addStep(new BasicEntityDiscovery(parser, rdfUriBuilder, xmlUriBuilder))
          .addStep(newSerializeStep(out));

      // Test
      discoverer.discoverMapping();
    }

    // Verify
    Assert.assertTrue(mapping.isInitialized());
    Schema example = mapping.getEntity("http://fasb.org/us-gaap/2013-01-31/NonoperatingIncomeExpense");
    Assert.assertNotNull(example);
    XPathFinder xpath = new XPathFinder();
    NodeList nl = xpath.getNodesByPath(example.getPath(), msft2013, example.getNamespaceContext());
    Assert.assertTrue(nl.getLength() > 0);
  }

  /**
   * Discovers a mapping from the msft-20130630 XBRL sample with the key
   * attribute and hash-based interlinking steps added, and checks the expected
   * entities plus that an entity path selects nodes in the source document.
   */
  @Test
  public void test_discoverMapping_XBRL_msft() throws FileNotFoundException, SAXException,
      IOException, ParserConfigurationException, XPathExpressionException {
    // Setup
    Document msft2013 = parseResource("/secxbrls/data/msft-20130630.xml");
    Mapping mapping = new XmlBasedMapping(XCURATOR_NS_URI, XCURATOR_NS_PREFIX);

    try (FileOutputStream out = openOutput("msft-20130630-mapping.xml")) {
      MappingDiscoverer discoverer = new MappingDiscoverer(mapping);
      discoverer.addDataDocument(new DataDocument(msft2013));

      // Add discovery steps
      discoverer.addStep(new BasicEntityDiscovery(parser, rdfUriBuilder, xmlUriBuilder))
          .addStep(new KeyAttributeDiscovery())
          .addStep(new HashBasedEntityInterlinking(rdfUriBuilder))
          .addStep(newSerializeStep(out));

      // Test
      discoverer.discoverMapping();
    }

    // Verify
    Assert.assertTrue(mapping.isInitialized());
    Assert.assertNotNull(mapping.getEntity("http://fasb.org/us-gaap/2013-01-31/NonoperatingIncomeExpense"));
    Assert.assertNotNull(mapping.getEntity("http://www.xbrl.org/2003/instance/segment"));
    Assert.assertNotNull(mapping.getEntity("http://www.xbrl.org/2003/instance/period"));
    Schema example = mapping.getEntity("http://fasb.org/us-gaap/2013-01-31/NonoperatingIncomeExpense");
    Assert.assertNotNull(example);
    XPathFinder xpath = new XPathFinder();
    NodeList nl = xpath.getNodesByPath(example.getPath(), msft2013, example.getNamespaceContext());
    Assert.assertTrue(nl.getLength() > 0);
  }

  /**
   * Parses a classpath resource into a DOM document.
   *
   * @param resourcePath absolute classpath location, e.g. {@code "/secxbrls/data/x.xml"}
   * @return the document produced by {@code parser.parse(stream, -1)}
   */
  private Document parseResource(String resourcePath)
      throws SAXException, IOException, ParserConfigurationException {
    try (InputStream in = MappingDiscoveryTests.class.getResourceAsStream(resourcePath)) {
      // getResourceAsStream returns null for a missing resource; fail with the
      // path instead of letting the parser hit a null stream.
      Assert.assertNotNull("Test resource not found on classpath: " + resourcePath, in);
      return parser.parse(in, -1);
    }
  }

  /**
   * Opens a file under {@code output/} for writing, creating the directory
   * first because {@link FileOutputStream} does not create parent directories.
   * The caller owns the returned stream and must close it.
   *
   * @param fileName name of the file inside the output directory
   */
  private FileOutputStream openOutput(String fileName) throws IOException {
    Files.createDirectories(Paths.get(OUTPUT_DIR));
    return new FileOutputStream(OUTPUT_DIR + "/" + fileName);
  }

  /**
   * Builds the serialization step that writes to {@code out} using the shared
   * transformer and the test RDF URI configuration.
   */
  private SerializeMapping newSerializeStep(FileOutputStream out)
      throws SAXException, IOException, ParserConfigurationException {
    return new SerializeMapping(new XmlDocumentBuilder(), out, transformer,
        TestConfigs.testRdfUriConfig());
  }
}