/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xml2rdf.mapping;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import junit.framework.TestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import edu.toronto.cs.xml2rdf.string.NoWSCaseInsensitiveStringMetric;
import edu.toronto.cs.xml2rdf.utils.LogUtils;
import edu.toronto.cs.xml2rdf.xml.XMLUtils;
/**
 * Integration-style test that drives {@link Mapping} end to end against
 * clinical trial documents downloaded from clinicaltrials.gov.
 *
 * <p>The test needs network access, the mapping file
 * {@code output/output.ct.1.100.xml} and the identifier list
 * {@code resources/clinicaltrials/data/filelist.txt}, both resolved relative
 * to the working directory.
 */
public class MappingTest extends TestCase {

  /**
   * URIs handed to the {@link Mapping} constructor as its blacklist set.
   * Duplicate entries were dropped because the values end up in a
   * {@link HashSet} anyway. NOTE(review): presumably these are external types
   * that must not be linked to -- confirm against {@code Mapping}.
   */
  private static final String[] BLACKLIST = {
    "http://rdf.freebase.com/rdf/m.04mp1fp",
    "http://rdf.freebase.com/rdf/location.dated_location",
    "http://rdf.freebase.com/rdf/location.statistical_region",
    "http://rdf.freebase.com/rdf/location.administrative_division",
    "http://rdf.freebase.com/rdf/music.artist",
    "http://rdf.freebase.com/rdf/base.whoami.answer",
    "http://rdf.freebase.com/rdf/base.legislation.vote_value",
    "http://rdf.freebase.com/rdf/m.04lqt84",
    "http://rdf.freebase.com/rdf/base.umltools.design_pattern",
    "http://rdf.freebase.com/rdf/base.braziliangovt.brazilian_governmental_vote_type",
    "http://sw.opencyc.org/concept/Mx4rJ3ZbguI8QdeGDNhCi9LL3Q",
    "http://sw.opencyc.org/concept/Mx4rvUCoPtoTQdaZVdw2OtjsAg",
    "http://sw.opencyc.org/concept/Mx4rveI9NpwpEbGdrcN5Y29ycA",
    "http://sw.opencyc.org/concept/Mx4rIGTaIPAIQdaffLzGWDo0Zw",
    "http://sw.opencyc.org/concept/Mx4rvVj8VZwpEbGdrcN5Y29ycA",
    "http://rdf.freebase.com/rdf/military.military_combatant",
    "http://rdf.freebase.com/rdf/book.book_subject",
    "http://rdf.freebase.com/rdf/sports.sports_team_location",
    "http://rdf.freebase.com/rdf/user.tsegaran.random.taxonomy_subject",
    "http://rdf.freebase.com/rdf/food.beer_country_region",
    "http://rdf.freebase.com/rdf/user.skud.flags.flag_having_thing",
    "http://rdf.freebase.com/rdf/m.04l1354",
    "http://rdf.freebase.com/rdf/olympics.olympic_participating_country",
    "http://rdf.freebase.com/rdf/organization.organization_member",
    "http://rdf.freebase.com/rdf/biology.breed_origin",
    "http://rdf.freebase.com/rdf/user.robert.military.military_power",
    "http://rdf.freebase.com/rdf/base.ontologies.ontology_instance",
    "http://rdf.freebase.com/rdf/government.governmental_jurisdiction"
  };

  /**
   * Loads the mapping and, for every identifier listed in the file list,
   * downloads the matching document from clinicaltrials.gov, wraps it in a
   * {@code clinical_studies} root element and runs both
   * {@code generateRDFSchema} and {@code generateRDFs} on it with the
   * {@code "finaltdb"} target.
   *
   * <p>Blank lines in the file list are skipped. The file reader and each
   * HTTP response stream are closed even when a step throws. The test makes no
   * assertions; it fails only if one of the steps throws.
   *
   * @throws ParserConfigurationException propagated from the XML/mapping calls
   * @throws SAXException propagated from the XML/mapping calls
   * @throws IOException if the file list or a remote document cannot be read
   * @throws XPathExpressionException propagated from the XML/mapping calls
   */
  public void testLoadMapping() throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
    Mapping mapping = new Mapping("output/output.ct.1.100.xml", new HashSet<String>(Arrays.asList(BLACKLIST)));
    String typePrefix = "http://www.linkedct.org/0.1#";

    BufferedReader br = new BufferedReader(new FileReader("resources/clinicaltrials/data/filelist.txt"));
    try {
      String line;
      while ((line = br.readLine()) != null) {
        String id = line.trim();
        if (id.isEmpty()) {
          // A blank line would otherwise request ".../show/?displayxml=true".
          continue;
        }

        String url = "http://clinicaltrials.gov/show/" + id + "?displayxml=true";

        Document dataDoc;
        // openStream() yields the response body directly, so no cast of
        // getContent()'s handler-dependent result is needed.
        InputStream in = new URL(url).openStream();
        try {
          // NOTE(review): the meaning of the -1 argument is defined by
          // XMLUtils.parse and is not visible here.
          dataDoc = XMLUtils.parse(in, -1);
        } finally {
          in.close();
        }
        dataDoc = XMLUtils.addRoot(dataDoc, "clinical_studies");
        // Debugging hint: serialize dataDoc to System.out here to inspect the
        // wrapped document.

        // Only the two generation steps are timed, not the download.
        long startMillis = System.currentTimeMillis();
        mapping.generateRDFSchema("finaltdb", dataDoc, typePrefix, null, "RDF/XML-ABBREV",
            new NoWSCaseInsensitiveStringMetric(), 1);
        mapping.generateRDFs("finaltdb", dataDoc, typePrefix, null, "RDF/XML-ABBREV",
            new NoWSCaseInsensitiveStringMetric(), 1);
        LogUtils.info(MappingTest.class, "Imported : " + line + " @ " + (System.currentTimeMillis() - startMillis));
      }
    } finally {
      br.close();
    }
  }
}