package edu.toronto.cs.xcurator.cli.mapping;
import edu.toronto.cs.xcurator.discoverer.BasicEntityDiscovery;
import edu.toronto.cs.xcurator.common.DataDocument;
import edu.toronto.cs.xcurator.discoverer.MappingDiscoverer;
import edu.toronto.cs.xcurator.discoverer.SerializeMapping;
import edu.toronto.cs.xcurator.mapping.Mapping;
import edu.toronto.cs.xcurator.mapping.XmlBasedMapping;
import edu.toronto.cs.xcurator.common.RdfUriBuilder;
import edu.toronto.cs.xcurator.common.XmlDocumentBuilder;
import edu.toronto.cs.xcurator.common.XmlParser;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import edu.toronto.cs.xcurator.common.XmlUriBuilder;
import edu.toronto.cs.xcurator.discoverer.HashBasedEntityInterlinking;
import edu.toronto.cs.xcurator.discoverer.KeyAttributeDiscovery;
import edu.toronto.cs.xcurator.cli.config.RunConfig;
import edu.toronto.cs.xcurator.discoverer.MappingDiscoveryStep;
import edu.toronto.cs.xcurator.discoverer.RemoveGroupingNodes;
import org.w3c.dom.Document;
/**
 * Factory that builds {@link Mapping} instances by running a configurable
 * sequence of discovery steps over one or more XML documents, optionally
 * serializing the resulting mapping to a file.
 */
public class MappingFactory {

    private final RunConfig config;

    /**
     * @param config run configuration; supplies the resource URI base and is
     * handed to the URI builders and the serialization step
     */
    public MappingFactory(RunConfig config) {
        this.config = config;
    }

    /**
     * Create a mapping instance by discovering entities in a single XML
     * document; the mapping is not serialized.
     *
     * @param xmlDocument the document to run discovery on
     * @param steps discovery step codes, one character per step, in execution
     * order (see {@link MappingDiscoveryStep.TYPE})
     * @return the mapping populated by the discovery steps
     */
    public Mapping createInstance(Document xmlDocument, String steps) {
        List<Document> docList = new ArrayList<>();
        docList.add(xmlDocument);
        return createInstance(docList, steps);
    }

    /**
     * Create a mapping instance by discovering entities in a single XML
     * document, and serialize the mapping to the given mapping file.
     *
     * @param xmlDocument the document to run discovery on
     * @param mappingFile path of the file the mapping is written to
     * @param steps discovery step codes, one character per step, in execution
     * order (see {@link MappingDiscoveryStep.TYPE})
     * @return the mapping populated by the discovery steps
     * @throws TransformerConfigurationException if the XML transformer used
     * for serialization cannot be created
     * @throws FileNotFoundException if the mapping file cannot be opened for
     * writing
     */
    public Mapping createInstance(Document xmlDocument, String mappingFile, String steps)
            throws TransformerConfigurationException, FileNotFoundException {
        List<Document> docList = new ArrayList<>();
        docList.add(xmlDocument);
        return createInstance(docList, mappingFile, steps);
    }

    /**
     * Create a mapping instance by discovering entities in multiple XML
     * documents; the mapping is not serialized.
     *
     * @param xmlDocuments the documents to run discovery on
     * @param steps discovery step codes, one character per step, in execution
     * order (see {@link MappingDiscoveryStep.TYPE})
     * @return the mapping populated by the discovery steps
     */
    public Mapping createInstance(List<Document> xmlDocuments, String steps) {
        Mapping mapping = buildXmlBasedMapping();
        MappingDiscoverer discoverer = buildDiscoverer(xmlDocuments, mapping, steps);
        discoverer.discoverMapping();
        return mapping;
    }

    /**
     * Create a mapping instance by discovering entities in multiple XML
     * documents, and serialize the mapping to the given file. Serialization is
     * appended as the last discovery step, after all steps named in
     * {@code steps}.
     *
     * @param xmlDocuments the documents to run discovery on
     * @param fileName path of the file the mapping is written to
     * @param steps discovery step codes, one character per step, in execution
     * order (see {@link MappingDiscoveryStep.TYPE})
     * @return the mapping populated by the discovery steps
     * @throws TransformerConfigurationException if the XML transformer used
     * for serialization cannot be created
     * @throws FileNotFoundException if {@code fileName} cannot be opened for
     * writing
     */
    public Mapping createInstance(List<Document> xmlDocuments, String fileName, String steps)
            throws TransformerConfigurationException, FileNotFoundException {
        Mapping mapping = buildXmlBasedMapping();
        MappingDiscoverer discoverer = buildDiscoverer(xmlDocuments, mapping, steps);

        // Pretty-print the serialized mapping with a two-space indent.
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");

        // NOTE(review): the FileOutputStream is never closed in this class;
        // confirm that SerializeMapping closes it once it has written the file.
        discoverer.addStep(new SerializeMapping(new XmlDocumentBuilder(),
                new FileOutputStream(fileName),
                transformer, config));
        discoverer.discoverMapping();
        return mapping;
    }

    /**
     * Build a discoverer over the given documents and register one discovery
     * step per character of {@code steps}, in order. Characters that match no
     * known step type are reported on standard output and skipped.
     *
     * Key attribute discovery is mandatory before entity interlinking: when an
     * interlinking step is not directly preceded by a key attribute step, one
     * is inserted automatically.
     *
     * @param xmlDocuments the documents to register with the discoverer
     * @param mapping the mapping the discoverer will populate
     * @param steps discovery step codes, one character per step
     * @return the configured discoverer; discovery has not been run yet
     */
    private MappingDiscoverer buildDiscoverer(List<Document> xmlDocuments,
            Mapping mapping, String steps) {
        MappingDiscoverer discoverer = new MappingDiscoverer(mapping);
        String resourceUriPattern = config.getResourceUriBase() + "${UUID}";
        for (Document document : xmlDocuments) {
            discoverer.addDataDocument(new DataDocument(document, resourceUriPattern));
        }

        // Tracks whether the step registered immediately before the current
        // one was key attribute discovery, so that it is not added twice in a
        // row when the caller already requested it ahead of interlinking.
        boolean previousWasKeyAttribute = false;
        for (char step : steps.toCharArray()) {
            if (step == MappingDiscoveryStep.TYPE.BASIC.getValue()) {
                discoverer.addStep(new BasicEntityDiscovery(
                        new XmlParser(),
                        new RdfUriBuilder(config), new XmlUriBuilder(), true));
            } else if (step == MappingDiscoveryStep.TYPE.KEYATTRIBUTE.getValue()) {
                discoverer.addStep(new KeyAttributeDiscovery());
            } else if (step == MappingDiscoveryStep.TYPE.INTERLIKNING.getValue()) {
                // Key identification is mandatory when the user chooses
                // interlinking. The original code tried to enforce this with
                // String.replace but discarded the result, so it never applied.
                if (!previousWasKeyAttribute) {
                    discoverer.addStep(new KeyAttributeDiscovery());
                }
                discoverer.addStep(new HashBasedEntityInterlinking(new RdfUriBuilder(config)));
            } else if (step == MappingDiscoveryStep.TYPE.REMOVE_GROUPING_NODES.getValue()) {
                discoverer.addStep(new RemoveGroupingNodes(new RdfUriBuilder(config)));
            } else {
                System.out.println("Unsupported DiscovererStep: " + step);
            }
            previousWasKeyAttribute = (step == MappingDiscoveryStep.TYPE.KEYATTRIBUTE.getValue());
        }
        return discoverer;
    }

    /**
     * @return a new, empty XML-based mapping for the discoverer to populate
     */
    private Mapping buildXmlBasedMapping() {
        return new XmlBasedMapping();
    }
}