package com.tyndalehouse.step.tools.osis; import org.apache.commons.io.FileUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.*; import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXResult; import javax.xml.transform.sax.SAXTransformerFactory; import javax.xml.transform.sax.TransformerHandler; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import java.io.*; import java.util.Collection; /** * Strips out the '/' character from the original source. * @author chrisburrell */ public class OHBParser { /** * @param args the list of arguments */ public static void main(String[] args) throws TransformerException, ParserConfigurationException, SAXException, IOException { if(args.length != 2) { System.out.println("Args: inputDirectory outputFile"); System.exit(-1); } DOMSource source = new DOMSource(joinFiles(args[0])); SAXTransformerFactory stf = (SAXTransformerFactory) TransformerFactory.newInstance(); // These templates objects could be reused and obtained from elsewhere. Templates templates1 = stf.newTemplates(new StreamSource(OHBParser.class.getResourceAsStream("ohb_parser.xsl"))); TransformerHandler th1 = stf.newTransformerHandler(templates1); BufferedOutputStream fos = new BufferedOutputStream(new FileOutputStream(args[1])); th1.setResult(new StreamResult(fos)); Transformer t = stf.newTransformer(); t.transform(source, new SAXResult(th1)); fos.close(); } /** * Joins the files found in GitHub. */ private static Document joinFiles(String directory) throws IOException, ParserConfigurationException, SAXException { final Collection<File> files = FileUtils.listFiles(new File(directory), new String[]{"xml"}, false); Document masterDoc = null; Node osisText = null; for (File f : files) { if("VerseMap.xml".equals(f.getName())) { continue; } System.out.println("Processing: " + f.getName()); DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(f); if (masterDoc == null) { osisText = findElement(doc, "osisText"); masterDoc = doc; } else { //we need to merge it, at the level of the osisText element Node otherOsisText = findElement(doc, "osisText"); final NodeList childNodes = otherOsisText.getChildNodes(); for (int ii = 0; ii < childNodes.getLength(); ii++) { Node child = childNodes.item(ii); if (!"header".equals(child.getNodeName())) { osisText.appendChild(masterDoc.importNode(child, true)); } } } } return masterDoc; } private static Node findElement(final Document doc, String nodeName) { final NodeList childNodes = doc.getChildNodes().item(0).getChildNodes(); for (int ii = 0; ii < childNodes.getLength(); ii++) { final Node item = childNodes.item(ii); if (item instanceof Element) { if (nodeName.equals(item.getNodeName())) { return item; } } } return null; } }