/**
 * =============================================================================
 *
 * ORCID (R) Open Source
 * http://orcid.org
 *
 * Copyright (c) 2012-2014 ORCID, Inc.
 * Licensed under an MIT-Style License (MIT)
 * http://orcid.org/open-source-license
 *
 * This copyright and license information (including a link to the full license)
 * shall be included in its entirety in all copies or substantial portion of
 * the software.
 *
 * =============================================================================
 */
package org.orcid.core.cli;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Command-line tool that strips an XML metadata file down to an allow-list of
 * entities.
 * <p>
 * Every {@code md:EntityDescriptor} element whose {@code entityID} attribute is
 * not one of the lines of the allow-list file is removed from the document
 * (together with a blank text node directly preceding it), and the remaining
 * document is written to an output file.
 */
public class FilterEdugain {

    /**
     * Output location used when no third argument is given. The spelling of
     * the file name is kept as-is so existing consumers of that path keep
     * working.
     */
    private static final String DEFAULT_OUTPUT_PATH = "/tmp/federation-metatdata-filtered.xml";

    /**
     * Qualified element name looked up in the document. The parser is not
     * namespace-aware, so the {@code md} prefix must appear literally in the
     * input file.
     */
    private static final String ENTITY_DESCRIPTOR_TAG = "md:EntityDescriptor";

    private static final String ENTITY_ID_ATTRIBUTE = "entityID";

    /**
     * Runs the filter.
     *
     * @param args
     *            {@code args[0]}: path of a text file with one entity id per
     *            line; {@code args[1]}: path of the XML metadata file to
     *            filter; {@code args[2]} (optional): path of the file to
     *            write, defaulting to
     *            {@code /tmp/federation-metatdata-filtered.xml}
     * @throws IllegalArgumentException
     *             if fewer than two arguments are supplied
     * @throws FileNotFoundException
     *             if an input file cannot be opened or the output file cannot
     *             be created
     * @throws IOException
     *             if reading or writing fails
     * @throws ParserConfigurationException
     *             if the XML parser cannot be configured
     * @throws SAXException
     *             if the metadata file is not well-formed XML
     * @throws TransformerException
     *             if the filtered document cannot be serialised
     */
    public static void main(String[] args) throws FileNotFoundException, IOException, ParserConfigurationException, SAXException, TransformerException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Usage: FilterEdugain <idp-list-file> <metadata-xml-file> [<output-file>]");
        }
        Set<String> idps = readIdps(args[0]);
        File edugainFile = new File(args[1]);
        String outputPath = args.length > 2 ? args[2] : DEFAULT_OUTPUT_PATH;

        Document doc = parse(edugainFile);
        filterEntities(doc, idps);
        write(doc, outputPath);
    }

    /** Reads the allow-list file into a set, one entry per line. */
    private static Set<String> readIdps(String path) throws IOException {
        // The stream is closed here; IOUtils.readLines does not close it.
        try (FileInputStream in = new FileInputStream(path)) {
            // A set gives constant-time lookups while filtering.
            return new HashSet<>(IOUtils.readLines(in));
        }
    }

    /** Parses the given XML file into a DOM document. */
    private static Document parse(File xmlFile) throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        // The metadata file comes from outside this program, so ask the
        // parser to apply its secure-processing restrictions.
        dbFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        return dBuilder.parse(xmlFile);
    }

    /**
     * Removes every entity descriptor whose id is not in {@code idps} and
     * prints the matched entities plus a summary line to standard output.
     */
    private static void filterEntities(Document doc, Set<String> idps) {
        NodeList nodeList = doc.getElementsByTagName(ENTITY_DESCRIPTOR_TAG);
        // The NodeList is live: removing nodes while iterating would shift
        // the indices. Capture the length and collect the removals first.
        int originalLength = nodeList.getLength();
        int matched = 0;
        List<Node> nodesToRemove = new ArrayList<>();
        for (int i = 0; i < originalLength; i++) {
            Node item = nodeList.item(i);
            Node entityIdAttribute = item.getAttributes().getNamedItem(ENTITY_ID_ATTRIBUTE);
            // A descriptor without an entityID cannot be on the allow-list.
            String entityId = entityIdAttribute == null ? null : entityIdAttribute.getTextContent();
            if (entityId != null && idps.contains(entityId)) {
                matched++;
                System.out.println("Found entity " + entityId);
            } else {
                nodesToRemove.add(item);
                // Also drop the blank text node in front of the element so
                // the output is not left with runs of empty lines.
                Node previousSibling = item.getPreviousSibling();
                if (previousSibling instanceof Text && StringUtils.isBlank(previousSibling.getTextContent())) {
                    nodesToRemove.add(previousSibling);
                }
            }
        }
        for (Node node : nodesToRemove) {
            node.getParentNode().removeChild(node);
        }
        System.out.println("Number of entities matched = " + matched + "/" + originalLength);
    }

    /** Serialises the document to the given path. */
    private static void write(Document doc, String outputPath) throws IOException, TransformerException {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer();
        // The caller owns a stream handed to StreamResult, so close it here.
        try (FileOutputStream out = new FileOutputStream(outputPath)) {
            transformer.transform(new DOMSource(doc), new StreamResult(out));
        }
    }
}