/******************************************************************************* * Australian National University Data Commons * Copyright (C) 2013 The Australian National University * * This file is part of Australian National University Data Commons. * * Australian National University Data Commons is free software: you * can redistribute it and/or modify it under the terms of the GNU * General Public License as published by the Free Software Foundation, * either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package au.edu.anu.datacommons.publish; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import javax.validation.ConstraintViolation; import javax.validation.Validation; import javax.validation.Validator; import javax.validation.ValidatorFactory; import javax.ws.rs.core.MediaType; import javax.xml.bind.JAXBException; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import au.edu.anu.datacommons.ands.check.ActivityCheck; import au.edu.anu.datacommons.ands.check.CollectionCheck; import au.edu.anu.datacommons.ands.check.PartyCheck; import au.edu.anu.datacommons.ands.check.ServiceCheck; import au.edu.anu.datacommons.ands.xml.RegistryObjects; import au.edu.anu.datacommons.data.db.dao.ExternalLinkDAO; import au.edu.anu.datacommons.data.db.dao.ExternalLinkDAOImpl; import au.edu.anu.datacommons.data.db.model.ExternalLinkPattern; import au.edu.anu.datacommons.data.fedora.FedoraBroker; import au.edu.anu.datacommons.properties.GlobalProps; import au.edu.anu.datacommons.search.ExternalPoster; import au.edu.anu.datacommons.search.SparqlQuery; import au.edu.anu.datacommons.util.Constants; import au.edu.anu.datacommons.xml.transform.JAXBTransform; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.core.util.MultivaluedMapImpl; import com.yourmediashelf.fedora.client.FedoraClientException; /** * ANDSValidate * * Australian National University Data Commons * * Validates records to send to ANDS. * * JUnit Coverage: * None * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner (GT) Initial * 0.2 15/10/2012 Genevieve Turner (GT) Updated to perform RIF-CS validation * 0.3 20/11/2012 Genevieve Turner (GT) Added a way to match external links with particular object types * </pre> * */ public class ANDSValidate implements Validate{ static final Logger LOGGER = LoggerFactory.getLogger(ANDSValidate.class); private List<String> errorMessages_; /** * Constructor * * Performs initialisation for some fields * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * </pre> * */ public ANDSValidate() { errorMessages_ = new ArrayList<String>(); } /** * isValid * * Checks if the pid is valid * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * </pre> * * @param pid * @return * @see au.edu.anu.datacommons.publish.Validate#isValid(java.lang.String) */ @Override public boolean isValid(String pid) { boolean isValid = false; try { InputStream xmlSource = FedoraBroker.getDatastreamAsStream(pid, Constants.XML_SOURCE); Document doc = getStreamAsDocument(xmlSource); NodeList typeNodes = doc.getElementsByTagName("type"); if (typeNodes.getLength() > 0) { Node typeNode = typeNodes.item(0); if ("collection".equals(typeNode.getTextContent().toLowerCase())) { LOGGER.info("Is collection"); isValid = isValidCollection(pid, doc); } else if ("activity".equals(typeNode.getTextContent().toLowerCase())) { LOGGER.info("Is activity"); isValid = isValidActivity(pid, doc); } else if ("party".equals(typeNode.getTextContent().toLowerCase())) { LOGGER.info("Is party"); isValid = isValidParty(pid, doc); } else if ("service".equals(typeNode.getTextContent().toLowerCase())) { LOGGER.info("Is service"); isValid = isValidService(pid, doc); } else { LOGGER.error("This type of field should not go to ANDS, Type: {}", typeNode.getTextContent()); isValid = false; } } else { LOGGER.error("Element has no type"); isValid = false; } } catch (FedoraClientException e) { isValid = false; } return isValid; } /** * getStreamAsDocument * * Gets the inputstream as a Document * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * </pre> * * @param inputStream An inputstream to transform to a XML Document * @return The xml document */ private Document getStreamAsDocument(InputStream inputStream) { Document doc = null; try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream); } catch (ParserConfigurationException e) { } catch (IOException e) { } catch (SAXException e) { } return doc; } /** * getErrorMessages * * Returns a list of error messages from the validation * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * </pre> * * @return A list of error messages * @see au.edu.anu.datacommons.publish.Validate#getErrorMessages() */ @Override public List<String> getErrorMessages() { return errorMessages_; } /** * isValidCollection * * Determins if it is a valid collection * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * 0.2 15/10/2012 Genevieve Turner (GT) Updated to perform RIF-CS validation * </pre> * * @param pid The pid of the object to check if it is valid * @param doc The xml document to validate * @return true if it is valid otherwise false */ private boolean isValidCollection(String pid, Document doc) { // Required Assocation Types // Party // Activity boolean isValid = true; if(!hasAssociatedType(pid, "party", 2)) { isValid = false; } if(!hasAssociatedType(pid, "activity", 3)) { isValid = false; } boolean validationErrors = xmlValidate(pid, CollectionCheck.class); /*if (!validationErrors) { isValid = false; }*/ return isValid; } /** * isValidParty * * Placeholder * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * 0.2 15/10/2012 Genevieve Turner (GT) Updated to perform RIF-CS validation * </pre> * * @param pid The pid of the object to check if it is valid * @param doc The xml document to validate * @return true if it is valid otherwise false */ private boolean isValidParty(String pid, Document doc) { // Required Assocation Types // Collection // Recommended Association Types - Nothing is done with recommended at this point // Activity boolean isValid = true; if(!hasAssociatedType(pid, "collection", 2)) { isValid = false; } // Relationship not require but recommended hasAssociatedType(pid,"activity", 3); boolean validationErrors = xmlValidate(pid, PartyCheck.class); /*if (!validationErrors) { isValid = false; }*/ return isValid; } /** * isValidActivity * * Determines if it is a valid Activity * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * 0.2 15/10/2012 Genevieve Turner (GT) Updated to perform RIF-CS validation * </pre> * * @param pid The pid of the object to check if it is valid * @param doc The xml document to validate * @return true if it is valid otherwise false */ private boolean isValidActivity(String pid, Document doc) { // Required Assocation Types // Party // Collection boolean isValid = true; if(!hasAssociatedType(pid, "collection", 2)) { isValid = false; } if(!hasAssociatedType(pid, "party", 3)) { isValid = false; } boolean validationErrors = xmlValidate(pid, ActivityCheck.class); /*if (!validationErrors) { isValid = false; }*/ return isValid; } /** * isValidService * * Determines if it is a valid service * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * 0.2 15/10/2012 Genevieve Turner (GT) Updated to perform RIF-CS validation * </pre> * * @param pid The pid of the object to check if it is valid * @param doc The xml document to validate * @return true if it is valid otherwise false */ private boolean isValidService(String pid, Document doc) { // Required Assocation Types // Collection // Recommended Association Types - Nothing is done with recommended at this point // Party boolean isValid = true; if(!hasAssociatedType(pid, "collection", 2)) { isValid = false; } hasAssociatedType(pid, "party", 3); boolean validationErrors = xmlValidate(pid, ServiceCheck.class); /*if (!validationErrors) { isValid = false; }*/ return isValid; } /** * hasAssociatedType * * Checks if there is an assocation type for the record * * <pre> * Version Date Developer Description * 0.1 17/07/2012 Genevieve Turner(GT) Initial * 0.3 20/11/2012 Genevieve Turner (GT) Added a way to match external links with particular object types * </pre> * * @param pid The pid check the associations for * @param type The type of association to check * @return If there is an associated type */ private boolean hasAssociatedType(String pid, String type, int qualityLevel) { boolean isValid = false; SparqlQuery sparqlQuery = new SparqlQuery(); sparqlQuery.addVar("?item"); sparqlQuery.addVar("?type"); sparqlQuery.addVar("?predicate"); StringBuilder tripleString = new StringBuilder(); tripleString.append("{ <info:fedora/"); tripleString.append(pid); tripleString.append("> ?predicate ?item . } "); tripleString.append("UNION "); tripleString.append("{ ?item ?predicate <info:fedora/"); tripleString.append(pid); tripleString.append("> } "); sparqlQuery.addTripleSet(tripleString.toString()); sparqlQuery.addTriple("?item", "<dc:type>", "?type", Boolean.TRUE); //Ensure that the linked to item is active (i.e. it hasn't been deleted) //sparqlQuery.addTriple("?item", "<fedora-model:state>", "<fedora-model:Active>", false); sparqlQuery.addTriple("?item", "<fedora-model:state>", "?state", Boolean.TRUE); StringBuilder filterString = new StringBuilder(); // Add the predicate filter filterString.append("regex(str(?predicate), '"); filterString.append(GlobalProps.getProperty(GlobalProps.PROP_FEDORA_RELATEDURI)); filterString.append("', 'i') "); // Add the type filter filterString.append("&& "); filterString.append("regex(?type , '"); filterString.append(type); filterString.append("', 'i') "); // Ensure that the relation is for an active object filterString.append("&& "); filterString.append("regex(str(?state), 'Active')"); sparqlQuery.addFilter(filterString.toString(), ""); // Match external link patterns to particular types e.g. nla id's to parties ExternalLinkDAO externalLinkDAO = new ExternalLinkDAOImpl(); List<ExternalLinkPattern> patterns = externalLinkDAO.getByObjectType(type); for (ExternalLinkPattern pattern : patterns) { StringBuilder patternString = new StringBuilder(); patternString.append("regex(str(?item), '"); patternString.append(pattern.getPattern()); patternString.append("', 'i') "); sparqlQuery.addFilter(patternString.toString(), "||"); } String queryString = sparqlQuery.generateQuery(); LOGGER.debug("Validation relation Sparql query string: {}", queryString); //TODO see if there is an easier way to get this information ExternalPoster poster = new ExternalPoster(); poster.setUrl(GlobalProps.getProperty(GlobalProps.PROP_FEDORA_URI) + GlobalProps.getProperty(GlobalProps.PROP_FEDORA_RISEARCHURL)); poster.setUsername(GlobalProps.getProperty(GlobalProps.PROP_FEDORA_USERNAME)); poster.setPassword(GlobalProps.getProperty(GlobalProps.PROP_FEDORA_PASSWORD)); poster.setType(MediaType.APPLICATION_FORM_URLENCODED); poster.setAcceptType(MediaType.TEXT_XML); MultivaluedMapImpl parameters = new MultivaluedMapImpl(); parameters.add("dt", "on"); parameters.add("format", "Sparql"); parameters.add("lang", "sparql"); parameters.add("limit", "1"); parameters.add("type", "tuples"); poster.setParameters(parameters); ClientResponse response = poster.post("query", queryString.toString()); Document responseDoc = response.getEntity(Document.class); NodeList resultNodes = responseDoc.getElementsByTagName("result"); if (resultNodes.getLength() > 0) { LOGGER.debug("Number of results for validation sparql query: {}", resultNodes.getLength()); isValid = true; } else { LOGGER.debug("No Results returned for validation sparql query"); errorMessages_.add("Quality Level " + qualityLevel + " - Link with item type " + type); isValid = false; } return isValid; } /** * xmlValidate * * Validates the RIF-CS XML * * <pre> * Version Date Developer Description * 0.2 15/10/2012 Genevieve Turner(GT) Initial * </pre> * * @param pid The pid of the xml to validate * @param clazz The class type to validate * @return Whether the xml is valid */ private boolean xmlValidate(String pid, Class clazz) { boolean isValid = false; try { InputStream xmlStream = FedoraBroker.getDatastreamAsStream(pid, Constants.XML_SOURCE); InputStream xslStream = FedoraBroker.getDatastreamAsStream("def:rif-cs", Constants.XSL_SOURCE); StringWriter sw = new StringWriter(); TransformerFactory transformerFactory = TransformerFactory.newInstance(); Source xmlSource = new StreamSource (xmlStream); Source xslSource = new StreamSource (xslStream); Transformer transformer = transformerFactory.newTransformer(xslSource); transformer.transform(xmlSource, new StreamResult(sw)); //transformer.tra LOGGER.debug("page: {}", sw.toString()); JAXBTransform jaxbTransform = new JAXBTransform(); InputStream rifcsStream = new ByteArrayInputStream(sw.toString().getBytes("UTF-8")); RegistryObjects registryObjects = (RegistryObjects) jaxbTransform.unmarshalStream(rifcsStream, RegistryObjects.class); ValidatorFactory factory = Validation.buildDefaultValidatorFactory(); Validator validator = factory.getValidator(); Set<ConstraintViolation<RegistryObjects>> constraintViolations = validator.validate(registryObjects, clazz); if (constraintViolations.size() > 0) { Iterator<ConstraintViolation<RegistryObjects>> it = constraintViolations.iterator(); while (it.hasNext()) { ConstraintViolation<RegistryObjects> violation = it.next(); //LOGGER.info("Violation: {}", violation.getMessage()); errorMessages_.add(violation.getMessage()); } } else { isValid = true; } } catch (FedoraClientException e) { LOGGER.error("Exception retrieving stream", e); } catch (TransformerException e) { LOGGER.error("Exception executing transform", e); } catch (UnsupportedEncodingException e) { LOGGER.error("Exception getting string as utf-8", e); } catch (JAXBException e) { LOGGER.error("Exception transforming document to JAXB", e); } return isValid; } }