/** * JHOVE2 - Next-generation architecture for format-aware characterization * <p> * Copyright (c) 2009 by The Regents of the University of California, Ithaka * Harbors, Inc., and The Board of Trustees of the Leland Stanford Junior * University. All rights reserved. * </p> * <p> * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * </p> * <ul> * <li>Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer.</li> * <li>Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution.</li> * <li>Neither the name of the University of California/California Digital * Library, Ithaka Harbors/Portico, or Stanford University, nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission.</li> * </ul> * <p> * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* </p> */ package org.jhove2.module.format.xml; import java.util.ArrayList; import java.util.Collection; import java.util.TreeMap; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jhove2.annotation.ReportableProperty; import org.jhove2.core.reportable.AbstractReportable; import com.sleepycat.persist.model.Persistent; /** * <p> * The XML namespace mechanism provides a simple method for qualifying element * and attribute names by associating them with unique URI references. * </p> * <p> * A namespace binding is declared using an attribute whose name must either be * <i>xmlns</i> or <i>xmlns:{prefix}</i>. The value of the attribute is the * <i>URI</i> that identifies the namespace. * </p> * <p> * If a <i>prefix</i> is declared, then the prefix can be used to construct * <i>qualified names</i> within the scope of the element to which the * declaration is attached. If no prefix is declared, then all unqualified * element names within the scope of the declaration are considered to be * associated with that <i>default namespace</i>. * </p> * In an instance document, the attribute <i>xsi:schemaLocation</i> provides * hints from the author to a processor regarding the location of schema * documents ... The schemaLocation attribute value consists of one or more * pairs of URI references, separated by white space. The first member of each * pair is a namespace name, and the second member of the pair is a hint * describing where to find an appropriate schema document for that namespace. A * schema is not required to have a namespace and so there is a * noNamespaceSchemaLocation attribute which is used to provide hints for the * locations of schema documents that do not have target namespaces. 
*
 * @author rnanders
 * @see <a href="http://www.w3.org/TR/xml-names">Namespaces in XML 1.0</a>
 * @see <a href="http://www.w3.org/TR/xmlschema-0/#schemaLocation">schemaLocation</a>
 */
@Persistent
public class NamespaceInformation extends AbstractReportable {

    /**
     * Regex matching one pair of whitespace-separated tokens: group 1 is the
     * namespace URI, group 2 is the schema location.
     */
    private static final String SCHEMA_LOCATION_REGEX = "(?:\\s*([^\\s]+)\\s+([^\\s]+))";

    /** Compiled form of {@link #SCHEMA_LOCATION_REGEX}, built once and reused. */
    private static final Pattern SCHEMA_LOCATION_PATTERN = Pattern.compile(SCHEMA_LOCATION_REGEX);

    /** Namespaces recorded so far, keyed by namespace URI (one entry per distinct key). */
    protected TreeMap<String, Namespace> namespaces = new TreeMap<String, Namespace>();

    /** Becomes {@code true} once at least one schema location has been tallied. */
    protected boolean hasSchemaLocations;

    /** Creates an instance with no namespaces recorded. */
    protected NamespaceInformation() {
        super();
    }

    /**
     * Reports how many distinct namespaces have been recorded.
     *
     * @return the number of entries in the namespace map
     */
    @ReportableProperty(order = 1, value = "Namespace Count")
    public int getNamespaceCount() {
        return namespaces.size();
    }

    /**
     * Returns the recorded namespaces as a new list, in the key order of the
     * underlying sorted map.
     *
     * @return a fresh list holding every recorded namespace
     */
    @ReportableProperty(order = 2, value = "Namespace List")
    public ArrayList<Namespace> getNamespaces() {
        Collection<Namespace> recorded = namespaces.values();
        return new ArrayList<Namespace>(recorded);
    }

    /**
     * Builds the list of warnings about the recorded namespaces.
     * <p>
     * For each namespace, in order: a warning if it has more than one
     * declaration entry, a warning for each declaration whose count exceeds
     * one, a warning if it has more than one schema location entry, and a
     * warning for each schema location whose count exceeds one. After all
     * namespaces have been visited, a warning is added for every prefix that
     * was seen on more than one declaration entry overall.
     * </p>
     *
     * @return a new list of warning messages; empty when there is nothing to report
     */
    @ReportableProperty(order = 3, value = "Namespace Warnings")
    public ArrayList<String> getNamespaceWarnings() {
        ArrayList<String> warnings = new ArrayList<String>();
        /* Number of declaration entries, across all namespaces, that carry each prefix. */
        TreeMap<String, Integer> entriesPerPrefix = new TreeMap<String, Integer>();

        for (Namespace namespace : namespaces.values()) {
            Collection<NamespaceDeclaration> declared = namespace.declarations.values();
            if (declared.size() > 1) {
                warnings.add("Multiple prefixes used for " + namespace.uri);
            }
            for (NamespaceDeclaration declaration : declared) {
                String prefix = declaration.prefix;
                if (declaration.count > 1) {
                    warnings.add("Prefix " + prefix
                            + " declared multiple times for " + namespace.uri);
                }
                Integer seenSoFar = entriesPerPrefix.get(prefix);
                entriesPerPrefix.put(prefix, seenSoFar == null ? 1 : seenSoFar + 1);
            }

            Collection<SchemaLocation> locations = namespace.schemaLocations.values();
            if (locations.size() > 1) {
                warnings.add("Multiple schema locations used for " + namespace.uri);
            }
            for (SchemaLocation schemaLocation : locations) {
                if (schemaLocation.count > 1) {
                    warnings.add("Schema location " + schemaLocation.location
                            + " declared multiple times for " + namespace.uri);
                }
            }
        }

        /* Prefix-level warnings come last, after every per-namespace warning. */
        for (Entry<String, Integer> tally : entriesPerPrefix.entrySet()) {
            if (tally.getValue() <= 1) {
                continue;
            }
            warnings.add("Prefix " + tally.getKey() + " is used for multiple namespaces");
        }
        return warnings;
    }

    /**
     * Looks up the namespace recorded under the given URI, creating and
     * registering a new one when none exists yet.
     *
     * @param uri
     *            the URI of the namespace to be returned
     *
     * @return the existing or newly created namespace object
     */
    private Namespace getNamespace(String uri) {
        Namespace existing = namespaces.get(uri);
        if (existing != null) {
            return existing;
        }
        Namespace created = new Namespace(uri);
        namespaces.put(uri, created);
        return created;
    }

    /**
     * Tallies one xmlns declaration against the namespace it names.
     *
     * @param uri
     *            the namespace URI
     * @param prefix
     *            the namespace prefix
     */
    protected void tallyDeclaration(String uri, String prefix) {
        Namespace namespace = getNamespace(uri);
        namespace.tallyDeclaration(prefix);
    }

    /**
     * Parses the schemaLocation and noNamespaceSchemaLocation attributes of an
     * element and tallies every schema location found.
     * <p>
     * The schemaLocation value is scanned for consecutive pairs of
     * whitespace-separated tokens; each pair is tallied as (URI, location). A
     * non-null noNamespaceSchemaLocation value is tallied afterwards under the
     * key {@code [noNamespace]}. A {@code null} argument is skipped.
     * </p>
     *
     * @param schemaLocation
     *            the schema location attribute value, or {@code null}
     * @param noNamespaceSchemaLocation
     *            the no-namespace schema location attribute value, or {@code null}
     */
    protected void parseSchemaLocation(String schemaLocation, String noNamespaceSchemaLocation) {
        if (schemaLocation != null) {
            /* Each successful find() yields one pair: group 1 = URI, group 2 = location. */
            for (Matcher pair = SCHEMA_LOCATION_PATTERN.matcher(schemaLocation); pair.find();) {
                tallySchemaLocation(pair.group(1), pair.group(2));
            }
        }
        if (noNamespaceSchemaLocation != null) {
            tallySchemaLocation("[noNamespace]", noNamespaceSchemaLocation);
        }
    }

    /**
     * Tallies a single mapping from a namespace URI to a schema location and
     * records that schema locations were specified.
     *
     * @param uri
     *            the uri
     * @param location
     *            the location
     */
    protected void tallySchemaLocation(String uri, String location) {
        Namespace namespace = getNamespace(uri);
        namespace.tallySchemaLocation(location);
        hasSchemaLocations = true;
    }
}